Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,10 +13,11 @@ import ollama
|
|
| 13 |
|
| 14 |
# Model from run.sh
|
| 15 |
MODEL_ID_MAP = {
|
|
|
|
|
|
|
|
|
|
| 16 |
"(IBM)Granite3.3-2B": 'granite3.3:2b',
|
| 17 |
"(Meta)Llama3.2-3B-Instruct": 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M', # OK speed with CPU
|
| 18 |
-
"(阿里千問)Qwen3-4B-Instruct-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
|
| 19 |
-
"(阿里千問)Qwen3-Coder-30B-A3B-Instruct-1M": 'hf.co/unsloth/Qwen3-Coder-30B-A3B-Instruct-1M-GGUF:Q4_K_M',
|
| 20 |
#"(阿里千問)Qwen3-4B-Thinking-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
|
| 21 |
#"(Google)Gemma3n-e2b-it": 'gemma3n:e2b-it-q4_K_M',
|
| 22 |
#"(Tencent)混元-1.8B-Instruct":'hf.co/bartowski/tencent_Hunyuan-1.8B-Instruct-GGUF:Q4_K_M',
|
|
@@ -27,9 +28,8 @@ MODEL_ID_MAP = {
|
|
| 27 |
|
| 28 |
# Default System Prompt
|
| 29 |
DEFAULT_SYSTEM_PROMPT = """Answer everything in simple, smart, relevant and accurate style. No chatty! Besides, pls:
|
| 30 |
-
1.
|
| 31 |
-
2.
|
| 32 |
-
3. 如果查詢是以英文輸入,使用英文回答"""
|
| 33 |
|
| 34 |
# --- Gradio Interface ---
|
| 35 |
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
|
|
@@ -78,15 +78,19 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
|
|
| 78 |
|
| 79 |
# --- New: System Prompt Options ---
|
| 80 |
SYSTEM_PROMPT_OPTIONS = {
|
| 81 |
-
"Smart & Accurate
|
| 82 |
"繁體中文回答":"無論如何,必須使用標準繁體中文回答. Answer everything in simple, smart, relevant and accurate style. No chatty!",
|
| 83 |
"简体中文回答":"无论如何,必须使用标准简体中文回答. Answer everything in simple, smart, relevant and accurate style. No chatty!",
|
| 84 |
-
"English
|
| 85 |
"Friendly & Conversational":"Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.",
|
| 86 |
"Professional & Formal":"Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts.",
|
| 87 |
"Elon Musk style":"You must chat in Elon Musk style!",
|
| 88 |
-
"
|
| 89 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
You are a highly capable coding assistant specialized in software development, algorithms, and debugging. Your responses must be accurate, efficient, and tailored to the user's request. Always follow these principles:
|
| 91 |
1. Use clear, well-commented code.
|
| 92 |
2. Prioritize readability and best practices.
|
|
@@ -157,6 +161,10 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
|
|
| 157 |
It takes the history, prepends the system prompt, calls the Ollama API,
|
| 158 |
and streams the response back to the chatbot.
|
| 159 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
|
| 161 |
current_selected_model = MODEL_ID_MAP[selected_model_name]
|
| 162 |
|
|
@@ -164,10 +172,9 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
|
|
| 164 |
#if "Qwen3".lower() in current_selected_model:
|
| 165 |
# system_prompt = system_prompt+" /no_think"
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
# The 'history' variable from Gradio contains the entire conversation.
|
| 172 |
# We prepend the system prompt to this history to form the final payload.
|
| 173 |
messages = [{"role": "system", "content": system_prompt}] + history
|
|
|
|
| 13 |
|
| 14 |
# Model from run.sh
|
| 15 |
MODEL_ID_MAP = {
|
| 16 |
+
"(阿里千問)Qwen3-0.6B": 'ollama pull hf.co/unsloth/Qwen3-0.6B-GGUF:Q5_K_M',
|
| 17 |
+
"(阿里千問)Qwen3-1.7B": 'ollama pull hf.co/unsloth/Qwen3-1.7B-GGUF:Q5_K_M',
|
| 18 |
+
"(阿里千問)Qwen3-4B-Instruct-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
|
| 19 |
"(IBM)Granite3.3-2B": 'granite3.3:2b',
|
| 20 |
"(Meta)Llama3.2-3B-Instruct": 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M', # OK speed with CPU
|
|
|
|
|
|
|
| 21 |
#"(阿里千問)Qwen3-4B-Thinking-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
|
| 22 |
#"(Google)Gemma3n-e2b-it": 'gemma3n:e2b-it-q4_K_M',
|
| 23 |
#"(Tencent)混元-1.8B-Instruct":'hf.co/bartowski/tencent_Hunyuan-1.8B-Instruct-GGUF:Q4_K_M',
|
|
|
|
| 28 |
|
| 29 |
# Default System Prompt
|
| 30 |
DEFAULT_SYSTEM_PROMPT = """Answer everything in simple, smart, relevant and accurate style. No chatty! Besides, pls:
|
| 31 |
+
1. 如果查詢是以中文輸入,使用標準繁體中文回答
|
| 32 |
+
2. 如果查詢是以英文輸入,使用英文回答"""
|
|
|
|
| 33 |
|
| 34 |
# --- Gradio Interface ---
|
| 35 |
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
|
|
|
|
| 78 |
|
| 79 |
# --- New: System Prompt Options ---
|
| 80 |
SYSTEM_PROMPT_OPTIONS = {
|
| 81 |
+
"Smart & Accurate & Auto TC/EN": DEFAULT_SYSTEM_PROMPT,
|
| 82 |
"繁體中文回答":"無論如何,必須使用標準繁體中文回答. Answer everything in simple, smart, relevant and accurate style. No chatty!",
|
| 83 |
"简体中文回答":"无论如何,必须使用标准简体中文回答. Answer everything in simple, smart, relevant and accurate style. No chatty!",
|
| 84 |
+
"English Chat":"You must reply by English. Answer everything in simple, smart, relevant and accurate style. No chatty!",
|
| 85 |
"Friendly & Conversational":"Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.",
|
| 86 |
"Professional & Formal":"Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts.",
|
| 87 |
"Elon Musk style":"You must chat in Elon Musk style!",
|
| 88 |
+
"Test1(Auto TC/EN)":"Always detect the user's input language and respond in that same language. Do not translate unless explicitly requested. Answer everything in simple, smart, relevant and accurate style. No chatty!",
|
| 89 |
+
"Test2(Auto TC/EN)":"""Answer everything in simple, smart, relevant and accurate style. No chatty! Besides, pls:
|
| 90 |
+
1. 如果查詢是以中文輸入,使用標準繁體中文回答,符合官方文書規範
|
| 91 |
+
2. 要提供引用規則依据
|
| 92 |
+
3. 如果查詢是以英文輸入,使用英文回答""",
|
| 93 |
+
"Good Coder":"""
|
| 94 |
You are a highly capable coding assistant specialized in software development, algorithms, and debugging. Your responses must be accurate, efficient, and tailored to the user's request. Always follow these principles:
|
| 95 |
1. Use clear, well-commented code.
|
| 96 |
2. Prioritize readability and best practices.
|
|
|
|
| 161 |
It takes the history, prepends the system prompt, calls the Ollama API,
|
| 162 |
and streams the response back to the chatbot.
|
| 163 |
"""
|
| 164 |
+
|
| 165 |
+
# Use selected predefined prompt unless custom is enabled
|
| 166 |
+
if not use_custom_prompt:
|
| 167 |
+
system_prompt = SYSTEM_PROMPT_OPTIONS[selected_prompt_key]
|
| 168 |
|
| 169 |
current_selected_model = MODEL_ID_MAP[selected_model_name]
|
| 170 |
|
|
|
|
| 172 |
#if "Qwen3".lower() in current_selected_model:
|
| 173 |
# system_prompt = system_prompt+" /no_think"
|
| 174 |
|
| 175 |
+
if any(substring in current_selected_model.lower() for substring in ["qwen3-0.6b", "qwen3-1.7b"]):
|
| 176 |
+
system_prompt = system_prompt+" /no_think"
|
| 177 |
+
|
|
|
|
| 178 |
# The 'history' variable from Gradio contains the entire conversation.
|
| 179 |
# We prepend the system prompt to this history to form the final payload.
|
| 180 |
messages = [{"role": "system", "content": system_prompt}] + history
|