Update app.py
app.py (CHANGED)
```diff
@@ -10,16 +10,17 @@ import ollama
 # List of available models for selection.
 # IMPORTANT: These names must correspond to models that have been either
 
+
 # Model from run.sh
-
-    'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
-    #'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
-    'smollm2:360m-instruct-q5_K_M',
-    'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M', # OK speed with CPU
-    #'gemma3n:e2b-it-q4_K_M',
-    'granite3.3:2b',
-    'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M'
-
+MODEL_ID_MAP = {
+    "Qwen3-4B-Instruct-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
+    #"Qwen3-4B-Thinking-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
+    "SmolLM2-360M": 'smollm2:360m-instruct-q5_K_M',
+    "Llama3.2-3B-Instruct": 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M', # OK speed with CPU
+    #"Gemma3n-e2b-it": 'gemma3n:e2b-it-q4_K_M',
+    "Granite3.3-2B": 'granite3.3:2b',
+    "Hunyuan-4B-Instruct": 'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M'
+}
 
 
 # Default System Prompt
```
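The core of this hunk: the flat list of Ollama model tags becomes a map from short display names to full tags, so the UI can show friendly labels while the backend still receives the exact tag. A minimal sketch of that lookup pattern, assuming the map shape above; `resolve_model` is a hypothetical helper, not part of the commit:

```python
# Display label -> Ollama model tag (local pulls or hf.co GGUF references).
MODEL_ID_MAP = {
    "SmolLM2-360M": "smollm2:360m-instruct-q5_K_M",
    "Granite3.3-2B": "granite3.3:2b",
}

def resolve_model(display_name: str) -> str:
    """Map a UI label to its Ollama tag, failing loudly on unknown labels."""
    try:
        return MODEL_ID_MAP[display_name]
    except KeyError:
        raise ValueError(f"Unknown model label: {display_name!r}")

print(resolve_model("Granite3.3-2B"))  # -> granite3.3:2b
```

Keeping labels and tags in one dict means adding a model is a one-line change that updates the radio choices and the backend lookup together.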
```diff
@@ -36,9 +37,9 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 
     # Model Selection
     with gr.Row():
-
-            choices=
-            value=
+        selected_model_label = gr.Radio(
+            choices=list(MODEL_ID_MAP.keys()),
+            value=list(MODEL_ID_MAP.keys())[0], # Default to first display name
             label="Select Model",
             info="Choose the LLM model to chat with.",
             interactive=True
```
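The radio component now derives both its choices and its default value from the map's keys, so it never has to be edited when models change. A standalone sketch, assuming the `MODEL_ID_MAP` from the first hunk and a simplified layout:

```python
import gradio as gr

MODEL_ID_MAP = {
    "SmolLM2-360M": "smollm2:360m-instruct-q5_K_M",
    "Granite3.3-2B": "granite3.3:2b",
}

with gr.Blocks() as demo:
    with gr.Row():
        selected_model_label = gr.Radio(
            choices=list(MODEL_ID_MAP.keys()),   # show friendly labels only
            value=list(MODEL_ID_MAP.keys())[0],  # default to the first label
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True,
        )
```

Note that the component's value is the selected *label*; the tag lookup happens later, inside the chat handler.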
```diff
@@ -75,7 +76,10 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 
     # --- New: System Prompt Options ---
     SYSTEM_PROMPT_OPTIONS = {
-        "Smart & Accurate (
+        "Smart & Accurate (Auto TC/EN)": DEFAULT_SYSTEM_PROMPT,
+        "繁體中文回答": "無論如何,必須使用標準繁體中文回答. Answer everything in a simple, smart, relevant and accurate style. Don't be chatty!",
+        "简体中文回答": "无论如何,必须使用标准简体中文回答. Answer everything in a simple, smart, relevant and accurate style. Don't be chatty!",
+        "English Chat": "You must reply in English. Answer everything in a simple, smart, relevant and accurate style. Don't be chatty!",
         "Friendly & Conversational": """Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.""",
         "Professional & Formal": """Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts.""",
         "Elon Musk style": "You must chat in Elon Musk style!"
```
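This hunk adds language-forcing presets alongside the existing tone presets. The diff does not show how the preset key interacts with the custom-prompt flag that `respond` receives; a plausible sketch of that combination, with `effective_system_prompt` purely hypothetical:

```python
SYSTEM_PROMPT_OPTIONS = {
    "Friendly & Conversational": "Respond in a warm, friendly, engaging tone.",
    "Professional & Formal": "Maintain a formal, professional tone.",
}

def effective_system_prompt(selected_key: str, custom_prompt: str, use_custom: bool) -> str:
    """Prefer a non-empty custom prompt when the override box is ticked."""
    if use_custom and custom_prompt.strip():
        return custom_prompt
    return SYSTEM_PROMPT_OPTIONS.get(selected_key, "")

print(effective_system_prompt("Professional & Formal", "", False))
```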
```diff
@@ -123,13 +127,15 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 
     # --- Core Chat Logic ---
     # This function is the heart of the application.
-    def respond(history, system_prompt, stream_output,
+    def respond(history, system_prompt, stream_output, selected_model_name, selected_prompt_key, use_custom_prompt): # Added selected_model_name
         """
         This is the single function that handles the entire chat process.
         It takes the history, prepends the system prompt, calls the Ollama API,
         and streams the response back to the chatbot.
         """
 
+        current_selected_model = MODEL_ID_MAP[selected_model_name]
+
         # Disable Qwen3 thinking
         if "qwen3" in current_selected_model.lower():
             system_prompt = system_prompt + " /no_think"
```
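`respond` now takes the radio's label and resolves it to a concrete tag before calling Ollama. Because the hf.co tags contain a capital "Q", the Qwen3 check has to lowercase the model string itself rather than the literal, and Qwen3's `/no_think` soft switch is appended to the system prompt to suppress its thinking phase. A self-contained sketch of lookup plus streaming, assuming the `ollama` Python client and messages-style history; `stream_reply` is a stand-in name:

```python
import ollama

MODEL_ID_MAP = {"SmolLM2-360M": "smollm2:360m-instruct-q5_K_M"}

def stream_reply(selected_model_name: str, system_prompt: str, history: list):
    model_id = MODEL_ID_MAP[selected_model_name]
    if "qwen3" in model_id.lower():        # match regardless of tag casing
        system_prompt += " /no_think"      # Qwen3 soft switch: skip thinking
    messages = [{"role": "system", "content": system_prompt}] + history
    # ollama.chat with stream=True yields chunks as they are generated.
    for chunk in ollama.chat(model=model_id, messages=messages, stream=True):
        yield chunk["message"]["content"]
```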
```diff
@@ -176,9 +182,9 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
         queue=False
     ).then(
         respond,
-        inputs=[chatbot, system_prompt_textbox, stream_checkbox,
+        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model_label, system_prompt_selector, use_custom_prompt_checkbox], # Pass new inputs
         outputs=[chatbot]
     )
 
 # Launch the Gradio interface
-demo.launch(server_name="0.0.0.0", server_port=7860)
+demo.launch(server_name="0.0.0.0", server_port=7860)
```
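The event chain keeps its shape: an unqueued first step, then `respond`, whose `inputs` list gains the three new components in the same order as the new function parameters. `demo.launch(server_name="0.0.0.0", server_port=7860)` binds to all interfaces on the port Hugging Face Spaces expects. A trimmed, runnable sketch of the pattern; `msg`, `add_user_turn`, and the stub `respond` are placeholder names, not from this commit:

```python
import gradio as gr

def add_user_turn(user_msg, history):
    history = history or []
    # Append the user's turn and clear the textbox.
    return "", history + [{"role": "user", "content": user_msg}]

def respond(history):
    # Stand-in for the real streaming Ollama handler.
    return history + [{"role": "assistant", "content": "(reply)"}]

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox(label="Message")
    msg.submit(
        add_user_turn, inputs=[msg, chatbot], outputs=[msg, chatbot], queue=False
    ).then(
        respond, inputs=[chatbot], outputs=[chatbot]
    )

demo.launch(server_name="0.0.0.0", server_port=7860)
```

Running the first step with `queue=False` lets the user's message appear immediately, while the model call that follows goes through the queue.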