Spaces:

SkyNetWalker
/

chatCPU

Sleeping

App Files Files Community

SkyNetWalker committed on Aug 12

Commit

224325d

verified ·

1 Parent(s): 6f4bea7

Create app.py

Browse files

Files changed (1) hide show

app.py +184 -0

app.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import requests
+# Look up the country code this process appears to run from; the value is
+# printed to the log and interpolated into the UI header further down.
+# This is a best-effort lookup: a slow or unreachable ipinfo.io (or an
+# unexpected response body) must not prevent the app from starting, so a
+# timeout is set and failures fall back to a placeholder value.
+try:
+    check_ipinfo = requests.get("https://ipinfo.io", timeout=10).json()['country']
+except (requests.RequestException, ValueError, KeyError):
+    check_ipinfo = "unknown"
+print("Run-Location-As: ",check_ipinfo)
+import gradio as gr
+import ollama
+# List of available models for selection.
+# IMPORTANT: These names must correspond to models that are already available
+# to the local Ollama server, e.g. pulled beforehand with the shell commands
+# listed below. They are shell commands, not Python, so they must stay inside
+# comments or the module fails to parse.
+#   ollama pull hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M
+#   #ollama pull hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M
+#   ollama pull smollm2:360m-instruct-q5_K_M
+#   ollama pull hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M
+#   #ollama pull gemma3n:e2b-it-q4_K_M #slow on Spaces CPU
+#   ollama pull granite3.3:2b
+#   ollama pull hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M
+# Model from run.sh
+# The first entry is used as the default selection in the UI.
+AVAILABLE_MODELS = [
+    'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
+    #'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
+    'smollm2:360m-instruct-q5_K_M',
+    'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M', # OK speed with CPU
+    #'gemma3n:e2b-it-q4_K_M',
+    'granite3.3:2b',
+    'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M',
+]
+# --- Models that failed to run (kept commented out for reference) ---
+    #'hf.co/ggml-org/SmolLM3-3B-GGUF:Q4_K_M',
+    #'hf.co/bartowski/nvidia_OpenReasoning-Nemotron-1.5B-GGUF:Q5_K_M',
+# Default System Prompt
+# Used as the "Smart & Accurate (Default)" preset and as the initial text of
+# the system prompt textbox. English summary of the numbered rules below:
+#   1. If the query is written in Chinese, answer in standard Traditional
+#      Chinese that follows official-document conventions.
+#   2. Provide the rules/sources the answer relies on.
+#   3. If the query is written in English, answer in English.
+# The final line additionally asks for short (within 20 words), non-chatty answers.
+# NOTE: the leading newline and the indentation are part of the string that
+# is sent to the model.
+DEFAULT_SYSTEM_PROMPT = """
+    1. 如果查詢是以中文輸入，使用標準繁體中文回答，符合官方文書規範
+    2. 要提供引用規則依据
+    3. 如果查詢是以英文輸入，使用英文回答
+    Answer everything in simple, smart, relevant and accurate style, within 20 words. No chatty!
+    """
+# --- Gradio Interface ---
+# Everything created inside this `with` block becomes part of the `demo` app.
+# Components are declared in the order they should appear on the page, so the
+# statement order below is significant.
+with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
+    # Page header: title, detected run location, and a short usage hint.
+    gr.Markdown(f"## Small Language Model (SLM) run with CPU") # Changed title to be more generic
+    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
+    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
+    # Model Selection
+    # The chosen value is the model name string handed to the chat handler.
+    with gr.Row():
+        selected_model = gr.Radio(
+            choices=AVAILABLE_MODELS,
+            value=AVAILABLE_MODELS[0], # Default to the first model in the list
+            label="Select Model",
+            info="Choose the LLM model to chat with.",
+            interactive=True
+        )
+    # Conversation view. With type='messages' the handlers in this file treat
+    # the history as a list of {"role": ..., "content": ...} dicts.
+    chatbot = gr.Chatbot(
+        label="Conversation",
+        height=400,
+        type='messages',
+        layout="bubble"
+    )
+    # Single-line input box; submitting it (Enter) triggers the chat handlers.
+    with gr.Row():
+        msg = gr.Textbox(
+            show_label=False,
+            placeholder="Type your message here and press Enter...",
+            lines=1,
+            scale=4,
+            container=False
+        )
+    # Collapsed-by-default panel with streaming and system prompt settings.
+    with gr.Accordion("Advanced Options", open=False):
+        with gr.Row():
+            stream_checkbox = gr.Checkbox(
+                label="Stream Output",
+                value=True,
+                info="Enable to see the response generate in real-time."
+            )
+            use_custom_prompt_checkbox = gr.Checkbox(
+                label="Use Custom System Prompt",
+                value=False,
+                info="Check this box to provide your own system prompt below."
+            )
+        # --- New: System Prompt Options ---
+        # Maps the label shown in the preset selector to the system prompt
+        # text used when the custom prompt checkbox is not ticked.
+        SYSTEM_PROMPT_OPTIONS = {
+            "Smart & Accurate (Default)": DEFAULT_SYSTEM_PROMPT,
+            "Friendly & Conversational": """Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.""",
+            "Professional & Formal": """Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts."""
+        }
+        # Preset selector; its value is a key of SYSTEM_PROMPT_OPTIONS.
+        system_prompt_selector = gr.Radio(
+            label="Choose a System Prompt Style",
+            choices=list(SYSTEM_PROMPT_OPTIONS.keys()),
+            value="Smart & Accurate (Default)",
+            interactive=True
+        )
+        # Free-text system prompt. It starts read-only and is made editable by
+        # the custom prompt checkbox.
+        # NOTE(review): no handler visible here copies the selected preset
+        # into this textbox, so it keeps showing the default prompt text even
+        # when another preset is selected - confirm whether that is intended.
+        system_prompt_textbox = gr.Textbox(
+            label="System Prompt",
+            value=DEFAULT_SYSTEM_PROMPT,
+            lines=3,
+            placeholder="Enter a system prompt to guide the model's behavior...",
+            interactive=False
+        )
+    # Enable or disable editing of the system prompt textbox.
+    def toggle_system_prompt(use_custom):
+        """Return a component update whose `interactive` flag mirrors the checkbox state."""
+        textbox_update = gr.update(interactive=use_custom)
+        return textbox_update
+    # Run the toggle (unqueued) every time the custom prompt checkbox changes.
+    use_custom_prompt_checkbox.change(
+        toggle_system_prompt,
+        use_custom_prompt_checkbox,
+        system_prompt_textbox,
+        queue=False
+    )
+    # --- Core Chat Logic ---
+    # This function is the heart of the application.
+    def respond(history, system_prompt, stream_output, current_selected_model, selected_prompt_key, use_custom_prompt):
+        """
+        Generate the assistant's reply for the conversation in `history`.
+
+        This is a generator: each yielded value is the updated history list,
+        which Gradio renders in the chatbot.
+
+        Args:
+            history: List of {"role", "content"} message dicts; the last entry
+                is expected to be the user's newest message. The list is
+                mutated in place (an assistant entry is appended and filled).
+            system_prompt: Text of the system prompt textbox; only used when
+                `use_custom_prompt` is true.
+            stream_output: When true, yield after every received chunk; when
+                false, request the full reply and yield once.
+            current_selected_model: Name of the Ollama model to query.
+            selected_prompt_key: Key into SYSTEM_PROMPT_OPTIONS naming the
+                preset prompt used when `use_custom_prompt` is false.
+            use_custom_prompt: Whether to use `system_prompt` instead of the
+                selected preset.
+
+        Yields:
+            The history list including the (partial or complete) assistant reply.
+        """
+        # Resolve the effective system prompt first, so that any suffix added
+        # below is not overwritten by the preset lookup.
+        if not use_custom_prompt:
+            system_prompt = SYSTEM_PROMPT_OPTIONS[selected_prompt_key]
+        # Disable Qwen3 thinking. Compare case-insensitively: the model names
+        # in this file spell it "Qwen3", so a lowercase-only needle would
+        # never match the raw name.
+        if "qwen3" in current_selected_model.lower():
+            system_prompt = system_prompt + " /no_think"
+        # The 'history' variable from Gradio contains the entire conversation.
+        # We prepend the system prompt to this history to form the final payload.
+        # This builds a new list, so the placeholder appended next is not sent.
+        messages = [{"role": "system", "content": system_prompt}] + history
+        # Add a placeholder for the assistant's response to the UI history.
+        # This creates the space where the response will be displayed.
+        history.append({"role": "assistant", "content": ""})
+        if not stream_output:
+            # Non-streaming mode: wait for the complete reply and show it at once.
+            response = ollama.chat(
+                model=current_selected_model,
+                messages=messages,
+                stream=False
+            )
+            history[-1]['content'] = response['message']['content'] or ""
+            yield history
+            return
+        # Stream the response from the Ollama API using the currently selected model
+        response_stream = ollama.chat(
+            model=current_selected_model, # Use the dynamically selected model
+            messages=messages,
+            stream=True
+        )
+        # Iterate through the stream, updating the placeholder with each new chunk.
+        for chunk in response_stream:
+            piece = chunk['message']['content']
+            if piece:
+                history[-1]['content'] += piece
+                # Yield the updated history to the chatbot for a real-time effect.
+                yield history
+    # Handles the user's submission from the input textbox.
+    def user_submit(history, user_message):
+        """
+        Record the user's message and reset the input box.
+
+        Returns a two-item tuple: a new history list with the user's turn
+        appended (the list passed in is not modified), and an empty string
+        that clears the textbox. The 'respond' function runs on this state next.
+        """
+        user_turn = {"role": "user", "content": user_message}
+        updated_history = history + [user_turn]
+        cleared_input = ""
+        return updated_history, cleared_input
+    # Gradio Event Wiring
+    # Step 1 (unqueued): append the user's message to the chat and clear the input.
+    submit_event = msg.submit(
+        fn=user_submit,
+        inputs=[chatbot, msg],
+        outputs=[chatbot, msg],
+        queue=False
+    )
+    # Step 2: once step 1 has finished, generate the assistant's reply into the chatbot.
+    submit_event.then(
+        fn=respond,
+        inputs=[
+            chatbot,
+            system_prompt_textbox,
+            stream_checkbox,
+            selected_model,
+            system_prompt_selector,
+            use_custom_prompt_checkbox,
+        ],
+        outputs=[chatbot]
+    )
+# Launch the Gradio interface
+# Binds to all network interfaces (0.0.0.0) on port 7860.
+demo.launch(server_name="0.0.0.0", server_port=7860)