import requests

# Best-effort check of where this instance is running; fall back gracefully
# if ipinfo.io is unreachable so the app can still start.
try:
    check_ipinfo = requests.get("https://ipinfo.io", timeout=5).json()['country']
except requests.RequestException:
    check_ipinfo = "unknown"
print("Run-Location-As:", check_ipinfo)
import gradio as gr
import ollama

# List of available models for selection.
# IMPORTANT: These display names must map to model tags that have either been
# pulled into the local Ollama store already or are pre-loaded by run.sh.
MODEL_ID_MAP = {
    "(LiquidAI)LFM2-1.2B": 'hf.co/LiquidAI/LFM2-1.2B-GGUF:Q5_K_M',
    "(LiquidAI)LFM2-700M": 'hf.co/LiquidAI/LFM2-700M-GGUF:Q5_K_M',
    "(LiquidAI)LFM2-350M": 'hf.co/LiquidAI/LFM2-350M-GGUF:Q5_K_M',
    "(Google)Gemma-3-270M-it-qat": 'hf.co/unsloth/gemma-3-270m-it-qat-GGUF:Q5_K_M',
    "(阿里千問)Qwen3-4B-Instruct-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF:Q4_K_M',
    "(IBM)Granite3.3-2B": 'granite3.3:2b',
    "(Meta)Llama3.2-3B-Instruct": 'hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M',  # OK speed with CPU
    #"(阿里千問)Qwen3-4B-Thinking-2507": 'hf.co/bartowski/Qwen_Qwen3-4B-Thinking-2507-GGUF:Q4_K_M',
    "(Google)Gemma3n-e2b-it": 'gemma3n:e2b-it-q4_K_M',
    #"(Tencent)混元-1.8B-Instruct": 'hf.co/bartowski/tencent_Hunyuan-1.8B-Instruct-GGUF:Q4_K_M',
    #"(Tencent)混元-4B-Instruct": 'hf.co/bartowski/tencent_Hunyuan-4B-Instruct-GGUF:Q4_K_M',
    "(HuggingFace)SmolLM2-360M": 'smollm2:360m-instruct-q5_K_M',
}
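
# A minimal sketch (assumption: run.sh already pulls these models, so this is
# only a fallback). ollama.pull() downloads a model tag if it is not present
# locally; uncomment to pre-fetch everything at startup, at the cost of a
# potentially long first launch.
# for model_tag in MODEL_ID_MAP.values():
#     ollama.pull(model_tag)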
# Default system prompt: concise answers; Traditional Chinese for Chinese
# queries, English for English queries.
DEFAULT_SYSTEM_PROMPT = """Answer everything in a simple, smart, relevant and accurate style. No small talk! Besides, please:
1. 如果查詢是以中文輸入,使用標準繁體中文回答
2. 如果查詢是以英文輸入,使用英文回答"""
# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
    gr.Markdown("## HH demo: LLM/SLM chatbot running with CPU only.")
    gr.Markdown(f"(Run-Location-As: `{check_ipinfo}`)")
    gr.Markdown("Chat with the model, customize its behavior with a system prompt, and toggle streaming output.")
    # Model Selection
    with gr.Row():
        selected_model_label = gr.Radio(
            choices=list(MODEL_ID_MAP.keys()),
            value=list(MODEL_ID_MAP.keys())[0],  # Default to the first display name
            label="Select Model",
            info="Choose the LLM model to chat with.",
            interactive=True
        )
    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        type='messages',
        layout="bubble"
    )
    with gr.Row():
        msg = gr.Textbox(
            show_label=False,
            placeholder="Type your message here and press Enter...",
            lines=1,
            scale=4,
            container=False
        )
    with gr.Accordion("Advanced Options", open=False):
        with gr.Row():
            stream_checkbox = gr.Checkbox(
                label="Stream Output",
                value=True,
                info="Enable to see the response generate in real-time."
            )
            use_custom_prompt_checkbox = gr.Checkbox(
                label="Use Custom System Prompt",
                value=False,
                info="Check this box to provide your own system prompt below."
            )
        # --- System Prompt Options ---
        SYSTEM_PROMPT_OPTIONS = {
            "AI Henry": DEFAULT_SYSTEM_PROMPT,
            # Always reply in Traditional Chinese, regardless of input language.
            "繁體中文回答": "無論如何,必須使用標準繁體中文回答. Answer everything in a simple, smart, relevant and accurate style. No small talk!",
            # Always reply in Simplified Chinese, regardless of input language.
            "简体中文回答": "无论如何,必须使用标准简体中文回答. Answer everything in a simple, smart, relevant and accurate style. No small talk!",
            "English Chat": "You must reply in English. Answer everything in a simple, smart, relevant and accurate style. No small talk!",
            "Friendly & Conversational": "Respond in a warm, friendly, and engaging tone. Use natural language and offer helpful suggestions. Keep responses concise but personable.",
            "Professional & Formal": "Maintain a formal and professional tone. Use precise language, avoid slang, and ensure responses are suitable for business or academic contexts.",
            "Elon Musk style": "You must chat in Elon Musk style!",
            "Good Coder": """
You are a highly capable coding assistant specialized in software development, algorithms, and debugging. Your responses must be accurate, efficient, and tailored to the user's request. Always follow these principles:
1. Use clear, well-commented code.
2. Prioritize readability and best practices.
3. When asked to explain, provide concise, step-by-step reasoning.
4. When asked to generate code, include input/output examples if relevant.
5. If the user provides buggy code, identify the issue and suggest a fix.
6. If multiple solutions exist, briefly compare them and recommend the best.
7. Always respect the specified programming language, libraries, and constraints.
8. Never make assumptions beyond the user's instructions unless explicitly asked.
9. If the task is ambiguous, ask clarifying questions before proceeding.
10. Avoid unnecessary boilerplate unless requested.
11. Use only open-source and free resources, libraries, and APIs. Do not suggest or rely on paid, proprietary, or license-restricted tools unless explicitly requested.
12. Your code must run cleanly on Colab or Kaggle.
13. Always include inline comments for user learning.
14. Always provide installation and operation steps.
You support multiple languages including Python, JavaScript, TypeScript, C++, Java, Go, Rust, and Bash. You can also assist with frameworks like React, Node.js, Django, Flask, and more.
Your goal is to help the user write better code, faster, and deepen their understanding of programming concepts.
""",
            "Test1(Auto TC/EN)": "Always detect the user's input language and respond in that same language. Do not translate unless explicitly requested. Answer everything in a simple, smart, relevant and accurate style. No small talk!",
            # Traditional Chinese (formal register, with cited rules) for Chinese queries, English for English queries.
            "Simulate Tencent Auto TC/EN": """Answer everything in a simple, smart, relevant and accurate style. No small talk! Besides, please:
1. 如果查詢是以中文輸入,使用標準繁體中文回答,符合官方文書規範
2. 要提供引用規則依據
3. 如果查詢是以英文輸入,使用英文回答"""
        }
        system_prompt_selector = gr.Radio(
            label="Choose a System Prompt Style",
            choices=list(SYSTEM_PROMPT_OPTIONS.keys()),
            value="AI Henry",
            interactive=True
        )
        system_prompt_textbox = gr.Textbox(
            label="System Prompt",
            value=DEFAULT_SYSTEM_PROMPT,
            lines=3,
            placeholder="Enter a system prompt to guide the model's behavior...",
            interactive=False
        )
    # Toggle the interactivity of the system prompt textbox.
    def toggle_system_prompt(use_custom):
        return gr.update(interactive=use_custom)

    use_custom_prompt_checkbox.change(
        fn=toggle_system_prompt,
        inputs=use_custom_prompt_checkbox,
        outputs=system_prompt_textbox,
        queue=False
    )
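
    # Optional sketch (not wired in; an assumption about desired behavior):
    # when "Use Custom System Prompt" is unticked, restore the textbox to the
    # currently selected preset instead of leaving the user's edits behind.
    # use_custom_prompt_checkbox.change(
    #     fn=lambda use_custom, key: gr.update() if use_custom else gr.update(value=SYSTEM_PROMPT_OPTIONS[key]),
    #     inputs=[use_custom_prompt_checkbox, system_prompt_selector],
    #     outputs=system_prompt_textbox,
    #     queue=False
    # )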
    # Update the textbox when the prompt style changes (unless a custom prompt is in use).
    def update_prompt_text(selected_key, use_custom):
        if not use_custom:
            return gr.update(value=SYSTEM_PROMPT_OPTIONS[selected_key])
        return gr.update()

    system_prompt_selector.change(
        fn=update_prompt_text,
        inputs=[system_prompt_selector, use_custom_prompt_checkbox],
        outputs=system_prompt_textbox,
        queue=False
    )
    # --- Core Chat Logic ---
    # This function is the heart of the application.
    def respond(history, system_prompt, stream_output, selected_model_name, selected_prompt_key, use_custom_prompt):
        """
        Handles one chat turn: resolves the system prompt, prepends it to the
        history, calls the Ollama API with the selected model, and streams
        (or returns) the response to the chatbot.
        """
        # Use the selected predefined prompt unless a custom prompt is enabled.
        if not use_custom_prompt:
            system_prompt = SYSTEM_PROMPT_OPTIONS[selected_prompt_key]
        current_selected_model = MODEL_ID_MAP[selected_model_name]
        # Disable Qwen3 "thinking" mode for small Qwen3 variants
        # (none of these tags are in MODEL_ID_MAP at the moment).
        if any(substring in current_selected_model.lower() for substring in ["qwen3-0.6b", "qwen3-1.7b"]):
            system_prompt = system_prompt + " /no_think"
        # The 'history' variable from Gradio contains the entire conversation.
        # Prepend the system prompt to form the final payload.
        messages = [{"role": "system", "content": system_prompt}] + history
        # Add a placeholder for the assistant's response to the UI history;
        # this is where the response will be displayed.
        history.append({"role": "assistant", "content": ""})
        if stream_output:
            # Stream the response chunk by chunk for a real-time effect.
            response_stream = ollama.chat(
                model=current_selected_model,  # Use the dynamically selected model
                messages=messages,
                stream=True
            )
            for chunk in response_stream:
                if chunk['message']['content']:
                    history[-1]['content'] += chunk['message']['content']
                    # Yield the updated history to the chatbot as it grows.
                    yield history
        else:
            # Non-streaming: wait for the full response, then show it at once.
            response = ollama.chat(model=current_selected_model, messages=messages)
            history[-1]['content'] = response['message']['content']
            yield history
    # Handle the user's submission.
    def user_submit(history, user_message):
        """
        Adds the user's message to the chat history and clears the input box,
        preparing the state for the main 'respond' function.
        """
        return history + [{"role": "user", "content": user_message}], ""

    # Gradio Event Wiring
    msg.submit(
        user_submit,
        inputs=[chatbot, msg],
        outputs=[chatbot, msg],
        queue=False
    ).then(
        respond,
        inputs=[chatbot, system_prompt_textbox, stream_checkbox, selected_model_label, system_prompt_selector, use_custom_prompt_checkbox],
        outputs=[chatbot]
    )
# Launch the Gradio interface
demo.launch(server_name="0.0.0.0", server_port=7860)
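
# Quick-start note (assumptions: a local Ollama server is running on its
# default port, and the models in MODEL_ID_MAP have been pulled, e.g. by run.sh):
#   pip install gradio ollama requests
#   python app.py
# Then open http://localhost:7860 in a browser.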