import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os

# --- 1. Model Downloading (No changes) ---
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="RichardErkhov/openai-community_-_gpt2-xl-gguf",
    filename="gpt2-xl.Q6_K.gguf"
)
print(f"Model downloaded to: {model_path}")
# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
n_threads = os.cpu_count()
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=n_threads,
    n_gpu_layers=0
)
print(f"Model loaded for CPU execution with {n_threads} threads.")
# --- 3. Chat Function with Streaming (No changes) ---
def chat(message, history):
    history_prompt = ""
    for user_msg, assistant_msg in history:
        history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"

    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.
{history_prompt}### User:
{message}
### Assistant:"""

    stream = llm(
        full_prompt,
        max_tokens=1024,
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True
    )

    partial_message = ""
    for output in stream:
        token = output['choices'][0]['text']
        partial_message += token
        yield partial_message
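
# For reference, with an empty history the template above assembles a prompt
# roughly like this (a sketch of the format, not output captured from the app):
#
#   ### System:
#   You are Dolphin 3.0, a helpful and friendly AI assistant.
#   ### User:
#   Hello!
#   ### Assistant: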
# --- 4. The Enhanced Chatbot UI (MAXIMUM COMPATIBILITY) ---
# We are removing ALL custom button arguments to ensure this works on older Gradio versions.
# Gradio will add the default 'Undo' and 'Clear' buttons for us.
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    cache_examples=False,
)
if __name__ == "__main__":
    iface.launch()
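
A Space running this app also needs a requirements.txt next to app.py; a minimal sketch, with package names taken from the imports above (version pins omitted):

gradio
huggingface_hub
llama-cpp-python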