# Hugging Face Spaces file-viewer metadata (Space status: Runtime error; file size: 2,068 bytes).
# The scraped page chrome — commit-hash blame gutter and line-number columns — was not part of the
# source file and has been reduced to this comment so the file parses as Python.
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os
# --- 1. Model Downloading ---
# Fetch a GGUF quantization of GPT-2 XL from the Hugging Face Hub into the
# local HF cache; hf_hub_download returns the resolved on-disk path.
# NOTE(review): the UI below brands itself "Dolphin 3.0", but this repo is a
# GPT-2 XL quantization — confirm which model is actually intended.
print("===== Downloading model... =====")
model_path = hf_hub_download(
    repo_id="RichardErkhov/openai-community_-_gpt2-xl-gguf",
    filename="gpt2-xl.Q6_K.gguf",
)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
# os.cpu_count() may return None on platforms where it cannot be determined;
# fall back to a single thread rather than passing None through.
n_threads = os.cpu_count() or 1
llm = Llama(
    model_path=model_path,
    n_ctx=2048,        # context window size in tokens
    n_threads=n_threads,
    n_gpu_layers=0,    # CPU-only Space: keep every layer on the CPU
)
print(f"Model loaded for CPU execution with {n_threads} threads.")
# --- 3. Chat Function with Streaming ---
def chat(message, history):
    """Stream an assistant reply for *message* given the chat *history*.

    Args:
        message: The latest user message as a plain string.
        history: List of ``(user_msg, assistant_msg)`` pairs, as supplied by
            ``gr.ChatInterface`` in tuple format.

    Yields:
        The accumulated assistant reply so far, one chunk per generated
        token, which Gradio renders as a live-updating message.
    """
    # Build the transcript in one pass with str.join instead of repeated
    # += concatenation (which is quadratic in the number of turns).
    history_prompt = "".join(
        f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        for user_msg, assistant_msg in history
    )
    # NOTE(review): the system prompt claims "Dolphin 3.0" while the loaded
    # model is GPT-2 XL — confirm the intended persona/model pairing.
    full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.
{history_prompt}### User:
{message}
### Assistant:"""
    # Completion-style call on the module-level Llama instance; stop markers
    # prevent the model from generating the next turn itself.
    stream = llm(
        full_prompt,
        max_tokens=1024,
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True,
    )
    partial_message = ""
    for output in stream:
        partial_message += output['choices'][0]['text']
        yield partial_message
# --- 4. The Enhanced Chatbot UI (MAXIMUM COMPATIBILITY) ---
# All custom button arguments are deliberately omitted so this works on older
# Gradio versions; Gradio supplies its default 'Undo' and 'Clear' buttons.
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    # Examples would each invoke the LLM at startup if cached; keep this off.
    cache_examples=False,
)
# Launch the Gradio server only when run as a script (a stray trailing "|"
# scraping artifact on the last line has been removed — it was a SyntaxError).
if __name__ == "__main__":
    iface.launch()