File size: 2,068 Bytes
ba312c3
9169ab9
84d2105
633a2af
9169ab9
633a2af
d8f4c07
9169ab9
47328f3
 
9169ab9
d8f4c07
ba312c3
633a2af
d8f4c07
633a2af
84d2105
 
633a2af
 
b57471d
84d2105
633a2af
 
 
d039ddb
633a2af
 
 
 
de3fc99
1cc005a
 
de3fc99
633a2af
 
4e3600c
1cc005a
b57471d
633a2af
1cc005a
633a2af
 
 
6c24cd8
b57471d
633a2af
 
 
 
 
1cc005a
b57471d
 
 
633a2af
1cc005a
633a2af
 
 
 
 
 
 
1cc005a
4e3600c
633a2af
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os

# --- 1. Model Downloading (No changes) ---
# Pull the quantized GGUF weights from the Hub; hf_hub_download caches the
# file locally, so repeated Space restarts reuse the same download.
print("===== Downloading model... =====")
_REPO_ID = "RichardErkhov/openai-community_-_gpt2-xl-gguf"
_GGUF_FILENAME = "gpt2-xl.Q6_K.gguf"
model_path = hf_hub_download(repo_id=_REPO_ID, filename=_GGUF_FILENAME)
print(f"Model downloaded to: {model_path}")

# --- 2. Model Loading (Optimized for HF Space CPU) ---
print("===== Loading model... =====")
# Use every available core; os.cpu_count() may return None, which
# llama-cpp treats as "auto-detect".
n_threads = os.cpu_count()
_llama_config = {
    "model_path": model_path,
    "n_ctx": 2048,          # context window in tokens
    "n_threads": n_threads,
    "n_gpu_layers": 0,      # CPU-only: free Spaces have no GPU
}
llm = Llama(**_llama_config)
print(f"Model loaded for CPU execution with {n_threads} threads.")


# --- 3. Chat Function with Streaming (No changes) ---
def chat(message, history):
    """Stream an assistant reply for *message*, given prior chat *history*.

    Args:
        message: The latest user input string.
        history: List of (user_msg, assistant_msg) pairs from earlier turns
                 (Gradio tuple-format chat history).

    Yields:
        The accumulated assistant reply so far, one growing string per token,
        so Gradio renders a live streaming response.
    """
    # Serialize earlier turns into the prompt transcript.
    transcript = "".join(
        f"### User:\n{user_turn}\n\n### Assistant:\n{bot_turn}\n\n"
        for user_turn, bot_turn in history
    )

    # System preamble + transcript + the new user turn, ending with the
    # assistant cue so the model continues from there.
    prompt = (
        "### System:\n"
        "You are Dolphin 3.0, a helpful and friendly AI assistant.\n\n"
        f"{transcript}### User:\n{message}\n\n### Assistant:"
    )

    completion_stream = llm(
        prompt,
        max_tokens=1024,
        # Stop before the model invents the next turn of the dialogue.
        stop=["</s>", "### User:", "### Assistant:"],
        stream=True
    )

    reply_so_far = ""
    for chunk in completion_stream:
        reply_so_far += chunk['choices'][0]['text']
        yield reply_so_far

# --- 4. The Enhanced Chatbot UI (MAXIMUM COMPATIBILITY) ---
# No custom button arguments are passed so this also works on older Gradio
# versions; Gradio adds its default 'Undo' and 'Clear' buttons itself.
_chat_window = gr.Chatbot(height=500)
_message_box = gr.Textbox(
    placeholder="Ask me something... I'm all yours.",
    container=False,
    scale=7,
)
iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    description="A sleek, streaming chat interface running on a CPU Space.",
    chatbot=_chat_window,
    textbox=_message_box,
    theme="soft",
    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
    cache_examples=False,  # examples hit the model live instead of precomputing
)


# Launch the Gradio server only when this file is run directly
# (not when it is imported as a module).
if __name__ == "__main__":
    iface.launch()