import gradio as gr
from huggingface_hub import InferenceClient
import os
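# Module-level state: the local pipeline is created lazily on first use;
# stop_inference is declared but not referenced elsewhere in this file.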
pipe = None
stop_inference = False
# Fancy styling
fancy_css = """
#main-container {
background-color: #f0f0f0;
font-family: 'Arial', sans-serif;
}
.gradio-container {
max-width: 700px;
margin: 0 auto;
padding: 20px;
background: white;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
border-radius: 10px;
}
.gr-button {
background-color: #4CAF50;
color: white;
border: none;
border-radius: 5px;
padding: 10px 20px;
cursor: pointer;
transition: background-color 0.3s ease;
}
.gr-button:hover {
background-color: #45a049;
}
.gr-slider input {
color: #4CAF50;
}
.gr-chat {
font-size: 16px;
}
#title {
text-align: center;
font-size: 2em;
margin-bottom: 20px;
color: #333;
}
"""
def respond(
    message,
    history: list[dict[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token: gr.OAuthToken,
    use_local_model: bool,
):
    global pipe

    # Build messages from history
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    response = ""
    if use_local_model:
        print("[MODE] local")
        from transformers import pipeline
        import torch

        if pipe is None:
            pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct")

        # Build prompt as plain text
        prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])

        outputs = pipe(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
        response = outputs[0]["generated_text"][len(prompt):]
        yield response.strip()
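
    # API mode: stream partial completions from the Hugging Face Inference API.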
    else:
        print("[MODE] api")
        if hf_token is None or not getattr(hf_token, "token", None):
            yield "⚠️ Please log in with your Hugging Face account first."
            return

        client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            choices = chunk.choices
            token = ""
            if len(choices) and choices[0].delta.content:
                token = choices[0].delta.content
            response += token
            yield response
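
# Chat UI: system prompt, sampling controls, and the local-model toggle are exposed as additional inputs.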
chatbot = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        gr.Checkbox(label="Use Local Model", value=False),
    ],
    type="messages",
)
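
# Page layout: a title row with a Hugging Face login button, then the chat interface.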
with gr.Blocks(css=fancy_css) as demo:
    with gr.Row():
        gr.Markdown("<h1 style='text-align: center;'>🌟 Fancy AI Chatbot 🌟</h1>")
        gr.LoginButton()
    chatbot.render()

if __name__ == "__main__":
    demo.launch()