"""
🧮 Root_Math fine-tuned model chat app for Hugging Face Spaces.
Supports both Gradio UI and API access via `/chat`.
"""
import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# ✅ Load Hugging Face API token securely
api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not api_token:
    raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
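# Note: on Hugging Face Spaces this token is typically added as a repository secret
# named HUGGINGFACEHUB_API_TOKEN; locally you could set it before launching, e.g.
#   export HUGGINGFACEHUB_API_TOKEN=<your token>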
# ✅ Define model names
base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
peft_model_name = "Hrushi02/Root_Math" # <-- model name stays the same
# ✅ Load base model
print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=api_token,  # `use_auth_token` is deprecated in recent transformers releases
)
# ✅ Load your fine-tuned PEFT adapter
print("Loading fine-tuned adapter...")
model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
# ✅ Load tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
# ✅ Define the response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate responses from your fine-tuned model."""
    # Build a plain-text prompt from the system message and the chat history.
    full_prompt = system_message + "\n\n"
    for user_msg, bot_msg in (history or []):  # history may be None when called via the API
        if user_msg:
            full_prompt += f"User: {user_msg}\n"
        if bot_msg:
            full_prompt += f"Assistant: {bot_msg}\n"
    full_prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Extract only the assistant's last message
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()
    return response
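# Example (hypothetical values) of calling respond() directly for a quick sanity check:
#   respond("Solve 3x + 5 = 20 for x.", [], "You are a helpful math assistant.", 256, 0.7, 0.95)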
# ✅ Create Gradio Chat Interface
chat_ui = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="🧮 Root Math Assistant",
    description="A fine-tuned math reasoning model by Hrushi02 using Unsloth + PEFT.",
)
# ✅ Add API endpoint `/chat` (for gradio_client access)
api_chat = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Message"),
        gr.State(),  # placeholder for chat history (can be None)
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    outputs="text",
    api_name="/chat",
)
# ✅ Combine UI + API
demo = gr.TabbedInterface([chat_ui, api_chat], ["Chat", "API"])
# ✅ Launch app
if __name__ == "__main__":
    demo.launch()
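
# Minimal sketch of calling the `/chat` endpoint with gradio_client. The Space id
# "Hrushi02/Root_Math" is an assumption here; substitute the actual Space name.
#
#   from gradio_client import Client
#
#   client = Client("Hrushi02/Root_Math")
#   result = client.predict(
#       "Solve 3x + 5 = 20 for x.",           # message
#       None,                                  # history placeholder (gr.State)
#       "You are a helpful math assistant.",   # system message
#       256,                                   # max new tokens
#       0.7,                                   # temperature
#       0.95,                                  # top-p
#       api_name="/chat",
#   )
#   print(result)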