import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
"""
๐Ÿงฎ Root_Math fine-tuned model chat app for Hugging Face Spaces.
Supports both Gradio UI and API access via `/chat`.
"""
# ✅ Load Hugging Face API token securely
api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not api_token:
    raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
# ✅ Define model names
base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
peft_model_name = "Hrushi02/Root_Math"  # fine-tuned PEFT adapter repo on the Hub
# ✅ Load base model (4-bit quantized via bitsandbytes)
print("🔄 Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
    token=api_token,  # `use_auth_token` is deprecated in recent transformers
)
# ✅ Load the fine-tuned PEFT adapter on top of the base model
print("🔄 Loading fine-tuned adapter...")
model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)

# ✅ Load tokenizer
print("🔄 Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
# ✅ Define the response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a response from the fine-tuned model."""
    # Rebuild the conversation as a plain-text prompt
    full_prompt = system_message + "\n\n"
    for user_msg, bot_msg in history or []:  # history may be None via the API
        if user_msg:
            full_prompt += f"User: {user_msg}\n"
        if bot_msg:
            full_prompt += f"Assistant: {bot_msg}\n"
    full_prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the assistant's latest turn
    if "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()
    return response
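# Quick local sanity check (a sketch; exact output depends on sampling):
#   print(respond("What is 2 + 2?", [], "You are a helpful math assistant.", 64, 0.7, 0.95))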
# ✅ Create Gradio Chat Interface
# (gr.ChatInterface already passes message, history, and additional inputs,
# so respond can be used directly as the callback)
chat_ui = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    title="🧮 Root Math Assistant",
    description="A fine-tuned math reasoning model by Hrushi02, built with Unsloth + PEFT.",
)
# ✅ Add API endpoint `/chat` (for gradio_client access)
api_chat = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(label="Message"),
        gr.State(),  # placeholder for chat history (may be None)
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    outputs="text",
    api_name="/chat",
)
# ✅ Combine UI + API
demo = gr.TabbedInterface([chat_ui, api_chat], ["Chat", "API"])
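# Optional (an assumption about deployment load, not part of the original app):
# request queuing keeps concurrent users from colliding on a single GPU worker,
# e.g. `demo.queue().launch()` in place of the bare launch below.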
# ✅ Launch app
if __name__ == "__main__":
    demo.launch()