# Root_Math / app.py
import gradio as gr
import os
from threading import Thread
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from peft import PeftModel

# --- 1. Load your Fine-Tuned Model and Tokenizer ---
# Make sure to set your HUGGINGFACEHUB_API_TOKEN in your Space's secrets
api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if not api_token:
raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it in your Space secrets.")
# Define model names
base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
peft_model_name = "Hrushi02/Root_Math"
# Load base model and tokenizer
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
torch_dtype=torch.float16,
device_map="auto",
token=api_token
)
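# Note: this checkpoint is pre-quantized with bitsandbytes (4-bit), so the
# bitsandbytes package must be installed; torch_dtype applies to the modules
# kept in higher precision.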
tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
# Load your fine-tuned PEFT model
model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
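# Generation never trains, but eval mode makes the inference-only intent
# explicit and disables dropout in any adapter layers.
model.eval()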
print("✅ Model loaded successfully!")

# --- 2. Rewrite the Respond Function to Use YOUR Model ---
def respond(
message,
history: list[tuple[str, str]],
system_message,
max_tokens,
temperature,
top_p,
):
# Create the chat history format
messages = [{"role": "system", "content": system_message}]
for val in history:
if val[0]:
messages.append({"role": "user", "content": val[0]})
if val[1]:
messages.append({"role": "assistant", "content": val[1]})
messages.append({"role": "user", "content": message})
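    # `messages` now has the chat-template shape, e.g.:
    # [{"role": "system", "content": "You are a math assistant..."},
    #  {"role": "user", "content": "Solve 2x + 3 = 7"},
    #  {"role": "assistant", "content": "x = 2"},
    #  {"role": "user", "content": message}]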
# Prepare for streaming
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Tokenize the input
inputs = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
return_tensors="pt"
).to(model.device)
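    # Without return_dict=True, apply_chat_template returns a bare input_ids
    # tensor; generate() can infer the attention mask for this single,
    # unpadded sequence.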
# Generation arguments
generation_kwargs = dict(
inputs=inputs,
streamer=streamer,
max_new_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
do_sample=True,
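        # Fall back to EOS as the pad id so generate() never warns if the
        # tokenizer defines no dedicated pad token (harmless otherwise).
        pad_token_id=tokenizer.eos_token_id,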
)
# Start generation in a separate thread
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
# Yield the generated tokens
response = ""
for token in streamer:
response += token
yield response
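    # The streamer is exhausted only once generation finishes, so joining
    # the worker thread here does not block the stream above.
    thread.join()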

# --- 3. Launch the Gradio Interface (No Changes Here) ---
demo = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a math assistant. Solve the following math problem.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)
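# On Gradio 4.x the queue is enabled by default, so the generator-based
# respond() streams token-by-token; Gradio 3.x needs demo.queue() first.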

if __name__ == "__main__":
demo.launch()