Spaces:

Hrushi02
/

Root_Math

Sleeping

App Files Files Community

Root_Math / app.py

Hrushi02

Update app.py

646b139 verified about 2 months ago

raw

history blame

3.41 kB

	```python
	import gradio as gr
	import os
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel
	import torch

	# --- Model setup -----------------------------------------------------------
	# Everything below runs once at import time: it reads the Hugging Face token
	# from the environment, loads the base model, attaches the fine-tuned PEFT
	# adapter on top of it, and prepares the tokenizer used by the chat handler.

	# Read the access token from the environment so it never lives in the source.
	api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")

	# Fail fast with a clear message instead of a confusing download error later.
	if not api_token:
	    raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")

	# Hub repository ids: the base checkpoint and the adapter trained on top of it.
	base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
	peft_model_name = "Hrushi02/Root_Math"

	# Load the base model. `token=` replaces the deprecated `use_auth_token=`
	# keyword and matches the adapter and tokenizer loads below.
	base_model = AutoModelForCausalLM.from_pretrained(
	    base_model_name,
	    torch_dtype=torch.float16,
	    device_map="auto",
	    token=api_token,
	)

	# Wrap the base model with the fine-tuned adapter weights.
	model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)

	# The tokenizer comes from the base checkpoint.
	tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)

	# Fall back to the end-of-sequence token when the tokenizer defines no
	# dedicated padding token.
	if tokenizer.pad_token is None:
	    tokenizer.pad_token = tokenizer.eos_token

	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream the model's reply to ``message`` for the Gradio chat UI.

	    This is a generator. Each value it yields is the complete reply decoded
	    so far (not just the newest fragment), so the caller can display the
	    latest yielded value as the current state of the answer.

	    Args:
	        message: The newest user message.
	        history: Earlier turns of the conversation. Both layouts are
	            accepted: ``(user, assistant)`` pairs and role/content dicts
	            (``{"role": ..., "content": ...}``).
	        system_message: Text placed in the leading "system" message.
	        max_tokens: Upper bound on newly generated tokens.
	        temperature: Sampling temperature forwarded to ``generate``.
	        top_p: Nucleus-sampling threshold forwarded to ``generate``.

	    Yields:
	        str: The reply text accumulated so far.

	    Raises:
	        Exception: Any error raised by ``model.generate`` in the worker
	            thread is re-raised here once the stream has been drained.
	    """
	    # Function-scope imports: the streaming helpers are only needed here.
	    from threading import Thread

	    from transformers import TextIteratorStreamer

	    # Build the chat-template message list, starting with the system prompt.
	    messages = [{"role": "system", "content": system_message}]

	    for turn in history:
	        if isinstance(turn, dict):
	            # Role/content layout: forward non-empty entries unchanged.
	            if turn.get("content"):
	                messages.append({"role": turn["role"], "content": turn["content"]})
	        else:
	            # Pair layout: (user_text, assistant_text); either side may be empty.
	            if turn[0]:
	                messages.append({"role": "user", "content": turn[0]})
	            if turn[1]:
	                messages.append({"role": "assistant", "content": turn[1]})

	    messages.append({"role": "user", "content": message})

	    # Render the conversation to a prompt string ending with the assistant cue.
	    prompt = tokenizer.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )

	    # Tokenize and move the tensors to the device the model lives on.
	    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)

	    # `generate` only returns after the whole sequence is finished, so
	    # iterating over its return value cannot stream. The streamer receives
	    # decoded text while generation runs in a background thread.
	    # `skip_prompt` keeps the echoed prompt out of the reply.
	    streamer = TextIteratorStreamer(
	        tokenizer, skip_prompt=True, skip_special_tokens=True
	    )
	    generation_kwargs = dict(
	        **inputs,
	        max_new_tokens=int(max_tokens),  # slider values may arrive as floats
	        temperature=temperature,
	        top_p=top_p,
	        do_sample=True,
	        pad_token_id=tokenizer.eos_token_id,
	        repetition_penalty=1.1,
	        streamer=streamer,
	    )

	    errors = []

	    def _generate():
	        """Run generation; on failure, record the error and close the stream."""
	        try:
	            # Grad mode is per-thread, so disable it inside the worker.
	            with torch.no_grad():
	                model.generate(**generation_kwargs)
	        except Exception as exc:
	            errors.append(exc)
	            # Unblock the consumer loop below, which would otherwise wait forever.
	            streamer.end()

	    worker = Thread(target=_generate, daemon=True)
	    worker.start()

	    # Accumulate the fragments and yield the running text after each one.
	    response = ""
	    for text_chunk in streamer:
	        response += text_chunk
	        yield response

	    worker.join()
	    if errors:
	        raise errors[0]

	# Note: respond() yields the cumulative reply each time new text is decoded,
	# so the chat window updates progressively while the model is still generating.

	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""
	# Extra controls for the chat UI. They are listed in the same order as the
	# parameters respond() takes after (message, history):
	# system_message, max_tokens, temperature, top_p.
	_system_box = gr.Textbox(
	    value="You are a helpful math assistant specialized in solving equations and finding roots.",
	    label="System message",
	)
	_max_tokens_slider = gr.Slider(
	    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
	)
	_temperature_slider = gr.Slider(
	    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
	)
	_top_p_slider = gr.Slider(
	    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
	)

	# Chat window using the role/content "messages" format.
	_chat_window = gr.Chatbot(type="messages")

	# Assemble the chat application around the respond() generator.
	demo = gr.ChatInterface(
	    fn=respond,
	    additional_inputs=[
	        _system_box,
	        _max_tokens_slider,
	        _temperature_slider,
	        _top_p_slider,
	    ],
	    chatbot=_chat_window,
	    title="Root Math Chatbot",
	    description="A fine-tuned Qwen2.5-Math model for solving roots and math problems.",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()
	```