Spaces:

Hrushi02
/

Root_Math

Sleeping

App Files Files Community

Hrushi02 committed on Oct 16

Commit

b8c533f

verified ·

1 Parent(s): e02d7d6

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -47

app.py CHANGED Viewed

@@ -5,40 +5,49 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 """
-🧮 Root_Math fine-tuned model chat app for Hugging Face Spaces.
-Supports both Gradio UI and API access via `/chat`.
 """
-# ✅ Load Hugging Face API token securely
 api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 if not api_token:
     raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
-# ✅ Define model names
-base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
-peft_model_name = "Hrushi02/Root_Math"  # <-- model name stays the same
 # ✅ Load base model
-print("🔄 Loading base model...")
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-    torch_dtype=torch.float16,
     device_map="auto",
-    use_auth_token=api_token
 )
-# ✅ Load your fine-tuned PEFT adapter
-print("🔄 Loading fine-tuned adapter...")
 model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
 # ✅ Load tokenizer
 print("🔄 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
-# ✅ Define the response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    """Generate responses from your fine-tuned model."""
     full_prompt = system_message + "\n\n"
     for user_msg, bot_msg in history:
         if user_msg:
@@ -52,26 +61,22 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
             do_sample=True
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Extract only the assistant's last message
     if "Assistant:" in response:
         response = response.split("Assistant:")[-1].strip()
-    return response
-# ✅ Create Gradio Chat Interface
-chat_ui = gr.ChatInterface(
-    fn=lambda message, history, system_message, max_tokens, temperature, top_p: (
-        respond(message, history, system_message, max_tokens, temperature, top_p)
-    ),
     additional_inputs=[
         gr.Textbox(value="You are a helpful math assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
@@ -79,30 +84,9 @@ chat_ui = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     title="🧮 Root Math Assistant",
-    description="A fine-tuned math reasoning model by Hrushi02 using Unsloth + PEFT."
 )
-# ✅ Add API endpoint `/chat` (for gradio_client access)
-api_chat = gr.Interface(
-    fn=respond,
-    inputs=[
-        gr.Textbox(label="Message"),
-        gr.State(),  # placeholder for chat history (can be None)
-        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
-        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-    outputs="text",
-    api_name="/chat"
-)
-# ✅ Combine UI + API
-demo = gr.TabbedInterface([chat_ui, api_chat], ["Chat", "API"])
 # ✅ Launch app
 if __name__ == "__main__":
     demo.launch()

 from peft import PeftModel
 """
+🧮 Root_Math fine-tuned model chat app
+Auto-detects GPU/CPU and loads appropriate base model.
 """
+# ✅ Load Hugging Face API token
 api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 if not api_token:
     raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
+# ✅ Detect environment
+use_cuda = torch.cuda.is_available()
+if use_cuda:
+    print("🚀 GPU detected — loading 4-bit quantized model for efficiency.")
+    base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
+    dtype = torch.float16
+else:
+    print("💻 CPU detected — loading full-precision model (no quantization).")
+    base_model_name = "unsloth/qwen2.5-math-7b"
+    dtype = torch.float32
+peft_model_name = "Hrushi02/Root_Math"
 # ✅ Load base model
+print(f"🔄 Loading base model: {base_model_name} ...")
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
+    torch_dtype=dtype,
     device_map="auto",
+    token=api_token
 )
+# ✅ Load fine-tuned adapter
+print(f"🔄 Loading fine-tuned adapter: {peft_model_name} ...")
 model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
 # ✅ Load tokenizer
 print("🔄 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
+# ✅ Response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    """Generate a response using Root_Math model."""
     full_prompt = system_message + "\n\n"
     for user_msg, bot_msg in history:
         if user_msg:
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
+            max_new_tokens=int(max_tokens),
+            temperature=float(temperature),
+            top_p=float(top_p),
             do_sample=True
         )
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     if "Assistant:" in response:
         response = response.split("Assistant:")[-1].strip()
+    yield response
+# ✅ Gradio UI
+demo = gr.ChatInterface(
+    respond,
     additional_inputs=[
         gr.Textbox(value="You are a helpful math assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     title="🧮 Root Math Assistant",
+    description="Fine-tuned by Hrushi02 using Unsloth + PEFT for mathematical reasoning."
 )
 # ✅ Launch app
 if __name__ == "__main__":
     demo.launch()