Spaces:

Hrushi02
/

Root_Math

Sleeping

App Files Community

Hrushi02 committed on Oct 16

Commit

ea305a2

verified ·

1 Parent(s): b8c533f

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -17

app.py CHANGED Viewed

@@ -2,10 +2,9 @@ import os
 import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
 """
-🧮 Root_Math fine-tuned model chat app
 Auto-detects GPU/CPU and loads appropriate base model.
 """
@@ -18,32 +17,27 @@ if not api_token:
 use_cuda = torch.cuda.is_available()
 if use_cuda:
-    print("🚀 GPU detected — loading 4-bit quantized model for efficiency.")
-    base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
     dtype = torch.float16
 else:
-    print("💻 CPU detected — loading full-precision model (no quantization).")
-    base_model_name = "unsloth/qwen2.5-math-7b"
     dtype = torch.float32
-peft_model_name = "Hrushi02/Root_Math"
-# ✅ Load base model
-print(f"🔄 Loading base model: {base_model_name} ...")
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
     torch_dtype=dtype,
     device_map="auto",
     token=api_token
 )
-# ✅ Load fine-tuned adapter
-print(f"🔄 Loading fine-tuned adapter: {peft_model_name} ...")
-model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
 # ✅ Load tokenizer
 print("🔄 Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
 # ✅ Response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
@@ -84,7 +78,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     title="🧮 Root Math Assistant",
-    description="Fine-tuned by Hrushi02 using Unsloth + PEFT for mathematical reasoning."
 )
 # ✅ Launch app

 import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 """
+🧮 Root_Math full model chat app
 Auto-detects GPU/CPU and loads the full fine-tuned model in float16 (GPU) or float32 (CPU).
 """
 use_cuda = torch.cuda.is_available()
 if use_cuda:
+    print("🚀 GPU detected — using float16 model for efficiency.")
     dtype = torch.float16
 else:
+    print("💻 CPU detected — using float32 model.")
     dtype = torch.float32
+# ✅ Load your full fine-tuned model directly
+model_name = "Hrushi02/Root_Math"  # Your repo
+print(f"🔄 Loading full model: {model_name} ...")
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
     torch_dtype=dtype,
     device_map="auto",
     token=api_token
 )
 # ✅ Load tokenizer
 print("🔄 Loading tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
 # ✅ Response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     title="🧮 Root Math Assistant",
+    description="Fine-tuned by Hrushi02 for mathematical reasoning."
 )
 # ✅ Launch app