Hrushi02 committed on
Commit b8c533f · verified · 1 Parent(s): e02d7d6

Update app.py

Files changed (1)
  1. app.py +31 -47
app.py CHANGED
@@ -5,40 +5,49 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import PeftModel
 
 """
-🧮 Root_Math fine-tuned model chat app for Hugging Face Spaces.
-Supports both Gradio UI and API access via `/chat`.
+🧮 Root_Math fine-tuned model chat app.
+Auto-detects GPU/CPU and loads the appropriate base model.
 """
 
-# ✅ Load Hugging Face API token securely
+# ✅ Load Hugging Face API token
 api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 if not api_token:
     raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
 
-# ✅ Define model names
-base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
-peft_model_name = "Hrushi02/Root_Math"  # <-- model name stays the same
+# ✅ Detect environment
+use_cuda = torch.cuda.is_available()
+
+if use_cuda:
+    print("🚀 GPU detected - loading 4-bit quantized model for efficiency.")
+    base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
+    dtype = torch.float16
+else:
+    print("💻 CPU detected - loading full-precision model (no quantization).")
+    base_model_name = "unsloth/qwen2.5-math-7b"
+    dtype = torch.float32
+
+peft_model_name = "Hrushi02/Root_Math"
 
 # ✅ Load base model
-print("🔄 Loading base model...")
+print(f"🔄 Loading base model: {base_model_name} ...")
 base_model = AutoModelForCausalLM.from_pretrained(
     base_model_name,
-    torch_dtype=torch.float16,
+    torch_dtype=dtype,
     device_map="auto",
-    use_auth_token=api_token
+    token=api_token
 )
 
-# ✅ Load your fine-tuned PEFT adapter
-print("🔄 Loading fine-tuned adapter...")
+# ✅ Load fine-tuned adapter
+print(f"🔄 Loading fine-tuned adapter: {peft_model_name} ...")
 model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
 
 # ✅ Load tokenizer
 print("🔄 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
 
-
-# ✅ Define the response function
+# ✅ Response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    """Generate responses from your fine-tuned model."""
+    """Generate a response using the Root_Math model."""
    full_prompt = system_message + "\n\n"
    for user_msg, bot_msg in history:
        if user_msg:
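
Two notes on this hunk: `use_auth_token` is deprecated in recent transformers releases in favor of `token`, which the commit adopts, and the bnb-4bit checkpoint depends on bitsandbytes CUDA kernels, which is why the CPU branch falls back to the full-precision repo. For comparison, a minimal sketch (not part of this commit) of quantizing the full-precision weights at load time with `BitsAndBytesConfig`, assuming a CUDA GPU and bitsandbytes installed:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch only: on-the-fly 4-bit quantization of the full-precision repo.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             # NF4 is the usual choice for LLM weights
    bnb_4bit_compute_dtype=torch.float16,  # matmuls still run in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/qwen2.5-math-7b",
    quantization_config=bnb_config,
    device_map="auto",
)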
@@ -52,26 +61,22 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            top_p=top_p,
+            max_new_tokens=int(max_tokens),
+            temperature=float(temperature),
+            top_p=float(top_p),
             do_sample=True
         )
 
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Extract only the assistant's last message
     if "Assistant:" in response:
         response = response.split("Assistant:")[-1].strip()
 
-    return response
+    yield response
 
 
-# ✅ Create Gradio Chat Interface
-chat_ui = gr.ChatInterface(
-    fn=lambda message, history, system_message, max_tokens, temperature, top_p: (
-        respond(message, history, system_message, max_tokens, temperature, top_p)
-    ),
+# ✅ Gradio UI
+demo = gr.ChatInterface(
+    respond,
     additional_inputs=[
         gr.Textbox(value="You are a helpful math assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
@@ -79,30 +84,9 @@ chat_ui = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     title="🧮 Root Math Assistant",
-    description="A fine-tuned math reasoning model by Hrushi02 using Unsloth + PEFT."
+    description="Fine-tuned by Hrushi02 using Unsloth + PEFT for mathematical reasoning."
 )
 
-
-# ✅ Add API endpoint `/chat` (for gradio_client access)
-api_chat = gr.Interface(
-    fn=respond,
-    inputs=[
-        gr.Textbox(label="Message"),
-        gr.State(),  # placeholder for chat history (can be None)
-        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
-        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-    ],
-    outputs="text",
-    api_name="/chat"
-)
-
-
-# ✅ Combine UI + API
-demo = gr.TabbedInterface([chat_ui, api_chat], ["Chat", "API"])
-
-
 # ✅ Launch app
 if __name__ == "__main__":
     demo.launch()
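
Replacing `return response` with `yield response` makes `respond` a generator, which `gr.ChatInterface` accepts as a streaming handler; since it yields exactly once after generation finishes, the user still sees the answer arrive all at once. Token-by-token streaming would need something like transformers' `TextIteratorStreamer`; a rough sketch under that assumption, with the prompt assembly elided:

from threading import Thread
from transformers import TextIteratorStreamer

def respond_streaming(message, history, system_message, max_tokens, temperature, top_p):
    # Build full_prompt exactly as respond() does (elided), then tokenize it.
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate, kwargs=dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
    )).start()
    partial = ""
    for new_text in streamer:  # decoded text appears as tokens are generated
        partial += new_text
        yield partial          # ChatInterface re-renders on every yield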
 
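The commit also removes the standalone `gr.Interface` that pinned `api_name="/chat"` and the `gr.TabbedInterface` wrapper, so programmatic access now goes through the endpoint `gr.ChatInterface` registers on its own (also named `/chat` in Gradio 4.x). A hedged client-side sketch; the Space id and argument order here are assumptions, so check the Space's "Use via API" page for the real signature:

from gradio_client import Client

client = Client("Hrushi02/Root_Math")  # assumed Space id, not confirmed by the diff
result = client.predict(
    "Solve 2x + 6 = 0 for x.",            # message
    "You are a helpful math assistant.",  # system_message
    256,                                  # max_tokens
    0.7,                                  # temperature
    0.95,                                 # top_p
    api_name="/chat",                     # default ChatInterface endpoint in Gradio 4.x
)
print(result)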