Hrushi02 committed on
Commit ea305a2 · verified · 1 Parent(s): b8c533f

Update app.py

Files changed (1)
app.py +11 -17
app.py CHANGED
@@ -2,10 +2,9 @@ import os
 import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
 
 """
-🧮 Root_Math fine-tuned model chat app
+🧮 Root_Math full model chat app
 Auto-detects GPU/CPU and loads appropriate base model.
 """
 
@@ -18,32 +17,27 @@ if not api_token:
 use_cuda = torch.cuda.is_available()
 
 if use_cuda:
-    print("🚀 GPU detected — loading 4-bit quantized model for efficiency.")
-    base_model_name = "unsloth/qwen2.5-math-7b-bnb-4bit"
+    print("🚀 GPU detected — using float16 model for efficiency.")
     dtype = torch.float16
 else:
-    print("💻 CPU detected — loading full-precision model (no quantization).")
-    base_model_name = "unsloth/qwen2.5-math-7b"
+    print("💻 CPU detected — using float32 model.")
     dtype = torch.float32
 
-peft_model_name = "Hrushi02/Root_Math"
-
-# ✅ Load base model
-print(f"🔄 Loading base model: {base_model_name} ...")
-base_model = AutoModelForCausalLM.from_pretrained(
-    base_model_name,
+# ✅ Load your full fine-tuned model directly
+model_name = "Hrushi02/Root_Math"  # Your repo
+
+print(f"🔄 Loading full model: {model_name} ...")
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
     torch_dtype=dtype,
     device_map="auto",
     token=api_token
 )
 
-# ✅ Load fine-tuned adapter
-print(f"🔄 Loading fine-tuned adapter: {peft_model_name} ...")
-model = PeftModel.from_pretrained(base_model, peft_model_name, token=api_token)
-
 # ✅ Load tokenizer
 print("🔄 Loading tokenizer...")
-tokenizer = AutoTokenizer.from_pretrained(base_model_name, token=api_token)
+tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
+
 
 # ✅ Response function
 def respond(message, history, system_message, max_tokens, temperature, top_p):
@@ -84,7 +78,7 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
     title="🧮 Root Math Assistant",
-    description="Fine-tuned by Hrushi02 using Unsloth + PEFT for mathematical reasoning."
+    description="Fine-tuned by Hrushi02 for mathematical reasoning."
 )
 
 # ✅ Launch app
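Note: dropping `from peft import PeftModel` only works if the Hrushi02/Root_Math repo now holds merged full weights rather than a bare adapter. A minimal one-time merge sketch, assuming the old setup was a LoRA-style PEFT adapter on unsloth/qwen2.5-math-7b (both names taken from the removed lines); this step happens offline and is not part of this commit:

```python
# Hypothetical offline merge: fold the PEFT adapter into the base model so the
# app can load a single full checkpoint without importing peft at runtime.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("unsloth/qwen2.5-math-7b")
merged = PeftModel.from_pretrained(base, "Hrushi02/Root_Math").merge_and_unload()
merged.save_pretrained("Root_Math-merged")  # upload these weights to the model repo
```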
 
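The body of `respond()` sits outside the changed hunks, so the diff shows only its signature. For orientation, here is a hypothetical sketch of such a Gradio callback using the model and tokenizer loaded above; the chat-template call, tuple-style history, and sampling flags are assumptions, not code from this commit:

```python
# Hypothetical respond() body (the commit does not show the real one).
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation as chat messages for the model's template.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:  # assumes tuple-style ChatInterface history
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # Qwen2.5-based checkpoints ship a chat template in the tokenizer config.
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
    # Decode only the newly generated tokens, not the prompt.
    return tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
```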
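The `# ✅ Launch app` comment is the last context line in the diff; the launch call itself is unchanged and not shown. The standard Gradio entry point it presumably precedes (an assumption, consistent with the rest of the file):

```python
# Assumed launch block; on Hugging Face Spaces, launch() needs no arguments.
if __name__ == "__main__":
    demo.launch()
```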