Hrushi02 committed
Commit 3dcc254 · verified · 1 Parent(s): ea305a2

Update app.py

Files changed (1):
  1. app.py +10 -16
app.py CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 """
 🧮 Root_Math full model chat app
-Auto-detects GPU/CPU and loads appropriate base model.
+Supports private/public repo and GPU/CPU auto-detection.
 """
 
 # ✅ Load Hugging Face API token
@@ -13,29 +13,23 @@ api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
 if not api_token:
     raise ValueError("❌ ERROR: Hugging Face API token is not set. Please set it as an environment variable.")
 
-# ✅ Detect environment
-use_cuda = torch.cuda.is_available()
+# ✅ Correct model repo name (case-sensitive)
+model_name = "Hrushi02/Root_Math"  # double-check on HF website
 
-if use_cuda:
-    print("🚀 GPU detected — using float16 model for efficiency.")
-    dtype = torch.float16
-else:
-    print("💻 CPU detected — using float32 model.")
-    dtype = torch.float32
+# ✅ Device and dtype
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float16 if device == "cuda" else torch.float32
+print(f"⚡ Loading model on {device.upper()} with dtype={dtype}")
 
-# ✅ Load your full fine-tuned model directly
-model_name = "Hrushi02/Root_Math"  # Your repo
-
-print(f"🔄 Loading full model: {model_name} ...")
+# ✅ Load the model (directly, full fine-tuned)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     torch_dtype=dtype,
     device_map="auto",
-    token=api_token
+    token=api_token  # required for private repo
 )
 
 # ✅ Load tokenizer
-print("🔄 Loading tokenizer...")
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
 
 
@@ -68,7 +62,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
     yield response
 
 
-# ✅ Gradio UI
+# ✅ Gradio ChatInterface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
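
The body of respond sits outside these hunks, so the commit only shows its `yield response` tail. For context, here is a minimal sketch of how such a streaming chat callback is commonly written with transformers' TextIteratorStreamer; the background thread and the chat-template prompt construction are illustrative assumptions rather than the actual app.py implementation, and the sketch reuses the model and tokenizer loaded above.

from threading import Thread
from transformers import TextIteratorStreamer

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Rebuild the conversation in the role/content format expected by chat templates.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:  # Gradio history: list of (user, assistant) pairs
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Run generation in a background thread so tokens can be yielded as they arrive.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )).start()

    response = ""
    for new_text in streamer:
        response += new_text
        yield response  # matches the `yield response` visible in the diff

Yielding the accumulated string rather than each token is what gr.ChatInterface expects from a generator callback: each yielded value replaces the assistant message shown so far.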
 
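The diff is truncated inside additional_inputs, and this commit does not show the rest of the UI wiring. A plausible completion, modeled on the stock Hugging Face Spaces chat template whose parameters match the respond signature above; the specific controls and their default values are assumptions, since the actual list in app.py is not shown.

import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Hypothetical controls; they mirror respond's extra parameters.
        gr.Textbox(value="You are a helpful math assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Gradio passes each additional input to respond positionally after message and history, so the order of these controls has to match the parameter order in the callback's signature.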