olacode55 committed (verified)
Commit 388390c · 1 Parent(s): 0a12030

Update app.py

Files changed (1)
app.py  +2 -2
app.py CHANGED
@@ -14,13 +14,13 @@ login(token="hf_" + hf_token)
 base_model = "meta-llama/Llama-2-7b-chat-hf"
 adapter_model = "zimble-llama2-finetunedhybride"
 
-tokenizer = AutoTokenizer.from_pretrained(merged_model_repo, use_auth_token=hf_token)
+tokenizer = AutoTokenizer.from_pretrained(adapter_model, use_auth_token=hf_token)
 
 # Enable memory-efficient loading if needed
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 model = AutoModelForCausalLM.from_pretrained(
-    merged_model_repo,
+    adapter_model,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     device_map="auto",
     low_cpu_mem_usage=True,
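
For context, below is a minimal sketch of what the loading block might look like after this commit. It assumes the Hub repo zimble-llama2-finetunedhybride holds PEFT/LoRA adapter weights trained on top of the Llama-2 base; if it instead contains fully merged weights, the direct AutoModelForCausalLM.from_pretrained(adapter_model, ...) call shown in the diff is sufficient on its own. The environment-variable token lookup is also an assumption: in the real app.py, hf_token is defined earlier in the file.

# Sketch only, not the verbatim app.py. Assumes the adapter repo is a
# PEFT/LoRA adapter; swap in the direct AutoModelForCausalLM call from the
# diff if the repo already contains merged weights.
import os

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.environ.get("HF_TOKEN")  # assumption: token supplied via env var

base_model = "meta-llama/Llama-2-7b-chat-hf"
adapter_model = "zimble-llama2-finetunedhybride"

# device_map="auto" handles weight placement; keep device for later input tensors.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The diff passes use_auth_token=...; newer transformers releases prefer token=.
tokenizer = AutoTokenizer.from_pretrained(adapter_model, token=hf_token)

base = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True,
    token=hf_token,
)

# Attach the fine-tuned adapter on top of the base model for inference.
model = PeftModel.from_pretrained(base, adapter_model, token=hf_token)
model.eval()

Either way, the effect of the commit itself is only to point both the tokenizer and the model loader at adapter_model instead of the undefined merged_model_repo variable.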