olacode55 committed (verified)
Commit 388390c · 1 Parent(s): 0a12030

Update app.py

Files changed (1)
app.py  +2 -2
app.py CHANGED
@@ -14,13 +14,13 @@ login(token="hf_" + hf_token)
 base_model = "meta-llama/Llama-2-7b-chat-hf"
 adapter_model = "zimble-llama2-finetunedhybride"
 
-tokenizer = AutoTokenizer.from_pretrained(merged_model_repo, use_auth_token=hf_token)
+tokenizer = AutoTokenizer.from_pretrained(adapter_model, use_auth_token=hf_token)
 
 # Enable memory-efficient loading if needed
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 model = AutoModelForCausalLM.from_pretrained(
-    merged_model_repo,
+    adapter_model,
     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     device_map="auto",
     low_cpu_mem_usage=True,
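
For context, below is a minimal sketch of what the loading block might look like after this commit. It assumes the Hub repo zimble-llama2-finetunedhybride holds PEFT/LoRA adapter weights trained on top of the Llama-2 base; if it instead contains fully merged weights, the direct AutoModelForCausalLM.from_pretrained(adapter_model, ...) call shown in the diff is sufficient on its own. The environment-variable token lookup is also an assumption: in the real app.py, hf_token is defined earlier in the file.

# Sketch only, not the verbatim app.py. Assumes the adapter repo is a
# PEFT/LoRA adapter; swap in the direct AutoModelForCausalLM call from the
# diff if the repo already contains merged weights.
import os

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

hf_token = os.environ.get("HF_TOKEN")  # assumption: token supplied via env var

base_model = "meta-llama/Llama-2-7b-chat-hf"
adapter_model = "zimble-llama2-finetunedhybride"

# device_map="auto" handles weight placement; keep device for later input tensors.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The diff passes use_auth_token=...; newer transformers releases prefer token=.
tokenizer = AutoTokenizer.from_pretrained(adapter_model, token=hf_token)

base = AutoModelForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True,
    token=hf_token,
)

# Attach the fine-tuned adapter on top of the base model for inference.
model = PeftModel.from_pretrained(base, adapter_model, token=hf_token)
model.eval()

Either way, the effect of the commit itself is only to point both the tokenizer and the model loader at adapter_model instead of the undefined merged_model_repo variable.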