Update app.py
app.py
CHANGED
@@ -6,14 +6,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 # === STEP 1: Authenticate with Hugging Face ===
 # Make sure you set your HF token as an environment variable or paste it here temporarily
 # For security, prefer environment variable (recommended)
-hf_token = "TLpIICgZJrDCTgVTsaaydFFWbWyGKiGAPa"
-login(token="hf_" + hf_token)
+#hf_token = "TLpIICgZJrDCTgVTsaaydFFWbWyGKiGAPa"
+#login(token="hf_" + hf_token)
 
 # === STEP 2: Load base and adapter models ===
 base_model = "meta-llama/Llama-2-7b-chat-hf"
 adapter_model = "zimble-llama2-finetunedhybride"
 
-tokenizer = AutoTokenizer.from_pretrained(adapter_model
+tokenizer = AutoTokenizer.from_pretrained(adapter_model)
 
 # Enable memory-efficient loading if needed
 device = "cuda" if torch.cuda.is_available() else "cpu"
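
Note: this commit only comments the hardcoded token out, while the comments above recommend reading it from an environment variable. A minimal sketch of that approach is below; the HF_TOKEN variable name is an assumption, so use whatever secret name the Space is actually configured with.

import os

import torch
from huggingface_hub import login
from transformers import AutoTokenizer

# Read the token from the environment (e.g. a Space secret) instead of source code.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # required for gated repos such as meta-llama/Llama-2-7b-chat-hf

tokenizer = AutoTokenizer.from_pretrained("zimble-llama2-finetunedhybride")
device = "cuda" if torch.cuda.is_available() else "cpu"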