Update app.py
app.py
CHANGED
@@ -24,7 +24,7 @@ def load_quantized_model(model_path):
     # Use Hugging Face transformers to load the quantized model directly
     model = LlamaForCausalLM.from_pretrained(
         model_path,
-        use_auth_token=
+        use_auth_token=HUGGINGFACE_TOKEN,
         device_map="auto",           # Auto-distributes across CPU/GPU
         torch_dtype=torch.float16,   # Reduces memory usage
         low_cpu_mem_usage=True       # Optimized RAM loading
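The hunk completes the `use_auth_token=` argument that was left dangling in the previous revision. For context, here is a minimal, self-contained sketch of how the updated `load_quantized_model` might read in full; the imports, the environment-variable lookup for `HUGGINGFACE_TOKEN`, and the `return` statement are assumptions, since only lines 24-30 of app.py are visible in this diff.

import os

import torch
from transformers import LlamaForCausalLM

# Assumption: the token is supplied via an environment variable rather than hard-coded.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")


def load_quantized_model(model_path):
    # Use Hugging Face transformers to load the quantized model directly
    model = LlamaForCausalLM.from_pretrained(
        model_path,
        use_auth_token=HUGGINGFACE_TOKEN,  # Access token for gated/private repos
        device_map="auto",                 # Auto-distributes across CPU/GPU (requires accelerate)
        torch_dtype=torch.float16,         # Reduces memory usage
        low_cpu_mem_usage=True,            # Optimized RAM loading
    )
    return model

Note that recent transformers releases deprecate `use_auth_token` in favor of `token`, so switching the keyword may be worthwhile if the dependency is not pinned.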