Spaces:

Steph254
/

demo_1

Runtime error

Steph254 committed on Mar 18

Commit

6d76df7

verified ·

1 Parent(s): 47e05d5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import gradio as gr
 import torch
 import json
-from transformers import LlamaTokenizer, AutoModelForCausalLM
 from peft import PeftModel
 # Set Hugging Face Token for Authentication
@@ -14,27 +14,36 @@ if not HUGGINGFACE_TOKEN:
 print("✅ HUGGINGFACE_TOKEN is set.")
 # Model Paths
-QUANTIZED_MODEL = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Directly using quantized model
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 # Function to load Llama model (without LoRA)
-def load_llama_model(model_name):
-    print(f"🔄 Loading Model: {model_name}")
-    tokenizer = LlamaTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
-    try:
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            token=HUGGINGFACE_TOKEN,
-            trust_remote_code=True  # Allows loading non-standard model formats
-        )
-    except Exception as e:
-        print(f"❌ Error loading model: {e}")
-        raise ValueError(f"❌ Model {model_name} may not have valid weight files. Check the Hugging Face repository.")
-    print("✅ Model loaded successfully!")
-    return tokenizer, model
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)

 import gradio as gr
 import torch
 import json
+from transformers import LlamaTokenizer, LlamaForCausalLM
 from peft import PeftModel
 # Set Hugging Face Token for Authentication
 print("✅ HUGGINGFACE_TOKEN is set.")
 # Model Paths
+MODEL_PATH = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Directly using quantized model
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"
 # Function to load Llama model (without LoRA)
+# Load Model Manually (for Quantized Models)
+def load_quantized_model(model_path):
+    print(f"🔄 Loading Quantized Model: {model_path}")
+    # Load config file manually
+    from transformers import LlamaConfig
+    config = LlamaConfig.from_pretrained(model_path)
+    # Initialize model
+    model = LlamaForCausalLM(config)
+    # Load quantized state_dict
+    checkpoint_path = os.path.join(model_path, "consolidated.00.pth")
+    state_dict = torch.load(checkpoint_path, map_location="cpu")
+    # Load state dict into model
+    model.load_state_dict(state_dict, strict=False)
+    print("✅ Quantized model loaded successfully!")
+    return model
+# Load Tokenizer
+tokenizer = LlamaTokenizer.from_pretrained(MODEL_PATH, token=HUGGINGFACE_TOKEN)
+# Load the model
+model = load_quantized_model(MODEL_PATH)
 # Load the quantized Llama model
 tokenizer, model = load_llama_model(QUANTIZED_MODEL)