Spaces:

Steph254
/

demo_1

Runtime error

App Files Community

Steph254 committed on Mar 18

Commit

f2dcdc2

verified ·

1 Parent(s): 79ccf40

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -25

app.py CHANGED Viewed

@@ -14,38 +14,39 @@ QLORA_ADAPTER = "meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8"  # Ensure this
 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"  # Ensure this is correct
 # Function to load Llama model
-def load_llama_model(model_name, is_guard=False):
-    print(f"Loading model: {model_name}")
     try:
         # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            use_fast=False,
-            token=HUGGINGFACE_TOKEN
-        )
-        # Load model
-        model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype=torch.float32,
-            device_map="cpu",  # Ensure it runs on CPU
-            token=HUGGINGFACE_TOKEN
-        )
         # Load QLoRA adapter if applicable
-        if not is_guard and "QLORA" in model_name:
             print("Loading QLoRA adapter...")
-            model = PeftModel.from_pretrained(
-                model,
-                model_name,
-                token=HUGGINGFACE_TOKEN
-            )
             print("Merging LoRA weights...")
-            model = model.merge_and_unload()  # Merge LoRA weights for inference
         return tokenizer, model
     except Exception as e:
-        print(f"Error loading model {model_name}: {e}")
         raise
 # Load Llama 3.2 model

 LLAMA_GUARD_NAME = "meta-llama/Llama-Guard-3-1B-INT4"  # Ensure this is correct
 # Function to load Llama model
+def load_llama_model(model_path, is_guard=False):
+    print(f"Loading model: {model_path}")
     try:
         # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HUGGINGFACE_TOKEN)
+        # Load config first (to avoid shape mismatch errors)
+        config = AutoModelForCausalLM.from_pretrained(BASE_MODEL, config_only=True).config
+        # 🔹 Manually load the `.pth` file
+        state_dict_path = os.path.join(model_path, "consolidated.00.pth")
+        if not os.path.exists(state_dict_path):
+            raise FileNotFoundError(f"Missing model weights: {state_dict_path}")
+        state_dict = torch.load(state_dict_path, map_location="cpu")
+        # Load model from config and manually apply weights
+        model = AutoModelForCausalLM.from_config(config)
+        model.load_state_dict(state_dict, strict=False)  # Use strict=False to allow missing keys
+        model.eval()  # Set to inference mode
         # Load QLoRA adapter if applicable
+        if not is_guard and "QLORA" in model_path:
             print("Loading QLoRA adapter...")
+            model = PeftModel.from_pretrained(model, model_path, token=HUGGINGFACE_TOKEN)
             print("Merging LoRA weights...")
+            model = model.merge_and_unload()
         return tokenizer, model
     except Exception as e:
+        print(f"❌ Error loading model {model_path}: {e}")
         raise
 # Load Llama 3.2 model