Spaces: Running on Zero
Update app.py
Changed it to a 2-step process to deal with the meta tensor error.
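The "Meta error" here most likely refers to PyTorch's meta-device failure (typically "NotImplementedError: Cannot copy out of meta tensor; no data!"), which shows up when weights that only ever existed on the meta device, i.e. shape and dtype but no storage, are moved with .to() instead of being materialized from a checkpoint. A minimal, hypothetical reproduction of that failure mode, not code from this Space:

import torch

t = torch.empty(2, 2, device="meta")  # meta tensors carry shape/dtype only, no data
t.to("cpu")  # raises NotImplementedError: Cannot copy out of meta tensor; no data!

The diff below avoids this by never calling .to() on empty weights: the real weights are streamed from a local checkpoint directly onto their target devices. A standalone sketch of the full pattern follows the diff.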
app.py
CHANGED
@@ -7,6 +7,7 @@ import torch
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 from accelerate import init_empty_weights, load_checkpoint_and_dispatch
+from huggingface_hub import snapshot_download


 # --- Constants ---
@@ -76,17 +77,37 @@ class ModelWrapper:
         self.tokenizer.pad_token_id = self.tokenizer.pad_token_id or self.tokenizer.eos_token_id

         print(f"Loading model: {model_name}...")
-            offload_folder="offload" # Keep this for memory management
-        ).eval()
-
+
+        # For large models, we use a more robust, memory-safe loading method.
+        # This explicitly handles the "meta tensor" device placement.
+        if "8b" in model_name.lower() or "4b" in model_name.lower():
+
+            # Step 1: Download the model files and get the local path.
+            print(f"Ensuring model checkpoint is available locally for {model_name}...")
+            checkpoint_path = snapshot_download(repo_id=model_name)
+            print(f"Checkpoint is at: {checkpoint_path}")
+
+            # Step 2: Create the model's "skeleton" on the meta device (no memory used).
+            config = AutoConfig.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+            with init_empty_weights():
+                model_empty = AutoModelForCausalLM.from_config(config)
+
+            # Step 3: Load the real weights from the local files directly onto the GPU(s).
+            # This function is designed to handle the meta->device transition correctly.
+            self.model = load_checkpoint_and_dispatch(
+                model_empty,
+                checkpoint_path,
+                device_map="auto",
+                offload_folder="offload"
+            ).eval()
+
+        else:  # For smaller models, the simpler method is fine.
+            self.model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                device_map="auto",
+                torch_dtype=torch.bfloat16
+            ).eval()
+
         print(f"Model {model_name} loaded successfully.")

     def get_message_template(self, system_content=None, user_content=None, assistant_content=None):
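For reference, the same loading path can be exercised outside the Space as a standalone script. This is a minimal sketch under stated assumptions rather than the Space's actual code: the model id is a placeholder, and it presumes a machine where Accelerate can see at least one GPU (otherwise device_map="auto" keeps weights on CPU or spills them to the offload folder).

import torch
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from huggingface_hub import snapshot_download
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-8B"  # placeholder model id; substitute the model the Space actually serves

# Step 1: make sure the checkpoint shards are on local disk and get their path.
checkpoint_path = snapshot_download(repo_id=model_name)

# Step 2: build the architecture on the meta device; shapes and dtypes exist, weights do not.
config = AutoConfig.from_pretrained(model_name, torch_dtype=torch.bfloat16)
with init_empty_weights():
    model = AutoModelForCausalLM.from_config(config)

# Step 3: materialize the real weights from disk onto the available devices.
# load_checkpoint_and_dispatch fills in the meta tensors as it loads, so nothing
# ever tries to .to() a tensor that has no data.
model = load_checkpoint_and_dispatch(
    model,
    checkpoint_path,
    device_map="auto",
    offload_folder="offload",
).eval()

# Quick smoke test.
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(output[0], skip_special_tokens=True))

The if "8b" / "4b" guard in the diff simply routes the larger checkpoints through this path, while smaller models keep the plain from_pretrained call, which handles device placement fine at that scale.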