Spaces: Running on Zero
Update app.py
Fixing the model loading bug
app.py CHANGED
@@ -94,20 +94,17 @@ class ModelWrapper:
         self.tokenizer.pad_token_id = self.tokenizer.pad_token_id or self.tokenizer.eos_token_id
 
         print(f"Loading model: {model_name}...")
-
-
-
-
-
-
-
-
-
-
-
-        else:
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name, device_map="auto", torch_dtype=torch.bfloat16).eval()
+
+        # We can now use the same, simpler loading logic for all models.
+        # The `from_pretrained` method will handle downloading from the Hub
+        # and applying the device_map.
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=torch.bfloat16,
+            offload_folder="offload"  # Keep this for memory management
+        ).eval()
+
         print(f"Model {model_name} loaded successfully.")
 
     def get_message_template(self, system_content=None, user_content=None, assistant_content=None):
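For context, here is a minimal sketch of how the unified loading path reads once this change is applied. Only the `from_pretrained` call and the pad-token line are verbatim from the diff; the constructor shape and the `AutoTokenizer.from_pretrained` line are assumptions inferred from the surrounding context lines.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

class ModelWrapper:
    def __init__(self, model_name: str):
        # Assumed: the tokenizer comes from the same checkpoint as the model.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Fall back to the EOS token when the tokenizer defines no pad token
        # (verbatim from the diff's context lines).
        self.tokenizer.pad_token_id = self.tokenizer.pad_token_id or self.tokenizer.eos_token_id

        print(f"Loading model: {model_name}...")
        # Single loading path for all models: device_map="auto" lets
        # Accelerate place the weights across available devices, and
        # offload_folder gives it somewhere to spill layers when GPU and
        # CPU memory run short.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype=torch.bfloat16,
            offload_folder="offload",  # keep for memory management
        ).eval()
        print(f"Model {model_name} loaded successfully.")

Keeping `offload_folder` costs nothing when the model fits in memory: Accelerate only writes to that folder if the computed device map decides some layers must live on disk.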