Update app.py
app.py CHANGED
@@ -100,15 +100,27 @@ class GPT(nn.Module):
 
         return logits, loss
 
-#
+# Updated load_model function
 def load_model(model_path):
     config = GPTConfig()
     model = GPT(config)
-
+
+    checkpoint = torch.load(model_path, map_location=torch.device('cpu'))
+
+    print("Checkpoint keys:", checkpoint.keys())  # Debug print
+
+    if 'model_state_dict' in checkpoint:
+        # If the checkpoint contains a 'model_state_dict' key, use that
+        model.load_state_dict(checkpoint['model_state_dict'])
+    else:
+        # Otherwise, try to load the state dict directly
+        model.load_state_dict(checkpoint)
+
     model.eval()
     return model
 
-
+# Load the trained model
+model = load_model('gpt_5000.pt')  # Replace with the actual path to your .pt file
 enc = tiktoken.get_encoding('gpt2')
 
 def generate_text(prompt, max_length=100, temperature=0.7):
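
The new branch on 'model_state_dict' exists because PyTorch checkpoints are commonly saved in one of two layouts. The training script that produced gpt_5000.pt is not part of this diff, so the following is only a sketch of how each layout is typically written (model, optimizer, and step are assumed names):

import torch

# Hypothetical save code, not from this repo.
# Layout 1: a dict wrapping the weights -- handled by the 'model_state_dict' branch.
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),  # often saved alongside the weights
    'step': step,
}, 'gpt_5000.pt')

# Layout 2: the raw state dict itself -- handled by the else branch.
torch.save(model.state_dict(), 'gpt_5000.pt')

Checking for the wrapper key before calling load_state_dict lets load_model accept either file without the caller knowing which format was used.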
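
The body of generate_text lies outside this hunk, so only its signature is visible. As a rough, non-authoritative sketch of what a function with this signature usually does here (encode the prompt with the tiktoken encoder, sample tokens autoregressively with temperature scaling, decode the result), assuming the model's forward pass returns (logits, loss) as shown above and that targets are optional:

import torch
import torch.nn.functional as F

# Illustrative sketch only; the actual implementation is not shown in this diff.
def generate_text(prompt, max_length=100, temperature=0.7):
    tokens = torch.tensor(enc.encode(prompt), dtype=torch.long).unsqueeze(0)
    with torch.no_grad():
        for _ in range(max_length):
            logits, _ = model(tokens)                 # assumes forward returns (logits, loss)
            logits = logits[:, -1, :] / temperature   # scale logits for the last position
            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            tokens = torch.cat([tokens, next_token], dim=1)
    return enc.decode(tokens[0].tolist())

A call such as generate_text("Once upon a time", max_length=50) would then return the prompt extended with 50 sampled tokens; lower temperature values make the sampling more deterministic.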