Update app.py
app.py CHANGED

@@ -121,14 +121,12 @@ class GPT(nn.Module):
 
         return logits, loss
 
-
+@spaces.GPU
 def load_model(model_path):
     config = GPTConfig()
     model = GPT(config)
 
-    checkpoint = torch.load(model_path, map_location=torch.device('
-
-    print("Checkpoint keys:", checkpoint.keys())  # Debug print
+    checkpoint = torch.load(model_path, map_location=torch.device('cuda'))
 
     if 'model_state_dict' in checkpoint:
         model.load_state_dict(checkpoint['model_state_dict'])
@@ -136,24 +134,17 @@ def load_model(model_path):
         model.load_state_dict(checkpoint)
 
     model.eval()
+    model.to('cuda')
     return model
 
 # Load the model
 model = load_model('gpt_model.pth')  # Replace with the actual path to your .pt file
 enc = tiktoken.get_encoding('gpt2')
 
-#
-
-import torch.nn as nn
-from torch.nn import functional as F
-import tiktoken
-import gradio as gr
-
-# [Your existing model code remains unchanged]
-
-# Modify the generate_text function to be asynchronous
+# Update the generate_text function
+@spaces.GPU(duration=60)  # Adjust duration as needed
 async def generate_text(prompt, max_length=432, temperature=0.8, top_k=40):
-    input_ids = torch.tensor(enc.encode(prompt)).unsqueeze(0)
+    input_ids = torch.tensor(enc.encode(prompt)).unsqueeze(0).cuda()
     generated = []
 
     with torch.no_grad():
@@ -179,7 +170,9 @@ async def generate_text(prompt, max_length=432, temperature=0.8, top_k=40):
 
     if len(generated) == max_length:
         yield "... (output truncated due to length)"
-
+
+# Update the gradio_generate function
+@spaces.GPU(duration=60)  # Adjust duration as needed
 async def gradio_generate(prompt, max_length, temperature, top_k):
    output = ""
    async for token in generate_text(prompt, max_length, temperature, top_k):
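For reference, the decorators this commit introduces come from Hugging Face's `spaces` package, which backs ZeroGPU Spaces: a function wrapped in `@spaces.GPU` has a CUDA device attached only while the call runs, and `duration=` bounds how long each call may hold it. A minimal sketch of that pattern, assuming a ZeroGPU Space with `spaces` and `torch` installed (the function and tensor below are illustrative, not this app's code):

    import spaces  # Hugging Face ZeroGPU helper package
    import torch

    @spaces.GPU(duration=60)  # request a GPU for up to ~60 s per call
    def double(x: torch.Tensor) -> torch.Tensor:
        # On ZeroGPU hardware, CUDA is only guaranteed to be available
        # inside the decorated call, so move tensors to the device here.
        return (x.to('cuda') * 2).cpu()

The bare `@spaces.GPU` form (as on `load_model`) uses the package's default duration; outside a ZeroGPU environment the decorator is effectively a no-op, so the same code should still run on a dedicated-GPU Space.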