Update model.py
model.py
CHANGED
@@ -114,16 +114,19 @@ class GPTLanguageModel(nn.Module):
 
     @torch.no_grad()
     def generate(self, idx, max_new_tokens, temperature=0.8, top_k=50, eos_token=None, max_consecutive_exclamations=2):
-        generated_tokens = []
         consecutive_exclamations = 0
-
         for _ in range(max_new_tokens):
-            #
-            logits, _ = self(idx)
+            # Crop idx to the last block_size tokens if it exceeds block_size
+            idx_cond = idx[:, -self.block_size:]
+
+            # Get the predictions
+            logits, _ = self(idx_cond)
+
+            # Focus only on the last time step
             logits = logits[:, -1, :] / temperature
 
-            #
-            top_k_logits, top_k_indices = torch.topk(logits, top_k)
+            # Apply top-k sampling
+            top_k_logits, top_k_indices = torch.topk(logits, min(top_k, logits.size(-1)))
             probs = F.softmax(top_k_logits, dim=-1)
             idx_next = top_k_indices[0, torch.multinomial(probs[0], num_samples=1)]
 
@@ -135,13 +138,13 @@ class GPTLanguageModel(nn.Module):
             else:
                 consecutive_exclamations = 0
 
-
+            # Append sampled index to the running sequence
             idx = torch.cat((idx, idx_next.unsqueeze(0).unsqueeze(1)), dim=1)
 
             # Stop if EOS token is generated
             if eos_token is not None and idx_next.item() == eos_token:
                 break
-
+
         return idx
 
 # Set up the device
@@ -161,8 +164,8 @@ weight_decay = 0.1
 # Create an instance of the model
 model = GPTLanguageModel(vocab_size, n_embd, block_size, n_layers, n_heads).to(device)
 
-# Load the
-model.load_state_dict(torch.load("model_weights.pth", map_location=device))
+# Load the model (with weights_only=True for security)
+model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
 
 # Set the model to evaluation mode
 model.eval()
@@ -178,6 +181,11 @@ max_new_tokens = 300
 temperature = 0.6 # Slightly lower temperature
 top_k = 40 # Adjust as needed
 # Generate text
+# Load the model (with weights_only=True for security)
+model.load_state_dict(torch.load("model_weights.pth", map_location=device, weights_only=True))
+
+# Generate text
+context = torch.tensor([enc.encode("Once upon a time there was a knight called Bob and he rode into his greatest battle yet")], dtype=torch.long, device=device)
 generated_text_idx = model.generate(context, max_new_tokens, temperature=temperature, top_k=top_k, eos_token=eos_token, max_consecutive_exclamations=2)
 generated_text = enc.decode(generated_text_idx[0].tolist())
 
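Note on the top-k change: torch.topk raises an error if k exceeds the size of the last dimension, so the min(top_k, logits.size(-1)) guard makes the call safe even when top_k is larger than the vocabulary. A minimal standalone sketch of the same sampling step (the toy vocabulary size is made up for illustration):

    import torch
    import torch.nn.functional as F

    def sample_top_k(logits, top_k=50, temperature=0.8):
        # logits: (vocab_size,) raw scores for the next token
        logits = logits / temperature
        # Guard: never ask topk for more entries than exist
        k = min(top_k, logits.size(-1))
        top_k_logits, top_k_indices = torch.topk(logits, k)
        # Renormalize over the k kept tokens, then sample one
        probs = F.softmax(top_k_logits, dim=-1)
        choice = torch.multinomial(probs, num_samples=1)
        return top_k_indices[choice]

    next_token = sample_top_k(torch.randn(10), top_k=40)  # k is capped at 10 here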
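Note on weights_only=True: it restricts torch.load (PyTorch 1.13 or newer) to deserializing plain tensors and primitive containers instead of arbitrary pickled objects, so a tampered checkpoint cannot execute code on load. A minimal sketch of the save/load round trip (the toy module and file name are illustrative):

    import torch
    import torch.nn as nn

    model = nn.Linear(4, 2)

    # Save only the state dict (tensors), not the whole pickled module
    torch.save(model.state_dict(), "model_weights.pth")

    # weights_only=True refuses to unpickle arbitrary Python objects
    state = torch.load("model_weights.pth", map_location="cpu", weights_only=True)
    model.load_state_dict(state)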
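Note on the cropping fix: in a GPT-style model the positional embedding table only covers block_size positions, so conditioning on more than block_size tokens would index past it. Slicing with idx[:, -self.block_size:] keeps the most recent window, and is a no-op while the sequence is still shorter than block_size. A toy illustration with made-up sizes:

    import torch

    block_size = 8
    idx = torch.arange(12).unsqueeze(0)    # pretend 12 tokens generated, shape (1, 12)

    idx_cond = idx[:, -block_size:]        # keep only the last 8 tokens
    print(idx_cond.shape)                  # torch.Size([1, 8])

    short = torch.arange(5).unsqueeze(0)   # shorter than block_size
    print(short[:, -block_size:].shape)    # torch.Size([1, 5]), unchanged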
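Note on max_consecutive_exclamations: the hunks above only show the else branch that resets the counter, so the matching if branch is not in this diff. The following is a hypothetical reconstruction of how such a guard could work, with a made-up exclamation_token id and token stream:

    # Hypothetical sketch, not the author's code: the if-branch is not shown
    # in the diff above. exclamation_token is assumed to be the id for "!".
    exclamation_token = 0
    max_consecutive_exclamations = 2
    consecutive_exclamations = 0

    for idx_next in [3, 0, 0, 7, 0, 0]:    # pretend sampled token ids
        if idx_next == exclamation_token:
            consecutive_exclamations += 1
            if consecutive_exclamations >= max_consecutive_exclamations:
                break    # stop runaway "!!!..." repetition
        else:
            consecutive_exclamations = 0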