Spaces:

Adityak204
/

JuliusCaesarGPT

Sleeping

App Files Files Community

Adityak204 committed on Jan 17

Commit

47d6804

1 Parent(s): 359189c

Upload file

Browse files

Files changed (8) hide show

.gitignore +67 -0
app.py +113 -0
docs/itos.json +1 -0
docs/sample_prediction.png +0 -0
docs/stoi.json +1 -0
requirements.txt +3 -0
src/__init__.py +0 -0
src/gpt_base.py +151 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,67 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual Environment
+venv/
+env/
+ENV/
+.env
+.venv
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+.project
+.pydevproject
+.settings
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb_checkpoints/
+# PyTorch
+*.pth
+*.pt
+*.pkl
+# Logs and databases
+*.log
+*.sqlite
+*.db
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+# Project specific
+runs/
+checkpoints/
+outputs/
+logs/
+lightning_logs/

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import streamlit as st
+import torch
+from pathlib import Path
+import math
+from dataclasses import dataclass
+import torch.nn as nn
+import torch.nn.functional as F
+from src.gpt_base import GPT
+import json
+from huggingface_hub import hf_hub_download
+# Config class for model parameters
+@dataclass
+class GPTConfig:
+    """Hyperparameters passed to ``GPT(config)`` when the model is built.
+    NOTE(review): the defaults presumably have to match the values the
+    downloaded checkpoint was trained with, otherwise ``load_state_dict``
+    will reject it - confirm against the training run.
+    """
+    # Longest sequence the model accepts; sizes the position-embedding table and the causal mask.
+    block_size: int = 1024  # max sequence length
+    # Number of distinct tokens; docs/stoi.json maps 65 characters to ids 0-64.
+    vocab_size: int = 65
+    num_layer: int = 12  # number of layers
+    # Must divide emb_dim evenly (asserted by MultiHeadAttention).
+    num_head: int = 12  # number of heads
+    emb_dim: int = 768  # embedding dimension
+    dropout: float = 0.1  # dropout rate
+# The model classes (GPT, MultiHeadAttention, FeedForward, TransformerBlock) live in
+# src/gpt_base.py and are imported above.
+# Load stoi and itos from docs
+with open("docs/stoi.json") as f:
+    stoi = json.load(f)
+with open("docs/itos.json") as f:
+    itos = json.load(f)
+# Encoding/Decoding functions
+def encode(s):
+    return [stoi[c] for c in s]
+def decode(l):
+    return "".join([itos[i] for i in l])
+def predict_next_word(text, model, seq_len=50):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    for _ in range(seq_len):
+        xb = torch.tensor(encode(text)).unsqueeze(0).to(device)
+        yb = model(xb)
+        next_word = yb[0, -1].argmax().item()
+        text += itos[str(next_word)]
+    return text
+# Streamlit app
+st.title("GPT Text Generation")
+# Add some usage instructions
+st.markdown(
+    """
+### How to use:
+1. Enter your text prompt in the text box above
+2. Adjust the sequence length using the slider
+3. Click 'Generate Text' to see the model's output
+Note: Longer sequence lengths will take more time to generate.
+"""
+)
+# Input text box
+input_text = st.text_area("Enter your text prompt:", height=100)
+# Sequence length slider
+seq_length = st.slider(
+    "Select sequence length for prediction:",
+    min_value=50,
+    max_value=500,
+    value=200,
+    step=50,
+)
+# Model loading and prediction
+if st.button("Generate Text"):
+    if input_text:
+        try:
+            # Initialize model
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            config = GPTConfig()
+            model = GPT(config)
+            model = model.to(device)
+            # Load checkpoint
+            # checkpoint_path = "/Users/aditya/Documents/self_learning/ERA V3/week 12/model artifacts/gpt_model_and_loss.pth"
+            model_repo = "Adityak204/JuliusCaesarGPT"
+            model_filename = "gpt_model_and_loss.pth"
+            checkpoint_path = hf_hub_download(
+                repo_id=model_repo, filename=model_filename
+            )
+            with st.spinner("Loading model and generating text..."):
+                _dict = torch.load(checkpoint_path, map_location=device)
+                model_state_dict = _dict["model_state_dict"]
+                model.load_state_dict(model_state_dict)
+                # Generate text
+                generated_text = predict_next_word(input_text, model, seq_length)
+                # Display results
+                st.subheader("Generated Text:")
+                st.write(generated_text)
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+    else:
+        st.warning("Please enter some text first!")

docs/itos.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"0": "\n", "1": " ", "2": "!", "3": "$", "4": "&", "5": "'", "6": ",", "7": "-", "8": ".", "9": "3", "10": ":", "11": ";", "12": "?", "13": "A", "14": "B", "15": "C", "16": "D", "17": "E", "18": "F", "19": "G", "20": "H", "21": "I", "22": "J", "23": "K", "24": "L", "25": "M", "26": "N", "27": "O", "28": "P", "29": "Q", "30": "R", "31": "S", "32": "T", "33": "U", "34": "V", "35": "W", "36": "X", "37": "Y", "38": "Z", "39": "a", "40": "b", "41": "c", "42": "d", "43": "e", "44": "f", "45": "g", "46": "h", "47": "i", "48": "j", "49": "k", "50": "l", "51": "m", "52": "n", "53": "o", "54": "p", "55": "q", "56": "r", "57": "s", "58": "t", "59": "u", "60": "v", "61": "w", "62": "x", "63": "y", "64": "z"}

docs/sample_prediction.png ADDED Viewed

docs/stoi.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"\n": 0, " ": 1, "!": 2, "$": 3, "&": 4, "'": 5, ",": 6, "-": 7, ".": 8, "3": 9, ":": 10, ";": 11, "?": 12, "A": 13, "B": 14, "C": 15, "D": 16, "E": 17, "F": 18, "G": 19, "H": 20, "I": 21, "J": 22, "K": 23, "L": 24, "M": 25, "N": 26, "O": 27, "P": 28, "Q": 29, "R": 30, "S": 31, "T": 32, "U": 33, "V": 34, "W": 35, "X": 36, "Y": 37, "Z": 38, "a": 39, "b": 40, "c": 41, "d": 42, "e": 43, "f": 44, "g": 45, "h": 46, "i": 47, "j": 48, "k": 49, "l": 50, "m": 51, "n": 52, "o": 53, "p": 54, "q": 55, "r": 56, "s": 57, "t": 58, "u": 59, "v": 60, "w": 61, "x": 62, "y": 63, "z": 64}

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+huggingface_hub
+streamlit
+torch

src/__init__.py ADDED Viewed

File without changes

src/gpt_base.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+from dataclasses import dataclass
+class MultiHeadAttention(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        # Ensure embedding dimension is divisible by number of heads
+        assert config.emb_dim % config.num_head == 0
+        self.n_head = config.num_head
+        self.n_embd = config.emb_dim
+        self.head_size = config.emb_dim // config.num_head
+        # Separate projections for Q, K, V instead of a single projection
+        self.q_proj = nn.Linear(config.emb_dim, config.emb_dim)
+        self.k_proj = nn.Linear(config.emb_dim, config.emb_dim)
+        self.v_proj = nn.Linear(config.emb_dim, config.emb_dim)
+        self.out_proj = nn.Linear(config.emb_dim, config.emb_dim)
+        self.attn_dropout = nn.Dropout(config.dropout)
+        self.resid_dropout = nn.Dropout(config.dropout)
+        # Causal mask
+        self.register_buffer(
+            "mask",
+            torch.tril(torch.ones(config.block_size, config.block_size)).view(
+                1, 1, config.block_size, config.block_size
+            ),
+        )
+    def forward(self, x):
+        B, T, C = x.size()  # batch, sequence length, embedding dim
+        # Separate projections for Q, K, V
+        q = self.q_proj(x)  # (B, T, C)
+        k = self.k_proj(x)  # (B, T, C)
+        v = self.v_proj(x)  # (B, T, C)
+        # Reshape heads
+        q = q.view(B, T, self.n_head, self.head_size).transpose(1, 2)  # (B, nh, T, hs)
+        k = k.view(B, T, self.n_head, self.head_size).transpose(1, 2)  # (B, nh, T, hs)
+        v = v.view(B, T, self.n_head, self.head_size).transpose(1, 2)  # (B, nh, T, hs)
+        # Compute attention scores
+        att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))  # (B, nh, T, T)
+        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float("-inf"))
+        att = F.softmax(att, dim=-1)
+        att = self.attn_dropout(att)
+        # Apply attention to values
+        y = att @ v  # (B, nh, T, hs)
+        # Reshape and project output
+        y = y.transpose(1, 2).contiguous().view(B, T, C)  # (B, T, C)
+        y = self.out_proj(y)
+        y = self.resid_dropout(y)
+        return y
+class FeedForward(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.c_fc = nn.Linear(config.emb_dim, 4 * config.emb_dim)
+        self.c_proj = nn.Linear(4 * config.emb_dim, config.emb_dim)
+        self.dropout = nn.Dropout(config.dropout)
+        self.gelu = nn.GELU()
+    def forward(self, x):
+        x = self.gelu(self.c_fc(x))
+        x = self.dropout(self.c_proj(x))
+        return x
+class TransformerBlock(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.ln_1 = nn.LayerNorm(config.emb_dim)
+        self.ln_2 = nn.LayerNorm(config.emb_dim)
+        self.attn = MultiHeadAttention(config)
+        self.mlp = FeedForward(config)
+    def forward(self, x):
+        x = x + self.attn(self.ln_1(x))
+        x = x + self.mlp(self.ln_2(x))
+        return x
+class GPT(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.config = config
+        self.transformer = nn.ModuleDict(
+            {
+                "wte": nn.Embedding(config.vocab_size, config.emb_dim),
+                "wpe": nn.Embedding(config.block_size, config.emb_dim),
+                "drop": nn.Dropout(config.dropout),
+                "h": nn.ModuleList(
+                    [TransformerBlock(config) for _ in range(config.num_layer)]
+                ),
+                "ln_f": nn.LayerNorm(config.emb_dim),
+            }
+        )
+        self.lm_head = nn.Linear(config.emb_dim, config.vocab_size, bias=False)
+        # Initialize weights
+        self.apply(self._init_weights)
+        # Tie weights between embedding and final linear layer
+        self.transformer.wte.weight = self.lm_head.weight
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+            if module.bias is not None:
+                torch.nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+        elif isinstance(module, nn.LayerNorm):
+            torch.nn.init.ones_(module.weight)
+            torch.nn.init.zeros_(module.bias)
+    def forward(self, idx, targets=None):
+        device = idx.device
+        b, t = idx.size()
+        assert (
+            t <= self.config.block_size
+        ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+        # Get positions
+        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)  # (1, t)
+        # Get embeddings
+        tok_emb = self.transformer.wte(idx)  # (b, t, n_embd)
+        pos_emb = self.transformer.wpe(pos)  # (1, t, n_embd)
+        x = self.transformer.drop(tok_emb + pos_emb)
+        # Apply transformer blocks
+        for block in self.transformer.h:
+            x = block(x)
+        x = self.transformer.ln_f(x)
+        logits = self.lm_head(x)
+        return logits