Adityak204 committed
Commit 47d6804 · 1 Parent(s): 359189c

Upload file

.gitignore ADDED
@@ -0,0 +1,67 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ venv/
+ env/
+ ENV/
+ .env
+ .venv
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ .project
+ .pydevproject
+ .settings
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+ *.ipynb_checkpoints/
+
+ # PyTorch
+ *.pth
+ *.pt
+ *.pkl
+
+ # Logs and databases
+ *.log
+ *.sqlite
+ *.db
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Project specific
+ runs/
+ checkpoints/
+ outputs/
+ logs/
+ lightning_logs/
app.py ADDED
@@ -0,0 +1,110 @@
+ import streamlit as st
+ import torch
+ import json
+ from dataclasses import dataclass
+ from huggingface_hub import hf_hub_download
+ from src.gpt_base import GPT
+
+
+ # Config class for model parameters
+ @dataclass
+ class GPTConfig:
+     block_size: int = 1024  # max sequence length
+     vocab_size: int = 65  # character-level vocabulary size
+     num_layer: int = 12  # number of layers
+     num_head: int = 12  # number of heads
+     emb_dim: int = 768  # embedding dimension
+     dropout: float = 0.1  # dropout rate
+
+
+ # Model classes (GPT, MultiHeadAttention, FeedForward, TransformerBlock) are imported from src.gpt_base
+
+ # Load stoi and itos from docs
+ with open("docs/stoi.json") as f:
+     stoi = json.load(f)
+
+ with open("docs/itos.json") as f:
+     itos = json.load(f)
+
+
+ # Encoding/Decoding functions
+ def encode(s):
+     return [stoi[c] for c in s]
+
+
+ def decode(l):
+     # itos keys are strings after json.load, so convert indices before lookup
+     return "".join([itos[str(i)] for i in l])
+
+
+ def predict_next_word(text, model, seq_len=50):
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     with torch.no_grad():
+         for _ in range(seq_len):
+             xb = torch.tensor(encode(text)).unsqueeze(0).to(device)
+             yb = model(xb)
+             next_word = yb[0, -1].argmax().item()
+             text += itos[str(next_word)]
+     return text
+
+
+ # Streamlit app
+ st.title("GPT Text Generation")
+ # Usage instructions
+ st.markdown(
+     """
+     ### How to use:
+     1. Enter your text prompt in the text box below
+     2. Adjust the sequence length using the slider
+     3. Click 'Generate Text' to see the model's output
+
+     Note: Longer sequence lengths will take more time to generate.
+     """
+ )
+
+ # Input text box
+ input_text = st.text_area("Enter your text prompt:", height=100)
+
+ # Sequence length slider
+ seq_length = st.slider(
+     "Select sequence length for prediction:",
+     min_value=50,
+     max_value=500,
+     value=200,
+     step=50,
+ )
+
+ # Model loading and prediction
+ if st.button("Generate Text"):
+     if input_text:
+         try:
+             # Initialize model
+             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+             config = GPTConfig()
+             model = GPT(config)
+             model = model.to(device)
+
+             # Download checkpoint from the Hugging Face Hub
+             model_repo = "Adityak204/JuliusCaesarGPT"
+             model_filename = "gpt_model_and_loss.pth"
+             checkpoint_path = hf_hub_download(
+                 repo_id=model_repo, filename=model_filename
+             )
+
+             with st.spinner("Loading model and generating text..."):
+                 _dict = torch.load(checkpoint_path, map_location=device)
+                 model_state_dict = _dict["model_state_dict"]
+                 model.load_state_dict(model_state_dict)
+                 model.eval()  # disable dropout during generation
+
+                 # Generate text
+                 generated_text = predict_next_word(input_text, model, seq_length)
+
+                 # Display results
+                 st.subheader("Generated Text:")
+                 st.write(generated_text)
+
+         except Exception as e:
+             st.error(f"An error occurred: {str(e)}")
+     else:
+         st.warning("Please enter some text first!")
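
For reference, a minimal self-contained sketch (not part of the commit) of how the character-level encode/decode helpers and the greedy next-token step in app.py behave. The tiny vocabulary and the random logits tensor below are illustrative stand-ins; the real app loads docs/stoi.json and docs/itos.json and gets logits from the GPT model.

# Illustrative stand-in for docs/stoi.json and docs/itos.json
import torch

stoi = {ch: i for i, ch in enumerate("\n abcdefghijklmnopqrstuvwxyz")}
itos = {str(i): ch for ch, i in stoi.items()}  # JSON-loaded keys are strings, as in app.py

def encode(s):
    return [stoi[c] for c in s]

def decode(ids):
    return "".join(itos[str(i)] for i in ids)

ids = encode("friends romans countrymen")
assert decode(ids) == "friends romans countrymen"

# Greedy decoding, as in predict_next_word: take the argmax of the last position's logits
logits = torch.randn(1, len(ids), len(stoi))  # stand-in for model(xb)
next_id = logits[0, -1].argmax().item()
print(decode(ids + [next_id]))
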
docs/itos.json ADDED
@@ -0,0 +1 @@
+ {"0": "\n", "1": " ", "2": "!", "3": "$", "4": "&", "5": "'", "6": ",", "7": "-", "8": ".", "9": "3", "10": ":", "11": ";", "12": "?", "13": "A", "14": "B", "15": "C", "16": "D", "17": "E", "18": "F", "19": "G", "20": "H", "21": "I", "22": "J", "23": "K", "24": "L", "25": "M", "26": "N", "27": "O", "28": "P", "29": "Q", "30": "R", "31": "S", "32": "T", "33": "U", "34": "V", "35": "W", "36": "X", "37": "Y", "38": "Z", "39": "a", "40": "b", "41": "c", "42": "d", "43": "e", "44": "f", "45": "g", "46": "h", "47": "i", "48": "j", "49": "k", "50": "l", "51": "m", "52": "n", "53": "o", "54": "p", "55": "q", "56": "r", "57": "s", "58": "t", "59": "u", "60": "v", "61": "w", "62": "x", "63": "y", "64": "z"}
docs/sample_prediction.png ADDED
docs/stoi.json ADDED
@@ -0,0 +1 @@
+ {"\n": 0, " ": 1, "!": 2, "$": 3, "&": 4, "'": 5, ",": 6, "-": 7, ".": 8, "3": 9, ":": 10, ";": 11, "?": 12, "A": 13, "B": 14, "C": 15, "D": 16, "E": 17, "F": 18, "G": 19, "H": 20, "I": 21, "J": 22, "K": 23, "L": 24, "M": 25, "N": 26, "O": 27, "P": 28, "Q": 29, "R": 30, "S": 31, "T": 32, "U": 33, "V": 34, "W": 35, "X": 36, "Y": 37, "Z": 38, "a": 39, "b": 40, "c": 41, "d": 42, "e": 43, "f": 44, "g": 45, "h": 46, "i": 47, "j": 48, "k": 49, "l": 50, "m": 51, "n": 52, "o": 53, "p": 54, "q": 55, "r": 56, "s": 57, "t": 58, "u": 59, "v": 60, "w": 61, "x": 62, "y": 63, "z": 64}
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ huggingface_hub
+ streamlit
+ torch
src/__init__.py ADDED
File without changes
src/gpt_base.py ADDED
@@ -0,0 +1,151 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import math
+
+
+ class MultiHeadAttention(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         # Ensure embedding dimension is divisible by number of heads
+         assert config.emb_dim % config.num_head == 0
+
+         self.n_head = config.num_head
+         self.n_embd = config.emb_dim
+         self.head_size = config.emb_dim // config.num_head
+
+         # Separate projections for Q, K, V instead of a single projection
+         self.q_proj = nn.Linear(config.emb_dim, config.emb_dim)
+         self.k_proj = nn.Linear(config.emb_dim, config.emb_dim)
+         self.v_proj = nn.Linear(config.emb_dim, config.emb_dim)
+         self.out_proj = nn.Linear(config.emb_dim, config.emb_dim)
+
+         self.attn_dropout = nn.Dropout(config.dropout)
+         self.resid_dropout = nn.Dropout(config.dropout)
+
+         # Causal mask
+         self.register_buffer(
+             "mask",
+             torch.tril(torch.ones(config.block_size, config.block_size)).view(
+                 1, 1, config.block_size, config.block_size
+             ),
+         )
+
+     def forward(self, x):
+         B, T, C = x.size()  # batch, sequence length, embedding dim
+
+         # Separate projections for Q, K, V
+         q = self.q_proj(x)  # (B, T, C)
+         k = self.k_proj(x)  # (B, T, C)
+         v = self.v_proj(x)  # (B, T, C)
+
+         # Reshape into heads
+         q = q.view(B, T, self.n_head, self.head_size).transpose(1, 2)  # (B, nh, T, hs)
+         k = k.view(B, T, self.n_head, self.head_size).transpose(1, 2)  # (B, nh, T, hs)
+         v = v.view(B, T, self.n_head, self.head_size).transpose(1, 2)  # (B, nh, T, hs)
+
+         # Compute scaled dot-product attention scores
+         att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))  # (B, nh, T, T)
+         att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float("-inf"))
+         att = F.softmax(att, dim=-1)
+         att = self.attn_dropout(att)
+
+         # Apply attention to values
+         y = att @ v  # (B, nh, T, hs)
+
+         # Reshape and project output
+         y = y.transpose(1, 2).contiguous().view(B, T, C)  # (B, T, C)
+         y = self.out_proj(y)
+         y = self.resid_dropout(y)
+
+         return y
+
+
+ class FeedForward(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.c_fc = nn.Linear(config.emb_dim, 4 * config.emb_dim)
+         self.c_proj = nn.Linear(4 * config.emb_dim, config.emb_dim)
+         self.dropout = nn.Dropout(config.dropout)
+         self.gelu = nn.GELU()
+
+     def forward(self, x):
+         x = self.gelu(self.c_fc(x))
+         x = self.dropout(self.c_proj(x))
+         return x
+
+
+ class TransformerBlock(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.ln_1 = nn.LayerNorm(config.emb_dim)
+         self.ln_2 = nn.LayerNorm(config.emb_dim)
+         self.attn = MultiHeadAttention(config)
+         self.mlp = FeedForward(config)
+
+     def forward(self, x):
+         # Pre-norm residual connections around attention and MLP
+         x = x + self.attn(self.ln_1(x))
+         x = x + self.mlp(self.ln_2(x))
+         return x
+
+
+ class GPT(nn.Module):
+     def __init__(self, config):
+         super().__init__()
+         self.config = config
+
+         self.transformer = nn.ModuleDict(
+             {
+                 "wte": nn.Embedding(config.vocab_size, config.emb_dim),
+                 "wpe": nn.Embedding(config.block_size, config.emb_dim),
+                 "drop": nn.Dropout(config.dropout),
+                 "h": nn.ModuleList(
+                     [TransformerBlock(config) for _ in range(config.num_layer)]
+                 ),
+                 "ln_f": nn.LayerNorm(config.emb_dim),
+             }
+         )
+
+         self.lm_head = nn.Linear(config.emb_dim, config.vocab_size, bias=False)
+
+         # Initialize weights
+         self.apply(self._init_weights)
+
+         # Tie weights between token embedding and final linear layer
+         self.transformer.wte.weight = self.lm_head.weight
+
+     def _init_weights(self, module):
+         if isinstance(module, nn.Linear):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+             if module.bias is not None:
+                 torch.nn.init.zeros_(module.bias)
+         elif isinstance(module, nn.Embedding):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+         elif isinstance(module, nn.LayerNorm):
+             torch.nn.init.ones_(module.weight)
+             torch.nn.init.zeros_(module.bias)
+
+     def forward(self, idx, targets=None):
+         device = idx.device
+         b, t = idx.size()
+         assert (
+             t <= self.config.block_size
+         ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
+
+         # Get positions
+         pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)  # (1, t)
+
+         # Get embeddings
+         tok_emb = self.transformer.wte(idx)  # (b, t, n_embd)
+         pos_emb = self.transformer.wpe(pos)  # (1, t, n_embd)
+         x = self.transformer.drop(tok_emb + pos_emb)
+
+         # Apply transformer blocks
+         for block in self.transformer.h:
+             x = block(x)
+
+         x = self.transformer.ln_f(x)
+         logits = self.lm_head(x)
+
+         return logits
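
Finally, a quick sanity-check sketch for src/gpt_base.py. SmallConfig is a hypothetical scaled-down stand-in for the GPTConfig dataclass in app.py (2 layers, 32-dim embeddings) so the forward pass runs in seconds on CPU; the full model uses 12 layers and 768-dim embeddings.

# Shape check for the GPT module (sketch; SmallConfig is illustrative, not from the repo)
import torch
from dataclasses import dataclass
from src.gpt_base import GPT

@dataclass
class SmallConfig:
    block_size: int = 64
    vocab_size: int = 65
    num_layer: int = 2
    num_head: int = 2
    emb_dim: int = 32
    dropout: float = 0.1

model = GPT(SmallConfig()).eval()      # eval() disables dropout
idx = torch.randint(0, 65, (1, 16))    # (batch=1, sequence=16) token ids
with torch.no_grad():
    logits = model(idx)
print(logits.shape)                    # torch.Size([1, 16, 65]): one logit row per position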