hash-map committed
Commit 7586e33 · verified · 1 parent: 9d11688

Update model.py

Files changed (1)
  1. model.py +355 -354
model.py CHANGED
@@ -1,355 +1,356 @@
Previous revision: identical to the updated file below, except that it lacked the "import tensorflow_text as tf_text" line even though it already called tf_text.SentencepieceTokenizer.
import gradio as gr
import tensorflow as tf
import sentencepiece as spm
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_text as tf_text
import os

text_pairs = [
    ("Farmers fear that the elephant will destroy the crops", "వర్షాలకు చేతికి వచ్చిన పంట దెబ్బతిన్నదని రైతులు వాపోతున్నారు"),
    ("The death toll in the state stands at 9,863", "దీంతో రాష్ట్రంలో ఇప్పటి వరకు మొత్తం డిశ్చార్జ్‌ల సంఖ్య 9,15,626కి చేరింది"),
    ("Koo is available in Hindi, Kannada, Telugu, Tamil, Bengali, Gujarati and Marathi", "ప్రశ్నలతో రూపొందించిన వీడియోలు మాత్రం ఆంగ్లం, హిందీ, మరాఠీ, కన్నడ, గుజరాతీ, బెంగాల్ భాషల్లో చూడోచ్చు"),
    ("How can the court direct the government to do this?", "ప్రభుత్వం ఎలా వ్యవహరించి ఉండాల్సింది?"),
    ("America is safer today", "అమెరికాలో పరిస్థితి రోజురోజుకూ దారుణంగా మారుతోంది"),
    ("I don't look into that, to be president", "నేను ముఖ్యమంత్రిని కావాలని అనుకోలేదన్నారు"),
    ("He had tested positive for coronavirus", "కరోనా లక్షణాలు కనిపించడంతో టెస్ట్ చేసుకున్న ఆయనకు పాజిటివ్ గా నిర్దారణ అయ్యింది"),
    ("New Delhi: Amid the novel coronavirus situation in the country, locals in Delhi are taking precautionary measures in Delhi", "న్యూడిల్లీ: దేశవ్యాప్తంగా కరోనా మహమ్మారి విజృంభిస్తున్న నేపథ్యంలో కేంద్ర ప్రభుత్వం మరింత అప్రమత్తమైంది"),
    ("She was rescued yesterday and admitted to a hospital", "శనివారం నాడు ఆమె ఆసుపత్రి నుండి డిశ్చార్జ్ అయ్యారు"),
]

# -----------------------
# 3. Load SentencePiece models in TensorFlow
# -----------------------
def load_spm(path):
    with open(path, "rb") as f:
        return f.read()

spm_model_en = load_spm("spm_en.model")
spm_model_te = load_spm("spm_te.model")

tokenizer_en = tf_text.SentencepieceTokenizer(model=spm_model_en)
tokenizer_te = tf_text.SentencepieceTokenizer(model=spm_model_te)

# -----------------------
# 4. Encode text pairs
# -----------------------
sequence_length = 50

def encode_source(texts):
    # Tokenize and pad/truncate to a fixed (batch, sequence_length) tensor
    return tokenizer_en.tokenize(texts).to_tensor(shape=(None, sequence_length))

def encode_target(texts):
    # Targets keep one extra position for the shifted decoder input
    return tokenizer_te.tokenize(texts).to_tensor(shape=(None, sequence_length + 1))

# Example: build dataset
english_texts = [pair[0] for pair in text_pairs]
telugu_texts = [pair[1] for pair in text_pairs]

X = encode_source(tf.constant(english_texts))
Y = encode_target(tf.constant(telugu_texts))

import random

# Print a few random pairs to sanity-check the data
for i in range(5):
    print(random.choice(text_pairs))
print(len(text_pairs))

# Wrap every Telugu target with the [start]/[end] markers that decode_sequence expects
marked_pairs = []
for english, telugu in text_pairs:
    telugu = "[start] " + telugu + " [end]"
    marked_pairs.append((english, telugu))
text_pairs = marked_pairs

class TransformerDecoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim)]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config

    def get_causal_attention_mask(self, inputs):
        # Lower-triangular mask so position i can only attend to positions <= i
        input_shape = tf.shape(inputs)
        batch_size, sequence_length = input_shape[0], input_shape[1]
        i = tf.range(sequence_length)[:, tf.newaxis]
        j = tf.range(sequence_length)
        mask = tf.cast(i >= j, dtype="int32")
        mask = tf.reshape(mask, (1, input_shape[1], input_shape[1]))
        mult = tf.concat(
            [tf.expand_dims(batch_size, -1),
             tf.constant([1, 1], dtype=tf.int32)], axis=0)
        return tf.tile(mask, mult)

    def call(self, inputs, encoder_outputs, mask=None):
        causal_mask = self.get_causal_attention_mask(inputs)
        if mask is not None:
            padding_mask = tf.cast(
                mask[:, tf.newaxis, :], dtype="int32")
            padding_mask = tf.minimum(padding_mask, causal_mask)
        else:
            padding_mask = mask
        # Masked self-attention over the target sequence
        attention_output_1 = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=causal_mask)
        attention_output_1 = self.layernorm_1(inputs + attention_output_1)
        # Cross-attention over the encoder outputs
        attention_output_2 = self.attention_2(
            query=attention_output_1,
            value=encoder_outputs,
            key=encoder_outputs,
            attention_mask=padding_mask,
        )
        attention_output_2 = self.layernorm_2(
            attention_output_1 + attention_output_2)
        proj_output = self.dense_proj(attention_output_2)
        return self.layernorm_3(attention_output_2 + proj_output)

# Define the PositionalEmbedding layer
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

    def call(self, inputs):
        length = tf.shape(inputs)[-1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_tokens = self.token_embeddings(inputs)
        embedded_positions = self.position_embeddings(positions)
        return embedded_tokens + embedded_positions

    def compute_mask(self, inputs, mask=None):
        # Properly handle mask computation within Keras
        if mask is None:
            return None
        return mask

    def get_config(self):
        config = super().get_config()
        config.update({
            "sequence_length": self.sequence_length,
            "vocab_size": self.vocab_size,
            "embed_dim": self.embed_dim,
        })
        return config

# Define the TransformerEncoder layer (example implementation)
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential([
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = mask[:, tf.newaxis, :]
        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

    def get_config(self):
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "dense_dim": self.dense_dim,
            "num_heads": self.num_heads,
        })
        return config

# Plain SentencePiece processor for decoding Telugu token IDs back to text
sp_te = spm.SentencePieceProcessor(model_file="spm_te.model")

def decode_ids(ids):
    return sp_te.decode(ids)

loss_object = keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction="none"
)

def masked_loss(y_true, y_pred):
    # Normal sparse CE (batch, seq_len)
    loss_ = loss_object(y_true, y_pred)

    # Create mask (ignore pad = 0)
    mask = tf.cast(tf.not_equal(y_true, 0), loss_.dtype)

    # Apply mask
    loss_ = loss_ * mask

    # Return mean only over non-masked tokens
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)

def masked_accuracy(y_true, y_pred):
    y_pred = tf.argmax(y_pred, axis=-1, output_type=y_true.dtype)

    matches = tf.cast(tf.equal(y_true, y_pred), tf.float32)
    mask = tf.cast(tf.not_equal(y_true, 0), tf.float32)

    return tf.reduce_sum(matches * mask) / tf.reduce_sum(mask)
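
# Illustrative note (an assumption, not part of the committed file): for
# y_true = [[5, 7, 0]], the pad position (token id 0) contributes nothing to
# either sum above, so masked_loss averages the cross-entropy over 2 real
# tokens rather than 3, and masked_accuracy likewise ignores the padded slot.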

# Load the trained models (custom layers and metrics must be passed explicitly)
transformer = keras.models.load_model(
    "full_transformer.keras",
    custom_objects={
        "TransformerEncoder": TransformerEncoder,
        "PositionalEmbedding": PositionalEmbedding,
        "TransformerDecoder": TransformerDecoder,
        "masked_loss": masked_loss,
        "masked_accuracy": masked_accuracy
    }
)
# Overwrites the previous assignment: this is the checkpoint `transformer` ends up pointing to
transformer = keras.models.load_model(
    "full_transformer (2).keras",
    custom_objects={
        "TransformerEncoder": TransformerEncoder,
        "PositionalEmbedding": PositionalEmbedding,
        "TransformerDecoder": TransformerDecoder,
        "masked_loss": masked_loss,
        "masked_accuracy": masked_accuracy
    }
)
transformer2 = keras.models.load_model(
    "full_transformer (1).keras",
    custom_objects={
        "TransformerEncoder": TransformerEncoder,
        "PositionalEmbedding": PositionalEmbedding,
        "TransformerDecoder": TransformerDecoder,
        "masked_loss": masked_loss,
        "masked_accuracy": masked_accuracy
    }
)
transformer3 = keras.models.load_model(
    "full_transformer.keras",
    custom_objects={
        "TransformerEncoder": TransformerEncoder,
        "PositionalEmbedding": PositionalEmbedding,
        "TransformerDecoder": TransformerDecoder,
        "masked_loss": masked_loss,
        "masked_accuracy": masked_accuracy
    }
)

def decode_tokens(token_ids):
    # token_ids: tf.Tensor shape (seq_len,)
    token_ids = tf.expand_dims(token_ids, 0)  # add batch dim
    decoded = tokenizer_te.detokenize(token_ids)  # returns tf.Tensor of shape (1,)
    return decoded[0].numpy().decode("utf-8")

# Greedy decoding: returns both the detokenized text and the raw token IDs
def decode_sequence(input_sentence, t=transformer, max_len=50):
    tokenized_input = encode_source([input_sentence])

    # Initialize sequence with start token
    start_id = tokenizer_te.string_to_id('[start]').numpy()
    end_id = tokenizer_te.string_to_id('[end]').numpy()
    seq = [start_id]

    for _ in range(max_len):
        if seq[-1] == end_id:
            break

        tgt = tf.expand_dims(seq, 0)
        predictions = t([tokenized_input, tgt])

        # Get probabilities for the last predicted token
        probs = tf.nn.softmax(predictions[0, len(seq) - 1, :]).numpy()
        next_id = np.argmax(probs)  # Select most probable token
        seq.append(int(next_id))

    # Decode sequence to text
    decoded = tokenizer_te.detokenize(tf.constant([seq])).numpy()[0]
    decoded_text = decoded.decode("utf-8").replace("[start]", "").replace("[end]", "").strip()

    return decoded_text, seq

max_decoded_sentence_length = 50

# Evaluate some random samples
test_eng_texts = [pair[0] for pair in text_pairs]
final_pairs = [pair[1] for pair in text_pairs]

for _ in range(5):
    idx = random.randint(0, len(test_eng_texts) - 1)
    input_sentence = test_eng_texts[idx]
    decoded, _ = decode_sequence(input_sentence, transformer)
    original = final_pairs[idx].replace("[start]", "").replace("[end]", "").strip()
    print("input:", input_sentence)
    print("reference:", original)
    print("decoded:", decoded)

idx = random.randint(0, len(test_eng_texts) - 1)
input_sentence = test_eng_texts[idx]
decoded, decoded_ids = decode_sequence(input_sentence, transformer3)
original = final_pairs[idx].replace("[start]", "").replace("[end]", "").strip()

# BLEU expects tokenized sentences
original_tokens = tokenizer_te.tokenize([original]).numpy()[0]
decoded_tokens = tokenizer_te.tokenize([decoded]).numpy()[0]
print("original tokens:", original_tokens)
print("decoded_tokens", decoded_tokens)
print(original)
print(decoded)

# Example decoding
print(decode_sequence("your response to the question is not good you need to improve and this is order not request", transformer3))
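
# A minimal sketch (an assumption, not part of this commit) of how the gradio
# import at the top could expose decode_sequence as a web demo; the textbox
# labels and the choice of transformer3 are illustrative only.
def translate(text):
    decoded_text, _ = decode_sequence(text, transformer3)
    return decoded_text

demo = gr.Interface(
    fn=translate,
    inputs=gr.Textbox(label="English text"),
    outputs=gr.Textbox(label="Telugu translation"),
)

if __name__ == "__main__":
    demo.launch()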