hamxaameer committed on
Commit
63134d2
·
verified ·
1 Parent(s): 27dad6b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +681 -0
app.py ADDED
@@ -0,0 +1,681 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pickle
3
+ import torch
4
+ import numpy as np
5
+ from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
6
+ from nltk.tokenize import word_tokenize
7
+ import nltk
8
+ import time
9
+ import os
10
+
11
# Download the NLTK tokenizer data used by word_tokenize. This is best-effort:
# a failed download must not prevent the app from starting.
try:
    nltk.download('punkt', quiet=True)
    nltk.download('punkt_tab', quiet=True)
except Exception as exc:
    # `except Exception` (not a bare `except`) so Ctrl-C / SystemExit still
    # propagate, and the failure is reported instead of silently dropped.
    print(f"Warning: could not download NLTK tokenizer data: {exc}")

# Global variables shared by the model loader and the Gradio callbacks.
loaded_model = None        # model object from the checkpoint; None until a load succeeds
loaded_tokenizer = None    # tokenizer stored next to the model in the checkpoint, if any
loaded_config = None       # config/metrics dict stored in the checkpoint
generation_history = []    # one dict per generation, appended by generate_code_from_pseudo
23
+
24
+ # Auto-load model on startup
25
def initialize_model():
    """Load the default checkpoint and return the loader's status text.

    Delegates to load_model_from_pickle with the "best_model.pkl" path and
    hands its return value back unchanged.
    """
    default_checkpoint = "best_model.pkl"
    status_message = load_model_from_pickle(default_checkpoint)
    return status_message
28
+
29
# Shown when torch.load reports that the checkpoint still references CUDA tensors.
_CUDA_CHECKPOINT_HELP = """❌ Model file is GPU-trained and not CPU-compatible.

⚠️ SOLUTION: Convert the model on Colab BEFORE downloading:

Run this in your Colab notebook (where you trained the model):

```python
import torch
import pickle

# Load GPU model
with open('best_model.pkl', 'rb') as f:
    model_package = pickle.load(f)

# Move to CPU
if 'model' in model_package:
    model_package['model'] = model_package['model'].cpu()
    for param in model_package['model'].parameters():
        param.data = param.data.cpu()
    for buffer in model_package['model'].buffers():
        buffer.data = buffer.data.cpu()

# Save CPU version
torch.save(model_package, 'best_model_cpu.pkl')

# Download
from google.colab import files
files.download('best_model_cpu.pkl')
```

Then upload 'best_model_cpu.pkl' to this Space and rename it to 'best_model.pkl'.

πŸ“– See COLAB_INSTRUCTIONS.md for detailed steps.
"""


def _torch_load_on_cpu(pickle_path):
    """Deserialize a checkpoint with every tensor mapped onto the CPU."""
    # SECURITY: torch.load unpickles arbitrary Python objects. Only point this
    # at a checkpoint file that the owner of the app put there themselves.
    try:
        # PyTorch >= 2.6 defaults to weights_only=True, which refuses to
        # unpickle a full model object; this app stores one, so opt out.
        return torch.load(pickle_path, map_location='cpu', weights_only=False)
    except TypeError as exc:
        if 'weights_only' not in str(exc):
            raise
        # Older PyTorch releases do not accept the weights_only keyword.
        return torch.load(pickle_path, map_location='cpu')


def _format_config_value(value, spec):
    """Apply a format spec to a config value; fall back to str() for non-numbers."""
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        # e.g. the 'N/A' placeholder cannot take ',' or '.4f'.
        return str(value)


def load_model_from_pickle(pickle_path="best_model.pkl"):
    """Load a pickled model package and publish it through the module globals.

    The file is expected to hold either a dict with the keys 'model',
    'tokenizer' and 'config', or the model object itself.

    Args:
        pickle_path: Path of the checkpoint file to load.

    Returns:
        A human-readable status string: a configuration summary on success,
        or a message starting with the error marker on failure. This function
        does not raise; every failure is reported through the return value.

    Side effects:
        On success only, sets the globals loaded_model, loaded_tokenizer and
        loaded_config. A failed load leaves them untouched.
    """
    global loaded_model, loaded_tokenizer, loaded_config

    try:
        if not os.path.exists(pickle_path):
            return f"❌ Model file not found: {pickle_path}\n\nPlease ensure best_model.pkl is uploaded to the HuggingFace Space."

        try:
            model_package = _torch_load_on_cpu(pickle_path)
        except Exception as e:
            error_msg = str(e)
            if 'Attempting to deserialize object on a CUDA device' in error_msg:
                return _CUDA_CHECKPOINT_HELP
            return f"❌ Error loading model: {error_msg}\n\nPlease check that the file is a valid PyTorch pickle."

        # Handle the supported package shapes.
        if isinstance(model_package, dict):
            model = model_package.get('model')
            tokenizer = model_package.get('tokenizer')
            config = model_package.get('config') or {}
        else:
            # Unknown package format: assume the object itself is the model.
            model, tokenizer, config = model_package, None, {}
        if not hasattr(config, 'get'):
            # The summary below reads the config with .get(); ignore anything else.
            config = {}

        # A state_dict (nested under 'model' or saved at the top level) cannot
        # be used without the model class, so give specific guidance.
        nested_state_dict = isinstance(model, dict) and 'state_dict' in model
        top_level_state_dict = (
            model is None and isinstance(model_package, dict) and 'state_dict' in model_package
        )
        if nested_state_dict or top_level_state_dict:
            return ("❌ The pickle appears to contain a state_dict rather than a full model object. "
                    "This app expects a pickled model object (model instance).\n"
                    "If you only have a state_dict, re-create the model architecture and load the state_dict before pickling, "
                    "or provide a pickled model object saved with torch.save(model, path).")

        if model is None:
            return ("❌ No model object found inside the pickle. Please ensure the pickle contains a dict with keys "
                    "'model', 'tokenizer', and 'config' (or the model object itself).")

        # Switch to evaluation mode and move to the best available device.
        try:
            model.eval()
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            model = model.to(device)
        except Exception as e:
            return (f"❌ Error preparing model for inference: {str(e)}\n\n"
                    "This can happen if the saved object is not a proper torch.nn.Module or if tensors couldn't be mapped to the current device.")

        # Publish only after every check passed, so callers never see a
        # half-validated object.
        loaded_model, loaded_tokenizer, loaded_config = model, tokenizer, config

        rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        status_lines = [
            "βœ… Model loaded successfully!",
            "",
            "πŸ“Š Model Configuration:",
            rule,
            f"β€’ Base Model: {config.get('model_name', 'GPT-2')}",
            f"β€’ Training Epochs: {config.get('num_epochs', 'N/A')}",
            f"β€’ Training Samples: {_format_config_value(config.get('training_samples', 'N/A'), ',')}",
            f"β€’ Validation Samples: {_format_config_value(config.get('validation_samples', 'N/A'), ',')}",
            f"β€’ BLEU Score: {_format_config_value(config.get('bleu_score', 0), '.4f')}",
            f"β€’ Perplexity: {_format_config_value(config.get('perplexity', 0), '.2f')}",
            f"β€’ Final Loss: {_format_config_value(config.get('final_loss', 0), '.4f')}",
            f"β€’ Device: {device}",
            rule,
            "",
            "πŸš€ Model is ready to generate code!",
            "",
        ]
        return "\n".join(status_lines)

    except Exception as e:
        # Final catch-all so the UI always receives a message instead of a traceback.
        err = str(e)
        return f"❌ Unexpected error loading model: {err}\n\nPlease ensure best_model.pkl is properly uploaded and compatible with this environment."
139
+
140
def calculate_bleu_score(reference, hypothesis):
    """Calculate cumulative BLEU-1 to BLEU-4 between reference and generated code.

    Both texts are lower-cased and tokenized, then scored with NLTK's
    sentence_bleu using SmoothingFunction.method1.

    Args:
        reference: The reference code as a string.
        hypothesis: The generated code as a string.

    Returns:
        A 4-tuple (bleu_1, bleu_2, bleu_3, bleu_4). If scoring fails for any
        reason the failure is printed and (0.0, 0.0, 0.0, 0.0) is returned.
    """
    try:
        try:
            ref_tokens = word_tokenize(reference.lower())
            hyp_tokens = word_tokenize(hypothesis.lower())
        except LookupError:
            # The NLTK tokenizer data is not installed; whitespace tokens still
            # give a usable score instead of a misleading row of zeros.
            ref_tokens = reference.lower().split()
            hyp_tokens = hypothesis.lower().split()

        smoothing = SmoothingFunction().method1
        # Uniform weights over the first k n-gram orders. BLEU-3 uses exact
        # thirds so that the weights sum to 1.
        weight_sets = (
            (1, 0, 0, 0),
            (0.5, 0.5, 0, 0),
            (1 / 3, 1 / 3, 1 / 3, 0),
            (0.25, 0.25, 0.25, 0.25),
        )
        return tuple(
            sentence_bleu([ref_tokens], hyp_tokens, weights=weights, smoothing_function=smoothing)
            for weights in weight_sets
        )
    except Exception as exc:
        # Metrics are best-effort: report the problem but never break generation.
        print(f"Warning: BLEU calculation failed: {exc}")
        return 0.0, 0.0, 0.0, 0.0
157
+
158
def calculate_code_metrics(reference, generated):
    """Calculate simple similarity metrics between reference and generated code.

    Args:
        reference: The reference code as a string.
        generated: The generated code as a string.

    Returns:
        A dict with the keys:
          * 'length_ratio': len(generated) / len(reference), with the divisor
            clamped to at least 1.
          * 'precision': share of distinct lower-cased, whitespace-separated
            generated words that also occur in the reference.
          * 'recall': share of distinct reference words that occur in the
            generated text.
          * 'f1_score': harmonic mean of precision and recall.
          * 'char_overlap': share of generated characters that occur anywhere
            in the reference.
        All values are 0 if the inputs cannot be processed (e.g. None).
    """
    try:
        length_ratio = len(generated) / max(len(reference), 1)

        # Word overlap on distinct lower-cased tokens.
        ref_words = set(reference.lower().split())
        gen_words = set(generated.lower().split())

        precision = recall = f1 = 0
        if ref_words:
            shared = len(ref_words & gen_words)
            precision = shared / len(gen_words) if gen_words else 0
            recall = shared / len(ref_words)
            if precision + recall > 0:
                f1 = 2 * (precision * recall) / (precision + recall)

        # Build the character set once so each membership test is O(1) instead
        # of rescanning the whole reference string per generated character.
        reference_chars = set(reference)
        matching_chars = sum(1 for ch in generated if ch in reference_chars)
        char_overlap = matching_chars / max(len(generated), 1)

        return {
            'length_ratio': length_ratio,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'char_overlap': char_overlap,
        }
    except Exception:
        # Best-effort metrics: unusable input yields an all-zero result.
        return {
            'length_ratio': 0,
            'precision': 0,
            'recall': 0,
            'f1_score': 0,
            'char_overlap': 0,
        }
193
+
194
# Upper bound on stored history entries so a long-running app does not grow
# its memory use without limit.
_MAX_HISTORY_ENTRIES = 100


def _extract_generated_code(decoded, tokenizer):
    """Return only the generated code from one decoded sequence."""
    # Everything after the last <CODE> marker is model output; before it is the prompt.
    code = decoded.split('<CODE>')[-1] if '<CODE>' in decoded else decoded

    # Sequences are decoded with special tokens kept, so cut at the first
    # end-of-sequence token: whatever follows is padding, not code.
    eos_token = getattr(tokenizer, 'eos_token', None)
    if isinstance(eos_token, str) and eos_token and eos_token in code:
        code = code.split(eos_token, 1)[0]

    pad_token = getattr(tokenizer, 'pad_token', None)
    for marker in ('<PAD>', '<SEP>', pad_token):
        if isinstance(marker, str) and marker:
            code = code.replace(marker, '')
    return code.strip()


def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
    """Generate code from pseudo-code using the loaded model.

    Args:
        pseudo_code: Natural-language description to turn into code.
        max_length: Passed to generate() as max_length (converted to int).
        temperature: Sampling temperature (converted to float).
        top_k: Top-k sampling cutoff (converted to int).
        top_p: Nucleus sampling threshold (converted to float).
        num_sequences: Number of sequences to sample (converted to int).
        reference_code: Optional reference code; when it contains non-blank
            text, BLEU scores and similarity metrics are computed against the
            first generated sequence.

    Returns:
        A 4-tuple of strings (primary_code, metrics_text, bleu_text,
        alternatives_markdown). On failure the first element is an error
        message and the other three are empty strings.

    Side effects:
        Appends an entry to generation_history, keeping only the most recent
        _MAX_HISTORY_ENTRIES entries.
    """
    global loaded_model, loaded_tokenizer, generation_history

    if loaded_model is None or loaded_tokenizer is None:
        return "❌ Please upload and load a model first!", "", "", ""

    if not pseudo_code or not pseudo_code.strip():
        return "❌ Please enter pseudo-code description!", "", "", ""

    try:
        start_time = time.time()
        sequence_count = int(num_sequences)
        # One flag decides everything reference-related, so a whitespace-only
        # reference can no longer leave bleu_4 undefined further down.
        has_reference = bool(reference_code and reference_code.strip())

        # Format input
        prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> <CODE>"

        # Tokenize on the device the model lives on
        device = next(loaded_model.parameters()).device
        inputs = loaded_tokenizer(prompt, return_tensors='pt').to(device)

        eos_token_id = loaded_tokenizer.eos_token_id
        pad_token_id = loaded_tokenizer.pad_token_id
        if pad_token_id is None:
            # Tokenizers without a dedicated pad token: pad with EOS instead.
            pad_token_id = eos_token_id

        # NOTE(review): max_length presumably counts the prompt tokens too,
        # while the UI label says "Maximum tokens to generate" - confirm
        # whether max_new_tokens was intended.
        with torch.no_grad():
            outputs = loaded_model.generate(
                **inputs,
                max_length=int(max_length),
                temperature=float(temperature),
                top_k=int(top_k),
                top_p=float(top_p),
                do_sample=True,
                num_return_sequences=sequence_count,
                pad_token_id=pad_token_id,
                eos_token_id=eos_token_id,
            )

        generation_time = time.time() - start_time

        # Decode all sequences
        generated_codes = [
            _extract_generated_code(
                loaded_tokenizer.decode(output, skip_special_tokens=False),
                loaded_tokenizer,
            )
            for output in outputs
        ]

        # Use the first generated code as primary output
        primary_code = generated_codes[0]

        rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        run_summary = [
            f"⏱️ Generation Time: {generation_time:.2f}s",
            f"πŸ“ Sequences Generated: {sequence_count}",
            f"πŸ”’ Output Length: {len(primary_code)} characters",
        ]

        bleu_4 = None
        bleu_output = ""
        if has_reference:
            bleu_1, bleu_2, bleu_3, bleu_4 = calculate_bleu_score(reference_code, primary_code)
            bleu_output = "\n".join([
                "πŸ“Š BLEU Scores:",
                rule,
                f"β€’ BLEU-1 (Unigram): {bleu_1:.4f} ({bleu_1*100:.2f}%)",
                f"β€’ BLEU-2 (Bigram): {bleu_2:.4f} ({bleu_2*100:.2f}%)",
                f"β€’ BLEU-3 (Trigram): {bleu_3:.4f} ({bleu_3*100:.2f}%)",
                f"β€’ BLEU-4 (4-gram): {bleu_4:.4f} ({bleu_4*100:.2f}%)",
                rule,
                "",
                "πŸ’‘ Interpretation:",
                "β€’ BLEU > 0.4: Excellent match",
                "β€’ BLEU 0.3-0.4: Good match",
                "β€’ BLEU 0.2-0.3: Fair match",
                "β€’ BLEU < 0.2: Poor match",
                "",
            ])

            code_metrics = calculate_code_metrics(reference_code, primary_code)
            metrics_output = "\n".join([
                "πŸ“ˆ Additional Metrics:",
                rule,
                f"β€’ Length Ratio: {code_metrics['length_ratio']:.3f}",
                f"β€’ Precision: {code_metrics['precision']:.4f} ({code_metrics['precision']*100:.2f}%)",
                f"β€’ Recall: {code_metrics['recall']:.4f} ({code_metrics['recall']*100:.2f}%)",
                f"β€’ F1-Score: {code_metrics['f1_score']:.4f} ({code_metrics['f1_score']*100:.2f}%)",
                f"β€’ Character Overlap: {code_metrics['char_overlap']:.4f} ({code_metrics['char_overlap']*100:.2f}%)",
                rule,
                "",
                *run_summary,
                rule,
                "",
            ])
        else:
            metrics_output = "\n".join([
                *run_summary,
                "",
                "πŸ’‘ Tip: Provide reference code to see BLEU scores and similarity metrics!",
                "",
            ])

        # Format alternative sequences (everything after the primary one)
        alternatives = ""
        if sequence_count > 1:
            alternatives = "πŸ”„ Alternative Generations:\n" + "━"*50 + "\n\n"
            alternatives += "".join(
                f"Variation {i}:\n```python\n{code}\n```\n\n"
                for i, code in enumerate(generated_codes[1:], 2)
            )

        # Add to history, discarding the oldest entries beyond the cap
        generation_history.append({
            'pseudo': pseudo_code,
            'generated': primary_code,
            'bleu_4': bleu_4,
            'time': generation_time
        })
        del generation_history[:-_MAX_HISTORY_ENTRIES]

        return primary_code, metrics_output, bleu_output, alternatives

    except Exception as e:
        return f"❌ Error generating code: {str(e)}", "", "", ""
315
+
316
def show_examples(example_name):
    """Return the pseudo-code prompt stored under example_name.

    An unknown name (including None) yields an empty string.
    """
    prompt_pairs = (
        ("Basic Loop", "create a list of numbers from 1 to 10"),
        ("Function Definition", "define a function to calculate the sum of two numbers"),
        ("List Iteration", "iterate through a list and print each element"),
        ("Conditional Check", "check if a number is even or odd"),
        ("Sorting", "sort a list in descending order"),
        ("Maximum Element", "create a function to find maximum element in array"),
        ("Binary Search", "implement binary search algorithm"),
        ("Factorial", "create a recursive function to calculate factorial"),
        ("Palindrome", "check if a string is palindrome"),
        ("Fibonacci", "generate fibonacci sequence up to n terms"),
    )
    prompts_by_label = dict(prompt_pairs)
    if example_name in prompts_by_label:
        return prompts_by_label[example_name]
    return ""
331
+
332
def clear_all():
    """Return reset values: five empty strings, then the five slider defaults.

    The slider defaults are, in order: max length 150, temperature 0.7,
    top-k 50, top-p 0.95 and 1 variation.
    """
    blank_fields = ("",) * 5
    slider_defaults = (150, 0.7, 50, 0.95, 1)
    return blank_fields + slider_defaults
335
+
336
def _preview(text, limit):
    """Return text cut to `limit` characters, adding "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def show_history():
    """Display generation history.

    Returns:
        A text report of the ten most recent entries in generation_history,
        newest first, or a placeholder message when the history is empty.
        Each entry shows the pseudo-code (up to 60 characters), the generation
        time, the BLEU-4 score when one was recorded, and the generated code
        (up to 80 characters).
    """
    if not generation_history:
        return "No generation history yet. Start generating code!"

    parts = ["πŸ“œ Generation History:\n" + "="*60 + "\n\n"]

    for i, entry in enumerate(reversed(generation_history[-10:]), 1):  # Show last 10
        parts.append(f"{i}. Pseudo: {_preview(entry['pseudo'], 60)}\n")
        parts.append(f" Time: {entry['time']:.2f}s")
        if entry['bleu_4'] is not None:
            parts.append(f" | BLEU-4: {entry['bleu_4']:.4f}")
        parts.append(f"\n Code: {_preview(entry['generated'], 80)}\n\n")

    return "".join(parts)
351
+
352
# Create Gradio interface with custom CSS
custom_css = """
.gradio-container {
    font-family: 'Arial', sans-serif;
}
.output-code {
    font-family: 'Courier New', monospace;
    font-size: 14px;
}
.metrics-box {
    background-color: #f0f8ff;
    border-radius: 8px;
    padding: 10px;
}
"""


def _clear_all_outputs():
    """Return clear_all()'s reset values plus an empty alternatives panel."""
    # clear_all() keeps its 10-value contract; the extra value is added here
    # so "Clear All" also wipes the alternative generations.
    return (*clear_all(), "")


# NOTE(review): the Row/Column nesting below is reconstructed from the order
# of the components - confirm the intended layout.
with gr.Blocks(title="πŸš€ GPT-2 Pseudo-Code to Code Generator", theme=gr.themes.Soft(), css=custom_css) as demo:

    gr.Markdown("""
    # πŸš€ GPT-2 Pseudo-Code to Python Code Generator

    **Transform natural language descriptions into executable Python code using fine-tuned GPT-2!**

    This model is trained on the SPOC (Search-based Pseudo-code to Code) dataset and can generate Python code from pseudo-code descriptions.
    """)

    with gr.Tabs():
        # Tab 1: Code Generation
        with gr.Tab("πŸ’» Code Generation"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### οΏ½ Model Status")
                    model_status = gr.Textbox(
                        label="Model Information",
                        lines=15,
                        interactive=False,
                        value=initialize_model()  # Auto-load on startup
                    )

            gr.Markdown("---")

            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### ✍️ Enter Pseudo-Code")

                    # Example selector
                    with gr.Row():
                        example_dropdown = gr.Dropdown(
                            choices=["Basic Loop", "Function Definition", "List Iteration",
                                     "Conditional Check", "Sorting", "Maximum Element",
                                     "Binary Search", "Factorial", "Palindrome", "Fibonacci"],
                            label="πŸ“š Load Example",
                            value=None
                        )

                    pseudo_input = gr.Textbox(
                        label="Pseudo-Code Description",
                        placeholder="Example: create a function to calculate factorial of a number",
                        lines=4
                    )

                    reference_code = gr.Textbox(
                        label="Reference Code (Optional - for BLEU score calculation)",
                        placeholder="Paste reference code here to calculate BLEU scores...",
                        lines=4
                    )

                    gr.Markdown("### βš™οΈ Generation Parameters")
                    with gr.Row():
                        max_length = gr.Slider(
                            minimum=50,
                            maximum=500,
                            value=150,
                            step=10,
                            label="Max Length",
                            info="Maximum tokens to generate"
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.5,
                            value=0.7,
                            step=0.1,
                            label="Temperature",
                            info="Higher = more creative"
                        )

                    with gr.Row():
                        top_k = gr.Slider(
                            minimum=10,
                            maximum=100,
                            value=50,
                            step=5,
                            label="Top-K",
                            info="Vocabulary filtering"
                        )
                        top_p = gr.Slider(
                            minimum=0.5,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-P",
                            info="Nucleus sampling"
                        )

                    num_sequences = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=1,
                        step=1,
                        label="Number of Variations",
                        info="Generate multiple versions"
                    )

                    with gr.Row():
                        generate_btn = gr.Button("✨ Generate Code", variant="primary", size="lg")
                        clear_btn = gr.Button("πŸ—‘οΈ Clear All", variant="secondary")

                with gr.Column(scale=1):
                    gr.Markdown("### πŸ’» Generated Python Code")
                    code_output = gr.Code(
                        label="Generated Code",
                        language="python",
                        lines=12,
                        elem_classes="output-code"
                    )

                    with gr.Row():
                        with gr.Column():
                            metrics_output = gr.Textbox(
                                label="πŸ“Š Performance Metrics",
                                lines=8,
                                interactive=False,
                                elem_classes="metrics-box"
                            )
                        with gr.Column():
                            bleu_output = gr.Textbox(
                                label="🎯 BLEU Scores",
                                lines=8,
                                interactive=False,
                                elem_classes="metrics-box"
                            )

                    alternatives_output = gr.Markdown(
                        label="πŸ”„ Alternative Generations"
                    )

        # Tab 2: Information & Guide
        with gr.Tab("πŸ“– Guide & Examples"):
            gr.Markdown("""
            ## πŸ“š How to Use

            ### 1️⃣ Check the Model Status
            - The app loads `best_model.pkl` (trained GPT-2 model) automatically on startup
            - The "Model Information" box on the Code Generation tab shows the result
            - On success you'll see model configuration and training metrics

            ### 2️⃣ Generate Code
            - **Quick Start**: Select an example from the dropdown
            - **Custom Input**: Type your own pseudo-code description
            - **Optional**: Add reference code to calculate BLEU scores
            - Adjust generation parameters for different outputs
            - Click "Generate Code"

            ### 3️⃣ Understand the Metrics

            #### 🎯 BLEU Score (Bilingual Evaluation Understudy)
            - Measures similarity between generated and reference code
            - **BLEU-1**: Word-level similarity (unigrams)
            - **BLEU-2**: 2-word phrase similarity (bigrams)
            - **BLEU-3**: 3-word phrase similarity (trigrams)
            - **BLEU-4**: 4-word phrase similarity (most comprehensive)

            **Score Interpretation:**
            - 🟒 **> 0.4**: Excellent match - Generated code is very similar to reference
            - 🟑 **0.3-0.4**: Good match - Code captures most key elements
            - 🟠 **0.2-0.3**: Fair match - Some similarity exists
            - πŸ”΄ **< 0.2**: Poor match - Significant differences

            #### πŸ“ˆ Additional Metrics
            - **Precision**: How many generated words appear in reference
            - **Recall**: How many reference words appear in generated code
            - **F1-Score**: Harmonic mean of precision and recall
            - **Length Ratio**: Generated vs reference code length
            - **Character Overlap**: Character-level similarity

            ### πŸŽ›οΈ Generation Parameters

            | Parameter | Low Value | High Value | Use Case |
            |-----------|-----------|------------|----------|
            | **Temperature** | 0.1-0.3 | 0.8-1.2 | Low: Deterministic, focused<br>High: Creative, diverse |
            | **Top-K** | 10-30 | 60-100 | Low: Conservative choices<br>High: More variety |
            | **Top-P** | 0.5-0.8 | 0.9-1.0 | Low: Safe predictions<br>High: Exploratory |
            | **Max Length** | 50-100 | 200-500 | Short: Simple code<br>Long: Complex implementations |

            ---

            ## πŸ’‘ Example Pseudo-Code Prompts

            ### Basic Operations
            ```
            create a list of numbers from 1 to 10
            define a function to calculate the sum of two numbers
            iterate through a list and print each element
            ```

            ### Conditionals & Logic
            ```
            check if a number is even or odd
            find the maximum of three numbers
            validate if a string is empty
            ```

            ### Data Structures
            ```
            sort a list in descending order
            remove duplicates from a list
            merge two dictionaries
            ```

            ### Algorithms
            ```
            implement binary search algorithm
            create a recursive function to calculate factorial
            generate fibonacci sequence up to n terms
            check if a string is palindrome
            ```

            ### Advanced
            ```
            create a class to represent a student with name and grades
            implement a function to read CSV file and return dataframe
            create a decorator to measure function execution time
            ```

            ---

            ## πŸŽ“ About the Model

            This model is fine-tuned on the **SPOC (Search-based Pseudo-code to Code)** dataset:
            - πŸ“„ Paper: [SPOC: Search-based Pseudo-code to Code](https://arxiv.org/pdf/1906.04908)
            - πŸ›οΈ Source: Stanford University
            - πŸ€– Base Model: GPT-2 (Decoder-Only Transformer)
            - πŸ“Š Training: 10,000+ pseudo-code to code pairs
            - 🎯 Task: Causal Language Modeling

            ---

            ## ⚠️ Limitations

            - Model may not handle very complex algorithms perfectly
            - Generated code should be tested before production use
            - Best results with clear, specific pseudo-code descriptions
            - Model trained on C++ code, adapted for Python generation

            ---

            ## 🀝 Tips for Best Results

            1. βœ… **Be Specific**: "create a function to sort list in ascending order" vs "sort list"
            2. βœ… **Use Action Words**: "create", "define", "implement", "calculate"
            3. βœ… **Mention Data Types**: "list", "string", "dictionary", "integer"
            4. βœ… **Include Details**: "recursive function" vs just "function"
            5. βœ… **Try Variations**: Generate multiple times with different temperatures

            """)

        # Tab 3: History
        with gr.Tab("πŸ“œ History"):
            gr.Markdown("## πŸ“Š Generation History")
            history_display = gr.Textbox(
                label="Recent Generations",
                lines=20,
                interactive=False
            )
            refresh_history_btn = gr.Button("πŸ”„ Refresh History", variant="secondary")

    gr.Markdown("""
    ---
    ### 🌟 Features
    - βœ… Upload and use custom trained models
    - βœ… BLEU score calculation for quality assessment
    - βœ… Multiple evaluation metrics (Precision, Recall, F1)
    - βœ… Generate multiple code variations
    - βœ… Real-time performance tracking
    - βœ… Example prompts library
    - βœ… Generation history

    ### πŸ“ Citation
    If you use this model, please cite:
    ```
    @article{kulal2019spoc,
    title={SPOC: Search-based Pseudo-code to Code},
    author={Kulal, Sumith and Pasupat, Panupong and Chandra, Kartik and Lee, Mina and Padon, Oded and Aiken, Alex and Liang, Percy},
    journal={arXiv preprint arXiv:1906.04908},
    year={2019}
    }
    ```

    **Built with ❀️ using HuggingFace Transformers & Gradio**
    """)

    # Event handlers
    example_dropdown.change(
        fn=show_examples,
        inputs=[example_dropdown],
        outputs=[pseudo_input]
    )

    generate_btn.click(
        fn=generate_code_from_pseudo,
        inputs=[pseudo_input, max_length, temperature, top_k, top_p, num_sequences, reference_code],
        outputs=[code_output, metrics_output, bleu_output, alternatives_output]
    )

    # The outputs list must line up one-to-one with _clear_all_outputs():
    # clear_all()'s ten values followed by the alternatives panel.
    clear_btn.click(
        fn=_clear_all_outputs,
        inputs=[],
        outputs=[pseudo_input, reference_code, code_output, metrics_output, bleu_output,
                 max_length, temperature, top_k, top_p, num_sequences, alternatives_output]
    )

    refresh_history_btn.click(
        fn=show_history,
        inputs=[],
        outputs=[history_display]
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(share=False)