hamxaameer committed on
Commit
2deb22c
·
verified ·
1 Parent(s): 16b9485

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -5
app.py CHANGED
@@ -267,7 +267,106 @@ def calculate_code_metrics(reference, generated):
267
  'char_overlap': 0
268
  }
269
 
270
- def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  """Generate code from pseudo-code using loaded model"""
272
  global loaded_model, loaded_tokenizer, generation_history
273
 
@@ -280,8 +379,8 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
280
  try:
281
  start_time = time.time()
282
 
283
- # Format input
284
- prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> <CODE>"
285
 
286
  # Tokenize with error handling
287
  device = next(loaded_model.parameters()).device
@@ -500,8 +599,11 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
500
  for token in special_tokens:
501
  code = code.replace(token, '')
502
 
503
- # Clean up extra whitespace
504
- code = ' '.join(code.split())
 
 
 
505
 
506
  # Ensure we have some content
507
  if not code or code.isspace():
 
267
  'char_overlap': 0
268
  }
269
 
270
def format_python_code(code):
    """Best-effort conversion of generated code into indented, Python-like text.

    The input is returned unchanged when it is empty, starts with ``#``, or
    contains none of the recognised foreign constructs and already parses as
    Python. Otherwise C/C++/Java/JS-style constructs are rewritten and the
    indentation is rebuilt from braces and block keywords.

    This is a heuristic, not a parser: every ``{``/``}``/``;`` in text that
    needs conversion is treated as a block or statement delimiter, and any
    indentation present in such text is discarded and rebuilt.

    Args:
        code: Generated source text.

    Returns:
        The formatted text. If formatting raises, the original text prefixed
        with a ``# Formatting failed: ...`` comment line.
    """
    if not code or code.startswith('#'):
        return code

    try:
        translated = _translate_foreign_syntax(code)
        # Never "fix" text that needed no translation and is already valid
        # Python; re-indenting it heuristically could only make it worse.
        if translated == code and _parses_as_python(code):
            return code
        return _rebuild_indentation(translated)
    except Exception as e:
        # Deliberate best-effort boundary: the caller always gets text back.
        return f"# Formatting failed: {str(e)}\n{code}"


def _parses_as_python(code):
    """Return True when *code* is syntactically valid Python."""
    import ast

    try:
        ast.parse(code)
    except (SyntaxError, ValueError):
        return False
    return True


def _translate_foreign_syntax(code):
    """Rewrite common C/C++/Java/JS tokens to their Python counterparts."""
    import re

    # Drop type/modifier keywords only when they are whole words followed by
    # an identifier, so "print (x)" or "for char in s" are left untouched.
    code = re.sub(
        r'\b(?:int|string|bool|float|void|char|double|public|private|static|const)[ \t]+'
        r'(?=[A-Za-z_])(?!(?:in|is|and|or|not|if|else|as|for)\b)',
        '',
        code,
    )

    def for_to_range(match):
        var, start, op, stop = match.group(1), match.group(2), match.group(3), match.group(4)
        if op == '<=':
            stop = f'{stop} + 1'
        return f'for {var} in range({start}, {stop})'

    # Canonical counting loop: for (i = a; i < b; i++) -> for i in range(a, b)
    code = re.sub(
        r'\bfor\s*\(\s*(\w+)\s*=\s*([^;]+?)\s*;\s*\1\s*(<=?)\s*([^;]+?)\s*;'
        r'\s*(?:\1\s*\+\+|\+\+\s*\1)\s*\)',
        for_to_range,
        code,
    )

    code = re.sub(r'\bfunction[ \t]+(?=[A-Za-z_])', 'def ', code)
    code = re.sub(r'\belse[ \t]*if\b', 'elif', code)

    code = code.replace('System.out.println', 'print').replace('console.log', 'print')
    code = re.sub(r'\bprintf\b', 'print', code)

    def cout_to_print(match):
        parts = [part.strip() for part in match.group(1).split('<<')]
        args = [part for part in parts if part and part not in ('endl', 'std::endl')]
        return 'print(' + ', '.join(args) + ')'

    # cout << a << b << endl  ->  print(a, b)
    code = re.sub(r'\b(?:std::)?cout[ \t]*<<([^;\n{}]*)', cout_to_print, code)

    code = code.replace('std::', '')
    # Only rewrite member-access operators that sit directly between names,
    # which leaves "-> int" return annotations and "[::2]" slices alone.
    code = re.sub(r'(?<=[\w)\]])->(?=[A-Za-z_])', '.', code)
    code = re.sub(r'(?<=\w)::(?=[A-Za-z_])', '.', code)
    return code


def _rebuild_indentation(code, indent_size=4):
    """Re-indent *code* using braces and block keywords as structure hints."""
    import re

    block_kw = re.compile(r'(?:if|elif|else|for|while|def|class|try|except|finally|with)\b')
    cond_kw = re.compile(r'(?:if|elif|for|while|def|class|with|except)\b')
    bare_kw = re.compile(r'(?:else|try|finally|except)$')
    rejoin_kw = re.compile(r'(?:elif|else|except|finally)\b')
    leave_kw = re.compile(r'(?:return|break|continue|pass)\b')
    call_shape = re.compile(r'\w+ ?\(.*\)$')

    # Put every brace on a line boundary and turn semicolons into line breaks.
    pieces = code.replace('{', ' {\n').replace('}', '\n}\n').replace(';', '\n').split('\n')

    out = []
    # One entry per open block: 'brace' (closed by '}'), 'colon' (opened by a
    # keyword header, closed heuristically) or 'anon' (a brace pair with no
    # header, which must not indent because Python has no anonymous blocks).
    open_blocks = []
    fresh_colon = False   # previous emitted line opened a 'colon' block
    just_closed = False   # a block was closed immediately before this line

    for piece in pieces:
        line = re.sub(r'[ \t]+', ' ', piece.strip())
        if not line:
            continue

        if line == '}':
            # Close up to and including the nearest brace-opened block;
            # a stray '}' with no matching opener is ignored.
            if any(kind != 'colon' for kind in open_blocks):
                while open_blocks.pop() == 'colon':
                    pass
            fresh_colon = False
            just_closed = True
            continue

        braced = line.endswith('{')
        if braced:
            line = line[:-1].rstrip()
            if not line:
                # Brace on its own line: it belongs to the header just
                # emitted, otherwise it is an anonymous block.
                if fresh_colon:
                    open_blocks[-1] = 'brace'
                else:
                    open_blocks.append('anon')
                fresh_colon = False
                continue

        is_header = block_kw.match(line) is not None

        # else/elif/except/finally continue the statement whose block is
        # still open; if that block was just closed there is nothing to pop.
        if (rejoin_kw.match(line) and not just_closed
                and open_blocks and open_blocks[-1] == 'colon'):
            open_blocks.pop()

        opens_block = False
        if braced:
            if not is_header and call_shape.match(line):
                line = 'def ' + line  # e.g. "main()" left after type removal
            if not line.endswith(':'):
                line += ':'
            opens_block = True
        elif is_header:
            if line.endswith(':'):
                opens_block = True
            elif bare_kw.match(line) or (
                cond_kw.match(line)
                and ':' not in line
                and (line.endswith(')') or '(' not in line)
            ):
                # Header with a missing colon. Lines such as "if (x) y = 1"
                # carry an inline body and are deliberately left alone.
                line += ':'
                opens_block = True

        depth = len(open_blocks) - open_blocks.count('anon')
        out.append(' ' * (indent_size * depth) + line)

        just_closed = False
        if opens_block:
            open_blocks.append('brace' if braced else 'colon')
        elif open_blocks and open_blocks[-1] == 'colon' and leave_kw.match(line):
            # return/break/continue/pass end a keyword-opened block, so
            # dedent AFTER emitting them, not before.
            open_blocks.pop()
            just_closed = True
        fresh_colon = opens_block and not braced

    return '\n'.join(out)


def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
    """Generate code from pseudo-code using loaded model"""
    global loaded_model, loaded_tokenizer, generation_history
372
 
 
379
  try:
380
  start_time = time.time()
381
 
382
+ # Format input with Python-specific instructions
383
+ prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> Generate Python code with proper indentation and syntax: <CODE>"
384
 
385
  # Tokenize with error handling
386
  device = next(loaded_model.parameters()).device
 
599
  for token in special_tokens:
600
  code = code.replace(token, '')
601
 
602
+ # Clean up extra whitespace but preserve some structure
603
+ code = code.replace('\n\n\n', '\n\n') # Reduce excessive newlines
604
+
605
+ # Format as proper Python code
606
+ code = format_python_code(code)
607
 
608
  # Ensure we have some content
609
  if not code or code.isspace():