hamxaameer committed on
Commit
d8f3c7f
·
verified ·
1 Parent(s): 6eabddd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -110
app.py CHANGED
@@ -273,118 +273,76 @@ def format_python_code(code):
273
  return code
274
 
275
  try:
276
- # Basic Python syntax corrections (convert common C++/other language patterns to Python)
277
- corrections = [
278
- # Data types
279
- ('int ', ''), # Remove C-style type declarations
280
- ('string ', ''), # Remove string type declarations
281
- ('bool ', ''), # Remove bool type declarations
282
- ('float ', ''), # Remove float type declarations
283
- ('void ', ''), # Remove void declarations
284
- ('char ', ''), # Remove char type declarations
285
- ('double ', ''), # Remove double type declarations
286
-
287
- # Function declarations
288
- ('function ', 'def '), # Convert function to def
289
- ('public ', ''), # Remove public
290
- ('private ', ''), # Remove private
291
- ('static ', ''), # Remove static
292
- ('const ', ''), # Remove const
293
-
294
- # Control structures
295
- ('elseif', 'elif'), # Convert elseif to elif
296
- ('else if', 'elif'), # Convert else if to elif
297
-
298
- # Loops
299
- ('for(int ', 'for '), # Remove int from for loops
300
- ('for(string ', 'for '), # Remove string from for loops
301
- ('for(char ', 'for '), # Remove char from for loops
302
-
303
- # Common patterns
304
- ('System.out.println', 'print'), # Convert Java print to Python
305
- ('console.log', 'print'), # Convert JS print to Python
306
- ('cout <<', 'print'), # Convert C++ cout to Python
307
- ('printf', 'print'), # Convert C printf to Python
308
- ('std::', ''), # Remove C++ std namespace
309
- ('->', '.'), # Convert arrow operator to dot
310
- ('::', '.'), # Convert scope resolution to dot
311
-
312
- # Braces and brackets - convert to Python indentation
313
- ('{', ':\n'), # Convert opening braces to colons with newline
314
- ('}', ''), # Remove closing braces
315
-
316
- # Semicolons (convert to newlines)
317
- ('; ', '\n'), # Convert semicolons to newlines
318
- (';', '\n'), # Convert semicolons to newlines
319
-
320
- # Ternary operators (C++ style to Python)
321
- (' ? ', ' if '), # Convert ternary ? to if
322
- (' : ', ' else '), # Convert ternary : to else
323
-
324
- # Comparison operators
325
- ('< SEP', '<'), # Fix common typo
326
- ('< SEp', '<'), # Fix common typo
327
- ('<SEP', '<'), # Fix common typo
328
- ('<SEp', '<'), # Fix common typo
329
- ]
330
-
331
- for old, new in corrections:
332
- code = code.replace(old, new)
333
-
334
- # Handle function calls and parentheses
335
- # Convert function(param) to function(param): for function definitions
336
  import re
337
- # Pattern to find function definitions like "def func(param)" and add colon
338
- code = re.sub(r'(def\s+\w+\([^)]*\))(?!\s*:)', r'\1:', code)
339
-
340
- # Split into lines and add proper indentation
341
- lines = code.split('\n')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  formatted_lines = []
343
  indent_level = 0
344
- indent_size = 4 # 4 spaces per indent level
345
-
346
  for line in lines:
347
- line = line.strip()
348
- if not line:
349
- continue
350
-
351
- # Decrease indent for dedent keywords
352
- if any(line.startswith(keyword) for keyword in ['else:', 'elif ', 'except:', 'finally:', 'return', 'break', 'continue', 'pass']):
353
  indent_level = max(0, indent_level - 1)
354
-
355
  # Add indentation
356
- indented_line = ' ' * (indent_level * indent_size) + line
357
- formatted_lines.append(indented_line)
358
-
359
- # Increase indent after indent keywords
360
- if ':' in line and not line.startswith(('else:', 'elif ', 'except:', 'finally:')):
361
- if any(keyword in line for keyword in ['if ', 'for ', 'while ', 'def ', 'class ', 'try:', 'except ', 'with ']):
362
- indent_level += 1
363
-
364
- # Join lines back
365
- formatted_code = '\n'.join(formatted_lines)
366
-
367
- # Additional cleanup
368
- formatted_code = re.sub(r'\s+', ' ', formatted_code) # Remove multiple spaces
369
- formatted_code = re.sub(r'\n\s*\n\s*\n', '\n\n', formatted_code) # Remove excessive newlines
370
-
371
- # Ensure proper Python syntax
372
- # Add colons where missing for control structures
373
- formatted_code = re.sub(r'\b(if|for|while|def|class|try|except|with)\s+([^:]+)(?!\s*:)', r'\1 \2:', formatted_code)
374
-
375
- # Fix ternary operators that became malformed
376
- # Convert "return a if condition else b" to proper Python ternary
377
- formatted_code = re.sub(r'return\s+(.+?)\s+if\s+(.+?)\s+else\s+(.+)', r'return \1 if \2 else \3', formatted_code)
378
-
379
- # Ensure functions have proper structure
380
- # If a line starts with "def" and doesn't end with ":", add it
381
- formatted_code = re.sub(r'(def\s+\w+\([^)]*)\s*$', r'\1:', formatted_code, flags=re.MULTILINE)
382
-
383
- return formatted_code
384
 
385
  except Exception as e:
386
- # If formatting fails, return original code with a note
387
- return f"# Formatting failed: {str(e)}\n{code}"
388
 
389
 
390
  def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
@@ -401,7 +359,7 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
401
  start_time = time.time()
402
 
403
  # Format input with Python-specific instructions
404
- prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> Generate proper Python code with correct syntax, indentation, and separate lines. Use def for functions, if/else for conditionals, for/while for loops, and proper Python syntax only. No C++ or other languages. <CODE>"
405
 
406
  # Tokenize with error handling
407
  device = next(loaded_model.parameters()).device
@@ -623,13 +581,15 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
623
  # Clean up extra whitespace but preserve some structure
624
  code = code.replace('\n\n\n', '\n\n') # Reduce excessive newlines
625
 
626
- # Format as proper Python code
627
- code = format_python_code(code)
628
-
629
  # For debugging: include raw generated code
630
  raw_code = generated.strip()
631
- if not code.startswith('#'):
632
- code = f"# Raw generated code:\n# {raw_code}\n\n# Formatted Python code:\n{code}"
 
 
 
 
 
633
 
634
  # Ensure we have some content
635
  if not code or code.isspace():
 
273
  return code
274
 
275
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  import re
277
+
278
+ # Remove special tokens and artifacts first
279
+ code = re.sub(r'<[^>]*>', '', code) # Remove all <TOKEN> patterns
280
+ code = code.replace('<TR>', '').strip() # Remove <TR> specifically
281
+
282
+ # Basic cleanup and conversion to Python
283
+ # Convert C++ function declarations to Python
284
+ code = re.sub(r'\b(?:bool|int|void|string|float|char|double)\s+(\w+)\s*\(([^)]*)\)\s*\{', r'def \1(\2):', code)
285
+
286
+ # Clean up parameter types in function signatures
287
+ code = re.sub(r'\(\s*(?:int|bool|string|float|char|double)\s+(\w+)\s*\)', r'(\1)', code)
288
+ code = re.sub(r',\s*(?:int|bool|string|float|char|double)\s+(\w+)', r', \1', code)
289
+
290
+ # Replace braces with proper Python structure
291
+ code = code.replace('{', ':')
292
+ code = code.replace('}', '')
293
+
294
+ # Remove semicolons
295
+ code = code.replace(';', '')
296
+
297
+ # Fix return statements
298
+ code = re.sub(r'return\s+true\b', 'return True', code)
299
+ code = re.sub(r'return\s+false\b', 'return False', code)
300
+
301
+ # Fix control structures
302
+ code = re.sub(r'\bif\s*\(([^)]+)\)', r'if \1:', code)
303
+ code = re.sub(r'\belse\s*:', r'else:', code)
304
+ code = re.sub(r'\belse\s+', r'else:\n ', code)
305
+
306
+ # Split into lines for indentation
307
+ lines = [line.strip() for line in code.split('\n') if line.strip()]
308
+
309
+ # Add proper indentation
310
  formatted_lines = []
311
  indent_level = 0
312
+
 
313
  for line in lines:
314
+ # Handle dedent
315
+ if line.startswith('else:') or line.startswith('elif'):
 
 
 
 
316
  indent_level = max(0, indent_level - 1)
317
+
318
  # Add indentation
319
+ if indent_level > 0:
320
+ formatted_line = ' ' * indent_level + line
321
+ else:
322
+ formatted_line = line
323
+
324
+ formatted_lines.append(formatted_line)
325
+
326
+ # Handle indent after colon
327
+ if line.endswith(':'):
328
+ indent_level += 1
329
+
330
+ # Join lines
331
+ result = '\n'.join(formatted_lines)
332
+
333
+ # Final cleanup
334
+ result = re.sub(r'\n\s*\n+', '\n', result) # Remove empty lines
335
+
336
+ # Ensure we have something useful
337
+ if not result.strip() or 'def ' not in result:
338
+ # Create a basic function if parsing failed
339
+ result = f"def generated_function():\n # Model output: {code[:50]}...\n return None"
340
+
341
+ return result
 
 
 
 
 
342
 
343
  except Exception as e:
344
+ # If formatting fails, return a basic structure with the original
345
+ return f"def generated_function():\n # Formatting error: {str(e)}\n # Original: {code[:100]}...\n return None"
346
 
347
 
348
  def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
 
359
  start_time = time.time()
360
 
361
  # Format input with Python-specific instructions
362
+ prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> Write a Python function to {pseudo_code.strip()}. Use proper Python syntax with def, return statements, if/else conditions, and proper indentation. Example: def check_even_odd(number): if number % 2 == 0: return 'even' else: return 'odd' <CODE>"
363
 
364
  # Tokenize with error handling
365
  device = next(loaded_model.parameters()).device
 
581
  # Clean up extra whitespace but preserve some structure
582
  code = code.replace('\n\n\n', '\n\n') # Reduce excessive newlines
583
 
 
 
 
584
  # For debugging: include raw generated code
585
  raw_code = generated.strip()
586
+ formatted_code = format_python_code(code)
587
+
588
+ # Show both raw and formatted for transparency
589
+ if not formatted_code.startswith('#'):
590
+ code = f"# Model Generated (Raw):\n# {raw_code[:100]}...\n\n# Formatted Python Code:\n{formatted_code}"
591
+ else:
592
+ code = formatted_code
593
 
594
  # Ensure we have some content
595
  if not code or code.isspace():