Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -273,118 +273,76 @@ def format_python_code(code):
|
|
| 273 |
return code
|
| 274 |
|
| 275 |
try:
|
| 276 |
-
# Basic Python syntax corrections (convert common C++/other language patterns to Python)
|
| 277 |
-
corrections = [
|
| 278 |
-
# Data types
|
| 279 |
-
('int ', ''), # Remove C-style type declarations
|
| 280 |
-
('string ', ''), # Remove string type declarations
|
| 281 |
-
('bool ', ''), # Remove bool type declarations
|
| 282 |
-
('float ', ''), # Remove float type declarations
|
| 283 |
-
('void ', ''), # Remove void declarations
|
| 284 |
-
('char ', ''), # Remove char type declarations
|
| 285 |
-
('double ', ''), # Remove double type declarations
|
| 286 |
-
|
| 287 |
-
# Function declarations
|
| 288 |
-
('function ', 'def '), # Convert function to def
|
| 289 |
-
('public ', ''), # Remove public
|
| 290 |
-
('private ', ''), # Remove private
|
| 291 |
-
('static ', ''), # Remove static
|
| 292 |
-
('const ', ''), # Remove const
|
| 293 |
-
|
| 294 |
-
# Control structures
|
| 295 |
-
('elseif', 'elif'), # Convert elseif to elif
|
| 296 |
-
('else if', 'elif'), # Convert else if to elif
|
| 297 |
-
|
| 298 |
-
# Loops
|
| 299 |
-
('for(int ', 'for '), # Remove int from for loops
|
| 300 |
-
('for(string ', 'for '), # Remove string from for loops
|
| 301 |
-
('for(char ', 'for '), # Remove char from for loops
|
| 302 |
-
|
| 303 |
-
# Common patterns
|
| 304 |
-
('System.out.println', 'print'), # Convert Java print to Python
|
| 305 |
-
('console.log', 'print'), # Convert JS print to Python
|
| 306 |
-
('cout <<', 'print'), # Convert C++ cout to Python
|
| 307 |
-
('printf', 'print'), # Convert C printf to Python
|
| 308 |
-
('std::', ''), # Remove C++ std namespace
|
| 309 |
-
('->', '.'), # Convert arrow operator to dot
|
| 310 |
-
('::', '.'), # Convert scope resolution to dot
|
| 311 |
-
|
| 312 |
-
# Braces and brackets - convert to Python indentation
|
| 313 |
-
('{', ':\n'), # Convert opening braces to colons with newline
|
| 314 |
-
('}', ''), # Remove closing braces
|
| 315 |
-
|
| 316 |
-
# Semicolons (convert to newlines)
|
| 317 |
-
('; ', '\n'), # Convert semicolons to newlines
|
| 318 |
-
(';', '\n'), # Convert semicolons to newlines
|
| 319 |
-
|
| 320 |
-
# Ternary operators (C++ style to Python)
|
| 321 |
-
(' ? ', ' if '), # Convert ternary ? to if
|
| 322 |
-
(' : ', ' else '), # Convert ternary : to else
|
| 323 |
-
|
| 324 |
-
# Comparison operators
|
| 325 |
-
('< SEP', '<'), # Fix common typo
|
| 326 |
-
('< SEp', '<'), # Fix common typo
|
| 327 |
-
('<SEP', '<'), # Fix common typo
|
| 328 |
-
('<SEp', '<'), # Fix common typo
|
| 329 |
-
]
|
| 330 |
-
|
| 331 |
-
for old, new in corrections:
|
| 332 |
-
code = code.replace(old, new)
|
| 333 |
-
|
| 334 |
-
# Handle function calls and parentheses
|
| 335 |
-
# Convert function(param) to function(param): for function definitions
|
| 336 |
import re
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
formatted_lines = []
|
| 343 |
indent_level = 0
|
| 344 |
-
|
| 345 |
-
|
| 346 |
for line in lines:
|
| 347 |
-
|
| 348 |
-
if
|
| 349 |
-
continue
|
| 350 |
-
|
| 351 |
-
# Decrease indent for dedent keywords
|
| 352 |
-
if any(line.startswith(keyword) for keyword in ['else:', 'elif ', 'except:', 'finally:', 'return', 'break', 'continue', 'pass']):
|
| 353 |
indent_level = max(0, indent_level - 1)
|
| 354 |
-
|
| 355 |
# Add indentation
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
#
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
#
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
# Ensure functions have proper structure
|
| 380 |
-
# If a line starts with "def" and doesn't end with ":", add it
|
| 381 |
-
formatted_code = re.sub(r'(def\s+\w+\([^)]*)\s*$', r'\1:', formatted_code, flags=re.MULTILINE)
|
| 382 |
-
|
| 383 |
-
return formatted_code
|
| 384 |
|
| 385 |
except Exception as e:
|
| 386 |
-
# If formatting fails, return
|
| 387 |
-
return f"# Formatting
|
| 388 |
|
| 389 |
|
| 390 |
def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
|
|
@@ -401,7 +359,7 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
|
|
| 401 |
start_time = time.time()
|
| 402 |
|
| 403 |
# Format input with Python-specific instructions
|
| 404 |
-
prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP>
|
| 405 |
|
| 406 |
# Tokenize with error handling
|
| 407 |
device = next(loaded_model.parameters()).device
|
|
@@ -623,13 +581,15 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
|
|
| 623 |
# Clean up extra whitespace but preserve some structure
|
| 624 |
code = code.replace('\n\n\n', '\n\n') # Reduce excessive newlines
|
| 625 |
|
| 626 |
-
# Format as proper Python code
|
| 627 |
-
code = format_python_code(code)
|
| 628 |
-
|
| 629 |
# For debugging: include raw generated code
|
| 630 |
raw_code = generated.strip()
|
| 631 |
-
|
| 632 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
|
| 634 |
# Ensure we have some content
|
| 635 |
if not code or code.isspace():
|
|
|
|
| 273 |
return code
|
| 274 |
|
| 275 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
import re
|
| 277 |
+
|
| 278 |
+
# Remove special tokens and artifacts first
|
| 279 |
+
code = re.sub(r'<[^>]*>', '', code) # Remove all <TOKEN> patterns
|
| 280 |
+
code = code.replace('<TR>', '').strip() # Remove <TR> specifically
|
| 281 |
+
|
| 282 |
+
# Basic cleanup and conversion to Python
|
| 283 |
+
# Convert C++ function declarations to Python
|
| 284 |
+
code = re.sub(r'\b(?:bool|int|void|string|float|char|double)\s+(\w+)\s*\(([^)]*)\)\s*\{', r'def \1(\2):', code)
|
| 285 |
+
|
| 286 |
+
# Clean up parameter types in function signatures
|
| 287 |
+
code = re.sub(r'\(\s*(?:int|bool|string|float|char|double)\s+(\w+)\s*\)', r'(\1)', code)
|
| 288 |
+
code = re.sub(r',\s*(?:int|bool|string|float|char|double)\s+(\w+)', r', \1', code)
|
| 289 |
+
|
| 290 |
+
# Replace braces with proper Python structure
|
| 291 |
+
code = code.replace('{', ':')
|
| 292 |
+
code = code.replace('}', '')
|
| 293 |
+
|
| 294 |
+
# Remove semicolons
|
| 295 |
+
code = code.replace(';', '')
|
| 296 |
+
|
| 297 |
+
# Fix return statements
|
| 298 |
+
code = re.sub(r'return\s+true\b', 'return True', code)
|
| 299 |
+
code = re.sub(r'return\s+false\b', 'return False', code)
|
| 300 |
+
|
| 301 |
+
# Fix control structures
|
| 302 |
+
code = re.sub(r'\bif\s*\(([^)]+)\)', r'if \1:', code)
|
| 303 |
+
code = re.sub(r'\belse\s*:', r'else:', code)
|
| 304 |
+
code = re.sub(r'\belse\s+', r'else:\n ', code)
|
| 305 |
+
|
| 306 |
+
# Split into lines for indentation
|
| 307 |
+
lines = [line.strip() for line in code.split('\n') if line.strip()]
|
| 308 |
+
|
| 309 |
+
# Add proper indentation
|
| 310 |
formatted_lines = []
|
| 311 |
indent_level = 0
|
| 312 |
+
|
|
|
|
| 313 |
for line in lines:
|
| 314 |
+
# Handle dedent
|
| 315 |
+
if line.startswith('else:') or line.startswith('elif'):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
indent_level = max(0, indent_level - 1)
|
| 317 |
+
|
| 318 |
# Add indentation
|
| 319 |
+
if indent_level > 0:
|
| 320 |
+
formatted_line = ' ' * indent_level + line
|
| 321 |
+
else:
|
| 322 |
+
formatted_line = line
|
| 323 |
+
|
| 324 |
+
formatted_lines.append(formatted_line)
|
| 325 |
+
|
| 326 |
+
# Handle indent after colon
|
| 327 |
+
if line.endswith(':'):
|
| 328 |
+
indent_level += 1
|
| 329 |
+
|
| 330 |
+
# Join lines
|
| 331 |
+
result = '\n'.join(formatted_lines)
|
| 332 |
+
|
| 333 |
+
# Final cleanup
|
| 334 |
+
result = re.sub(r'\n\s*\n+', '\n', result) # Remove empty lines
|
| 335 |
+
|
| 336 |
+
# Ensure we have something useful
|
| 337 |
+
if not result.strip() or 'def ' not in result:
|
| 338 |
+
# Create a basic function if parsing failed
|
| 339 |
+
result = f"def generated_function():\n # Model output: {code[:50]}...\n return None"
|
| 340 |
+
|
| 341 |
+
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
except Exception as e:
|
| 344 |
+
# If formatting fails, return a basic structure with the original
|
| 345 |
+
return f"def generated_function():\n # Formatting error: {str(e)}\n # Original: {code[:100]}...\n return None"
|
| 346 |
|
| 347 |
|
| 348 |
def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
|
|
|
|
| 359 |
start_time = time.time()
|
| 360 |
|
| 361 |
# Format input with Python-specific instructions
|
| 362 |
+
prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> Write a Python function to {pseudo_code.strip()}. Use proper Python syntax with def, return statements, if/else conditions, and proper indentation. Example: def check_even_odd(number): if number % 2 == 0: return 'even' else: return 'odd' <CODE>"
|
| 363 |
|
| 364 |
# Tokenize with error handling
|
| 365 |
device = next(loaded_model.parameters()).device
|
|
|
|
| 581 |
# Clean up extra whitespace but preserve some structure
|
| 582 |
code = code.replace('\n\n\n', '\n\n') # Reduce excessive newlines
|
| 583 |
|
|
|
|
|
|
|
|
|
|
| 584 |
# For debugging: include raw generated code
|
| 585 |
raw_code = generated.strip()
|
| 586 |
+
formatted_code = format_python_code(code)
|
| 587 |
+
|
| 588 |
+
# Show both raw and formatted for transparency
|
| 589 |
+
if not formatted_code.startswith('#'):
|
| 590 |
+
code = f"# Model Generated (Raw):\n# {raw_code[:100]}...\n\n# Formatted Python Code:\n{formatted_code}"
|
| 591 |
+
else:
|
| 592 |
+
code = formatted_code
|
| 593 |
|
| 594 |
# Ensure we have some content
|
| 595 |
if not code or code.isspace():
|