Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -267,7 +267,106 @@ def calculate_code_metrics(reference, generated):
|
|
| 267 |
'char_overlap': 0
|
| 268 |
}
|
| 269 |
|
| 270 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
"""Generate code from pseudo-code using loaded model"""
|
| 272 |
global loaded_model, loaded_tokenizer, generation_history
|
| 273 |
|
|
@@ -280,8 +379,8 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
|
|
| 280 |
try:
|
| 281 |
start_time = time.time()
|
| 282 |
|
| 283 |
-
# Format input
|
| 284 |
-
prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> <CODE>"
|
| 285 |
|
| 286 |
# Tokenize with error handling
|
| 287 |
device = next(loaded_model.parameters()).device
|
|
@@ -500,8 +599,11 @@ def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p
|
|
| 500 |
for token in special_tokens:
|
| 501 |
code = code.replace(token, '')
|
| 502 |
|
| 503 |
-
# Clean up extra whitespace
|
| 504 |
-
code = ' '
|
|
|
|
|
|
|
|
|
|
| 505 |
|
| 506 |
# Ensure we have some content
|
| 507 |
if not code or code.isspace():
|
|
|
|
| 267 |
'char_overlap': 0
|
| 268 |
}
|
| 269 |
|
| 270 |
+
def format_python_code(code):
    """Best-effort clean-up of generated code into indented, Python-like text.

    The function is purely heuristic: it does not parse foreign languages, it
    only rewrites a handful of common C/Java/JS spellings and then re-indents
    the result line by line.

    Behaviour:
      * Falsy input and input starting with ``#`` (the marker used for
        comment-only/error output) are returned unchanged.
      * Input that already parses as Python is returned unchanged, because
        the line-based re-indentation below cannot reproduce arbitrary valid
        nesting and would otherwise damage correct code.
      * Otherwise type/modifier keywords are stripped, print-like calls,
        ``else if`` and ``function`` are renamed, ``{`` becomes ``:``, ``}``
        and ``;`` become line breaks, missing colons are added to block
        headers, and every line is re-indented with 4 spaces per level.

    Args:
        code: Generated source text (``str``) or a falsy value.

    Returns:
        The formatted text. If formatting raises, the *original* input is
        returned prefixed with a ``# Formatting failed: ...`` comment line.
    """
    if not code or code.startswith('#'):
        return code

    # Function-scope imports so this block does not depend on the module's
    # top-level import list.
    import ast
    import re

    try:
        # Valid Python must not be touched: stripping and re-indenting it
        # heuristically can change its block structure.
        try:
            ast.parse(code)
        except (SyntaxError, ValueError, RecursionError):
            pass
        else:
            return code

        # Work on a copy so the failure path can still return the original.
        text = code

        # Strip C-style type names and modifiers. Word boundaries keep
        # identifiers that merely contain such a word intact ("print"
        # contains "int"); the look-aheads require a following identifier
        # that is not a Python keyword, so "for char in s" survives.
        # This also covers "for(int i", which makes separate for-loop rules
        # unnecessary.
        text = re.sub(
            r'\b(?:int|string|bool|float|void|char|double'
            r'|public|private|static|const)[ \t]+'
            r'(?!(?:in|is|and|or|not|if|else|for)\b)(?=[A-Za-z_])',
            '',
            text,
        )
        text = re.sub(r'\bfunction[ \t]+(?=[A-Za-z_])', 'def ', text)
        text = re.sub(r'\belse[ \t]*if\b', 'elif', text)

        # Order matters: "std::" has to go before the generic "::" rule.
        literal_rewrites = (
            ('System.out.println', 'print'),
            ('console.log', 'print'),
            ('cout <<', 'print'),
            ('printf', 'print'),
            ('std::', ''),
            ('->', '.'),
            ('::', '.'),
        )
        for old, new in literal_rewrites:
            text = text.replace(old, new)

        # An opening brace ends a block header; the line break keeps the
        # statements that follow it off the header line. Closing braces and
        # semicolons only separate statements.
        text = re.sub(r'[ \t]*\{', ':\n', text)
        text = text.replace('}', '\n')
        text = re.sub(r';[ \t]*', '\n', text)

        # "def name(args)" without a colon, also when more code follows on
        # the same line.
        text = re.sub(r'(def\s+\w+\([^)]*\))(?!\s*:)', r'\1:', text)

        block_openers = ('if', 'elif', 'else', 'for', 'while', 'def',
                         'class', 'try', 'except', 'finally', 'with')
        continuations = ('else', 'elif', 'except', 'finally')
        terminators = ('return', 'break', 'continue', 'pass')
        indent_size = 4  # 4 spaces per indent level

        formatted_lines = []
        indent_level = 0
        # True right after a terminator already closed a block, so that a
        # following "else:" does not dedent a second time.
        block_just_closed = False

        for raw_line in text.split('\n'):
            # Collapse runs of blanks, but leave simple quoted strings alone.
            line = re.sub(
                r'("[^"\n]*"|\'[^\'\n]*\')|[ \t]+',
                lambda m: m.group(1) or ' ',
                raw_line.strip(),
            )
            line = re.sub(r'[ \t]+:$', ':', line)
            if not line:
                continue

            first = re.match(r'[A-Za-z_]\w*', line)
            word = first.group(0) if first else ''

            # Add a missing colon only to a complete-looking header: no colon
            # anywhere on the line and balanced parentheses.
            if (word in block_openers and ':' not in line
                    and line.count('(') == line.count(')')):
                line += ':'

            if word in continuations and not block_just_closed:
                indent_level = max(0, indent_level - 1)

            formatted_lines.append(' ' * (indent_level * indent_size) + line)

            if word in block_openers and line.endswith(':'):
                indent_level += 1
                block_just_closed = False
            elif word in terminators:
                # The terminator itself belongs to the block it ends, so the
                # dedent happens only after the line has been emitted.
                block_just_closed = indent_level > 0
                indent_level = max(0, indent_level - 1)
            else:
                block_just_closed = False

        return '\n'.join(formatted_lines)

    except Exception as e:
        # If formatting fails, return original code with a note
        return f"# Formatting failed: {str(e)}\n{code}"


def generate_code_from_pseudo(pseudo_code, max_length, temperature, top_k, top_p, num_sequences, reference_code):
    """Generate code from pseudo-code using loaded model"""
|
| 371 |
global loaded_model, loaded_tokenizer, generation_history
|
| 372 |
|
|
|
|
| 379 |
try:
|
| 380 |
start_time = time.time()
|
| 381 |
|
| 382 |
+
# Format input with Python-specific instructions
|
| 383 |
+
prompt = f"<PSEUDO> {pseudo_code.strip()} <SEP> Generate Python code with proper indentation and syntax: <CODE>"
|
| 384 |
|
| 385 |
# Tokenize with error handling
|
| 386 |
device = next(loaded_model.parameters()).device
|
|
|
|
| 599 |
for token in special_tokens:
|
| 600 |
code = code.replace(token, '')
|
| 601 |
|
| 602 |
+
# Clean up extra whitespace but preserve some structure
|
| 603 |
+
code = code.replace('\n\n\n', '\n\n') # Reduce excessive newlines
|
| 604 |
+
|
| 605 |
+
# Format as proper Python code
|
| 606 |
+
code = format_python_code(code)
|
| 607 |
|
| 608 |
# Ensure we have some content
|
| 609 |
if not code or code.isspace():
|