Mgolo committed on
Commit
724395a
·
verified ·
1 Parent(s): a7ed79f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -3
app.py CHANGED
@@ -28,6 +28,8 @@ from markdown import markdown
28
  import chardet
29
  from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
30
  from huggingface_hub import login
 
 
31
 
32
  # ================================
33
  # Configuration & Constants
@@ -442,6 +444,15 @@ class AudioProcessor:
442
  class EvaluationService:
443
  """Handles evaluation submissions and GitHub storage."""
444
 
 
 
 
 
 
 
 
 
 
445
  @staticmethod
446
  def get_github_file_sha() -> Optional[str]:
447
  """Get the SHA of the existing evaluation file on GitHub."""
@@ -467,7 +478,6 @@ class EvaluationService:
467
 
468
  if response.status_code == 200:
469
  content = response.json().get("content", "")
470
- import base64
471
  return base64.b64decode(content).decode('utf-8')
472
  return ""
473
  except Exception as e:
@@ -498,8 +508,16 @@ class EvaluationService:
498
  Status message
499
  """
500
  try:
 
 
 
 
 
 
 
 
501
  # Prepare the new evaluation data
502
- new_row = f'"{source_lang}","{target_lang}","{user_input.replace("\"", "\"\"")}","{model_output.replace("\"", "\"\"")}","{notation or ""}","{correct_answer or ""}"\n'
503
 
504
  # Get existing content
505
  existing_content = EvaluationService.read_existing_csv_content()
@@ -514,7 +532,6 @@ class EvaluationService:
514
  csv_content = headers + new_row
515
 
516
  # Encode content for GitHub API
517
- import base64
518
  content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
519
 
520
  # Prepare GitHub API request
 
28
  import chardet
29
  from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
30
  from huggingface_hub import login
31
+ import base64
32
+ import io
33
 
34
  # ================================
35
  # Configuration & Constants
 
444
  class EvaluationService:
445
  """Handles evaluation submissions and GitHub storage."""
446
 
447
+ @staticmethod
448
+ def escape_csv_field(text):
449
+ """Escape text for CSV format."""
450
+ if '"' in text:
451
+ text = text.replace('"', '""')
452
+ if ',' in text or '"' in text or '\n' in text:
453
+ text = f'"{text}"'
454
+ return text
455
+
456
  @staticmethod
457
  def get_github_file_sha() -> Optional[str]:
458
  """Get the SHA of the existing evaluation file on GitHub."""
 
478
 
479
  if response.status_code == 200:
480
  content = response.json().get("content", "")
 
481
  return base64.b64decode(content).decode('utf-8')
482
  return ""
483
  except Exception as e:
 
508
  Status message
509
  """
510
  try:
511
+ # Escape fields for CSV
512
+ source_lang_escaped = EvaluationService.escape_csv_field(source_lang)
513
+ target_lang_escaped = EvaluationService.escape_csv_field(target_lang)
514
+ user_input_escaped = EvaluationService.escape_csv_field(user_input)
515
+ model_output_escaped = EvaluationService.escape_csv_field(model_output)
516
+ notation_escaped = EvaluationService.escape_csv_field(notation if notation else "")
517
+ correct_answer_escaped = EvaluationService.escape_csv_field(correct_answer if correct_answer else "")
518
+
519
  # Prepare the new evaluation data
520
+ new_row = f"{source_lang_escaped},{target_lang_escaped},{user_input_escaped},{model_output_escaped},{notation_escaped},{correct_answer_escaped}\n"
521
 
522
  # Get existing content
523
  existing_content = EvaluationService.read_existing_csv_content()
 
532
  csv_content = headers + new_row
533
 
534
  # Encode content for GitHub API
 
535
  content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
536
 
537
  # Prepare GitHub API request