Spaces:

LocaleNLP
/

LocaleNLP_Translator

Running

App Files Community

Mgolo committed on Sep 20

Commit

724395a

verified ·

1 Parent(s): a7ed79f

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -3

app.py CHANGED Viewed

@@ -28,6 +28,8 @@ from markdown import markdown
 import chardet
 from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login
 # ================================
 # Configuration & Constants
@@ -442,6 +444,15 @@ class AudioProcessor:
 class EvaluationService:
     """Handles evaluation submissions and GitHub storage."""
     @staticmethod
     def get_github_file_sha() -> Optional[str]:
         """Get the SHA of the existing evaluation file on GitHub."""
@@ -467,7 +478,6 @@ class EvaluationService:
             if response.status_code == 200:
                 content = response.json().get("content", "")
-                import base64
                 return base64.b64decode(content).decode('utf-8')
             return ""
         except Exception as e:
@@ -498,8 +508,16 @@ class EvaluationService:
             Status message
         """
         try:
             # Prepare the new evaluation data
-            new_row = f'"{source_lang}","{target_lang}","{user_input.replace("\"", "\"\"")}","{model_output.replace("\"", "\"\"")}","{notation or ""}","{correct_answer or ""}"\n'
             # Get existing content
             existing_content = EvaluationService.read_existing_csv_content()
@@ -514,7 +532,6 @@ class EvaluationService:
                 csv_content = headers + new_row
             # Encode content for GitHub API
-            import base64
             content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
             # Prepare GitHub API request

 import chardet
 from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
 from huggingface_hub import login
+import base64
+import io
 # ================================
 # Configuration & Constants
 class EvaluationService:
     """Handles evaluation submissions and GitHub storage."""
+    @staticmethod
+    def escape_csv_field(text):
+        """Escape text for CSV format."""
+        if '"' in text:
+            text = text.replace('"', '""')
+        if ',' in text or '"' in text or '\n' in text:
+            text = f'"{text}"'
+        return text
     @staticmethod
     def get_github_file_sha() -> Optional[str]:
         """Get the SHA of the existing evaluation file on GitHub."""
             if response.status_code == 200:
                 content = response.json().get("content", "")
                 return base64.b64decode(content).decode('utf-8')
             return ""
         except Exception as e:
             Status message
         """
         try:
+            # Escape fields for CSV
+            source_lang_escaped = EvaluationService.escape_csv_field(source_lang)
+            target_lang_escaped = EvaluationService.escape_csv_field(target_lang)
+            user_input_escaped = EvaluationService.escape_csv_field(user_input)
+            model_output_escaped = EvaluationService.escape_csv_field(model_output)
+            notation_escaped = EvaluationService.escape_csv_field(notation if notation else "")
+            correct_answer_escaped = EvaluationService.escape_csv_field(correct_answer if correct_answer else "")
             # Prepare the new evaluation data
+            new_row = f"{source_lang_escaped},{target_lang_escaped},{user_input_escaped},{model_output_escaped},{notation_escaped},{correct_answer_escaped}\n"
             # Get existing content
             existing_content = EvaluationService.read_existing_csv_content()
                 csv_content = headers + new_row
             # Encode content for GitHub API
             content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
             # Prepare GitHub API request