Mgolo committed on
Commit
a95150e
·
verified ·
1 Parent(s): 200a039

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -63
app.py CHANGED
@@ -12,6 +12,7 @@ import logging
12
  import tempfile
13
  import csv
14
  import requests
 
15
  from typing import Optional, Dict, Tuple, Any, Union
16
  from pathlib import Path
17
  from dataclasses import dataclass
@@ -93,8 +94,10 @@ SUPPORTED_FILE_TYPES = [
93
  # Audio file extensions
94
  AUDIO_EXTENSIONS = [".wav", ".mp3", ".m4a"]
95
 
96
- # Evaluation CSV URL
97
- EVALUATION_CSV_URL = "https://github.com/mgolomanta/Models_Evaluation/blob/main/evaluation.csv"
 
 
98
 
99
  # ================================
100
  # Logging Configuration
@@ -437,12 +440,42 @@ class AudioProcessor:
437
  # ================================
438
 
439
  class EvaluationService:
440
- """Handles evaluation submissions and storage."""
441
 
442
- EVALUATION_CSV_PATH = "evaluation.csv"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
  @staticmethod
445
- def save_evaluation(
446
  source_lang: str,
447
  target_lang: str,
448
  user_input: str,
@@ -451,7 +484,7 @@ class EvaluationService:
451
  correct_answer: Optional[str] = None
452
  ) -> str:
453
  """
454
- Save evaluation to CSV file.
455
 
456
  Args:
457
  source_lang: Source language name
@@ -465,37 +498,56 @@ class EvaluationService:
465
  Status message
466
  """
467
  try:
468
- # Check if CSV file exists, create it with headers if not
469
- file_exists = os.path.exists(EvaluationService.EVALUATION_CSV_PATH)
470
-
471
- with open(EvaluationService.EVALUATION_CSV_PATH, mode='a', newline='', encoding='utf-8') as file:
472
- writer = csv.writer(file)
473
-
474
- # Write headers if file is new
475
- if not file_exists:
476
- writer.writerow([
477
- "source_language_name",
478
- "target_language_name",
479
- "user_input",
480
- "model_output",
481
- "notation_value",
482
- "correct_answer"
483
- ])
484
-
485
- # Write evaluation data
486
- writer.writerow([
487
- source_lang,
488
- target_lang,
489
- user_input,
490
- model_output,
491
- notation if notation else "",
492
- correct_answer if correct_answer else ""
493
- ])
494
 
495
- return "✅ Evaluation submitted successfully!"
 
496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
  except Exception as e:
498
- logger.error(f"Failed to save evaluation: {e}")
499
  return f"❌ Error saving evaluation: {str(e)}"
500
 
501
  # ================================
@@ -559,8 +611,14 @@ class TranslationApp:
559
  notation: Optional[str],
560
  correct_answer: Optional[str]
561
  ) -> str:
562
- """Submit evaluation data."""
563
- return self.evaluation_service.save_evaluation(
 
 
 
 
 
 
564
  source_lang, target_lang, user_input, model_output, notation, correct_answer
565
  )
566
 
@@ -635,6 +693,10 @@ class TranslationApp:
635
  interactive=False
636
  )
637
 
 
 
 
 
638
  # Evaluation section
639
  gr.Markdown("### 📝 Model Evaluation")
640
  with gr.Group():
@@ -673,7 +735,7 @@ class TranslationApp:
673
  text_input: str,
674
  audio_file: Optional[str],
675
  file_obj: Optional[gr.FileData]
676
- ) -> Tuple[str, str]:
677
  """Handle initial input processing."""
678
  try:
679
  processed_text = self.process_input(
@@ -683,28 +745,29 @@ class TranslationApp:
683
  audio_file,
684
  file_obj
685
  )
686
- return processed_text, ""
687
  except Exception as e:
688
  logger.error(f"Processing error: {e}")
689
- return "", f"❌ Error: {str(e)}"
690
 
691
  def handle_translate(
692
  extracted_text: str,
693
  source_lang: str,
694
  target_lang: str
695
- ) -> str:
696
  """Handle translation of processed text."""
697
  if not extracted_text.strip():
698
- return "📝 No text to translate."
699
  try:
700
- return self.translation_service.translate(
701
  extracted_text,
702
  Language(source_lang),
703
  Language(target_lang)
704
  )
 
705
  except Exception as e:
706
  logger.error(f"Translation error: {e}")
707
- return f"❌ Translation error: {str(e)}"
708
 
709
  def handle_evaluation(
710
  source_lang: str,
@@ -715,9 +778,6 @@ class TranslationApp:
715
  correct_answer: Optional[str]
716
  ) -> str:
717
  """Handle evaluation submission."""
718
- if not user_input.strip() or not model_output.strip():
719
- return "⚠️ Please translate text before submitting evaluation."
720
-
721
  return self.submit_evaluation(
722
  source_lang,
723
  target_lang,
@@ -727,14 +787,6 @@ class TranslationApp:
727
  correct_answer
728
  )
729
 
730
- def clear_evaluation_fields() -> Dict:
731
- """Clear evaluation fields after submission."""
732
- return {
733
- notation: gr.update(value=None),
734
- correct_translation: gr.update(value=""),
735
- evaluation_status: gr.update(value="Evaluation cleared. Ready for next submission.")
736
- }
737
-
738
  # Connect events
739
  input_mode.change(
740
  fn=update_visibility,
@@ -742,14 +794,14 @@ class TranslationApp:
742
  outputs=[input_text, audio_input, file_input, extracted_text, output_text]
743
  )
744
 
745
- translate_btn.click(
746
  fn=handle_process,
747
  inputs=[input_mode, input_lang, input_text, audio_input, file_input],
748
- outputs=[extracted_text, output_text]
749
  ).then(
750
  fn=handle_translate,
751
  inputs=[extracted_text, input_lang, output_lang],
752
- outputs=output_text
753
  )
754
 
755
  submit_evaluation_btn.click(
@@ -757,16 +809,12 @@ class TranslationApp:
757
  inputs=[
758
  input_lang,
759
  output_lang,
760
- extracted_text,
761
- output_text,
762
  notation,
763
  correct_translation
764
  ],
765
  outputs=evaluation_status
766
- ).then(
767
- fn=clear_evaluation_fields,
768
- inputs=[],
769
- outputs=[notation, correct_translation, evaluation_status]
770
  )
771
 
772
  return interface
@@ -777,6 +825,12 @@ class TranslationApp:
777
 
778
  def main():
779
  """Main application entry point."""
 
 
 
 
 
 
780
  try:
781
  app = TranslationApp()
782
  interface = app.create_interface()
 
12
  import tempfile
13
  import csv
14
  import requests
15
+ import json
16
  from typing import Optional, Dict, Tuple, Any, Union
17
  from pathlib import Path
18
  from dataclasses import dataclass
 
94
  # Audio file extensions
95
  AUDIO_EXTENSIONS = [".wav", ".mp3", ".m4a"]
96
 
97
+ # GitHub repository details
98
+ GITHUB_REPO = "mgolomanta/Models_Evaluation"
99
+ EVALUATION_FILE = "evaluation.csv"
100
+ GITHUB_TOKEN = os.getenv("git_tk") # Set this in your environment variables
101
 
102
  # ================================
103
  # Logging Configuration
 
440
  # ================================
441
 
442
class EvaluationService:
    """Handles evaluation submissions and GitHub storage.

    Each evaluation is appended as one row to a CSV file stored in a GitHub
    repository. The file is read and rewritten through the repository
    "contents" REST endpoint, using the module-level ``GITHUB_REPO``,
    ``EVALUATION_FILE`` and ``GITHUB_TOKEN`` settings.
    """

    # Seconds to wait on GitHub before giving up. Without a timeout a stalled
    # connection would block the calling UI handler indefinitely.
    REQUEST_TIMEOUT = 15

    # Header line written when the remote file does not exist yet.
    CSV_HEADER = (
        "source_language_name,target_language_name,user_input,"
        "model_output,notation_value,correct_answer\n"
    )

    @staticmethod
    def _contents_url() -> str:
        """Return the contents-API URL of the evaluation file."""
        return f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"

    @staticmethod
    def _auth_headers() -> Dict[str, str]:
        """Return the Authorization header, or no headers when no token is set."""
        return {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}

    @staticmethod
    def _format_csv_row(fields) -> str:
        """Serialize *fields* as one fully quoted CSV line ending in a newline.

        Uses the ``csv`` module so that quotes, commas and line breaks inside
        ANY field are escaped correctly. ``None`` becomes an empty field.
        """
        import io

        buffer = io.StringIO()
        writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
        writer.writerow(["" if value is None else str(value) for value in fields])
        return buffer.getvalue()

    @staticmethod
    def _fetch_remote_file() -> Tuple[str, Optional[str]]:
        """Fetch the evaluation file once and return ``(text, sha)``.

        Content and SHA come from the same response so they always describe
        the same revision of the file.

        Returns:
            ``("", None)`` when GitHub answers 404 (file not created yet),
            otherwise the decoded UTF-8 text and the blob SHA.

        Raises:
            RuntimeError: on any other non-200 status, or when the response
                does not carry base64 content. Raising here stops the caller
                from mistaking a failed read for an empty file.
        """
        import base64

        response = requests.get(
            EvaluationService._contents_url(),
            headers=EvaluationService._auth_headers(),
            timeout=EvaluationService.REQUEST_TIMEOUT,
        )
        if response.status_code == 404:
            return "", None
        if response.status_code != 200:
            raise RuntimeError(
                f"GitHub API returned {response.status_code} while reading {EVALUATION_FILE}"
            )

        body = response.json()
        if body.get("encoding", "base64") != "base64":
            raise RuntimeError(
                f"Unexpected content encoding {body.get('encoding')!r} for {EVALUATION_FILE}"
            )
        text = base64.b64decode(body.get("content", "")).decode("utf-8")
        return text, body.get("sha")

    @staticmethod
    def get_github_file_sha() -> Optional[str]:
        """Get the SHA of the existing evaluation file on GitHub.

        Returns:
            The blob SHA, or ``None`` when the file is missing or the lookup
            fails (failures are logged, never raised).
        """
        try:
            _, sha = EvaluationService._fetch_remote_file()
            return sha
        except Exception as e:
            logger.error(f"Error getting file SHA: {e}")
            return None

    @staticmethod
    def read_existing_csv_content() -> str:
        """Read existing CSV content from GitHub.

        Returns:
            The decoded file text, or ``""`` when the file is missing or the
            read fails (failures are logged, never raised).
        """
        try:
            text, _ = EvaluationService._fetch_remote_file()
            return text
        except Exception as e:
            logger.error(f"Error reading existing CSV: {e}")
            return ""

    # NOTE(review): the `model_output` and `notation` parameter lines are
    # elided in the diff hunk this was reviewed from. Their names and order
    # follow the body and the positional call in TranslationApp; the
    # annotations and the `notation` default are reconstructed, so confirm
    # them against the full file.
    @staticmethod
    def save_evaluation_to_github(
        source_lang: str,
        target_lang: str,
        user_input: str,
        model_output: str,
        notation: Optional[str] = None,
        correct_answer: Optional[str] = None
    ) -> str:
        """
        Save evaluation to GitHub CSV file.

        Args:
            source_lang: Source language name
            target_lang: Target language name
            user_input: Text that was submitted for translation
            model_output: Translation produced by the model
            notation: Optional rating given by the user
            correct_answer: Optional corrected translation

        Returns:
            Status message
        """
        try:
            import base64

            # Prepare the new evaluation data
            new_row = EvaluationService._format_csv_row([
                source_lang,
                target_lang,
                user_input,
                model_output,
                notation if notation else "",
                correct_answer if correct_answer else "",
            ])

            # One read yields both content and SHA. A failed read raises and
            # is reported below instead of being treated as an empty file.
            existing_content, file_sha = EvaluationService._fetch_remote_file()

            if existing_content.strip():
                # Keep the new row on its own line even when the stored file
                # lacks a trailing newline.
                if not existing_content.endswith("\n"):
                    existing_content += "\n"
                csv_content = existing_content + new_row
            else:
                # File doesn't exist (or is blank): create it with headers
                csv_content = EvaluationService.CSV_HEADER + new_row

            # Encode content for GitHub API
            content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')

            request_headers = {
                **EvaluationService._auth_headers(),
                "Accept": "application/vnd.github.v3+json",
            }
            payload = {
                "message": "Add new evaluation",
                "content": content_encoded
            }
            # The SHA is required when updating an existing file
            if file_sha:
                payload["sha"] = file_sha

            response = requests.put(
                EvaluationService._contents_url(),
                headers=request_headers,
                json=payload,
                timeout=EvaluationService.REQUEST_TIMEOUT,
            )

            if response.status_code in (200, 201):
                return "✅ Evaluation submitted successfully to GitHub!"

            logger.error(f"GitHub API error: {response.status_code} - {response.text}")
            return f"❌ Error saving evaluation to GitHub: {response.status_code}"

        except Exception as e:
            logger.error(f"Failed to save evaluation to GitHub: {e}")
            return f"❌ Error saving evaluation: {str(e)}"
552
 
553
  # ================================
 
611
  notation: Optional[str],
612
  correct_answer: Optional[str]
613
  ) -> str:
614
+ """Submit evaluation data to GitHub."""
615
+ if not GITHUB_TOKEN:
616
+ return "❌ GitHub token not configured. Please set GITHUB_TOKEN environment variable."
617
+
618
+ if not user_input.strip() or not model_output.strip():
619
+ return "⚠️ Please translate text before submitting evaluation."
620
+
621
+ return self.evaluation_service.save_evaluation_to_github(
622
  source_lang, target_lang, user_input, model_output, notation, correct_answer
623
  )
624
 
 
693
  interactive=False
694
  )
695
 
696
+ # Store the last translation data for evaluation
697
+ last_input_state = gr.State("")
698
+ last_output_state = gr.State("")
699
+
700
  # Evaluation section
701
  gr.Markdown("### 📝 Model Evaluation")
702
  with gr.Group():
 
735
  text_input: str,
736
  audio_file: Optional[str],
737
  file_obj: Optional[gr.FileData]
738
+ ) -> Tuple[str, str, str, str]:
739
  """Handle initial input processing."""
740
  try:
741
  processed_text = self.process_input(
 
745
  audio_file,
746
  file_obj
747
  )
748
+ return processed_text, "", processed_text, ""
749
  except Exception as e:
750
  logger.error(f"Processing error: {e}")
751
+ return "", f"❌ Error: {str(e)}", "", ""
752
 
753
def handle_translate(
    extracted_text: str,
    source_lang: str,
    target_lang: str
) -> Tuple[str, str, str]:
    """Translate the processed text and report what to remember for evaluation.

    Returns a 3-tuple: the text to display as output, the source text to
    store, and the translation to store ("" when no translation was made).
    """
    has_text = bool(extracted_text.strip())
    if has_text:
        try:
            translate = self.translation_service.translate
            translated = translate(
                extracted_text,
                Language(source_lang),
                Language(target_lang)
            )
        except Exception as e:
            logger.error(f"Translation error: {e}")
            return f"❌ Translation error: {str(e)}", extracted_text, ""
        # Success: the same string is both displayed and stored
        return translated, extracted_text, translated

    # Blank or whitespace-only input: nothing to send to the translator
    return "📝 No text to translate.", extracted_text, ""
771
 
772
  def handle_evaluation(
773
  source_lang: str,
 
778
  correct_answer: Optional[str]
779
  ) -> str:
780
  """Handle evaluation submission."""
 
 
 
781
  return self.submit_evaluation(
782
  source_lang,
783
  target_lang,
 
787
  correct_answer
788
  )
789
 
 
 
 
 
 
 
 
 
790
  # Connect events
791
  input_mode.change(
792
  fn=update_visibility,
 
794
  outputs=[input_text, audio_input, file_input, extracted_text, output_text]
795
  )
796
 
797
+ process_result = translate_btn.click(
798
  fn=handle_process,
799
  inputs=[input_mode, input_lang, input_text, audio_input, file_input],
800
+ outputs=[extracted_text, output_text, last_input_state, last_output_state]
801
  ).then(
802
  fn=handle_translate,
803
  inputs=[extracted_text, input_lang, output_lang],
804
+ outputs=[output_text, last_input_state, last_output_state]
805
  )
806
 
807
  submit_evaluation_btn.click(
 
809
  inputs=[
810
  input_lang,
811
  output_lang,
812
+ last_input_state,
813
+ last_output_state,
814
  notation,
815
  correct_translation
816
  ],
817
  outputs=evaluation_status
 
 
 
 
818
  )
819
 
820
  return interface
 
825
 
826
  def main():
827
  """Main application entry point."""
828
+ # Check if GitHub token is set
829
+ if not os.getenv("git_tk"):
830
+ logger.warning("GITHUB_TOKEN environment variable not set. Evaluation submissions will fail.")
831
+ print("⚠️ WARNING: GITHUB_TOKEN environment variable not set!")
832
+ print(" Please set it to enable evaluation submissions to GitHub.")
833
+
834
  try:
835
  app = TranslationApp()
836
  interface = app.create_interface()