Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ import logging
|
|
| 12 |
import tempfile
|
| 13 |
import csv
|
| 14 |
import requests
|
|
|
|
| 15 |
from typing import Optional, Dict, Tuple, Any, Union
|
| 16 |
from pathlib import Path
|
| 17 |
from dataclasses import dataclass
|
|
@@ -93,8 +94,10 @@ SUPPORTED_FILE_TYPES = [
|
|
| 93 |
# Audio file extensions
|
| 94 |
AUDIO_EXTENSIONS = [".wav", ".mp3", ".m4a"]
|
| 95 |
|
| 96 |
-
#
|
| 97 |
-
|
|
|
|
|
|
|
| 98 |
|
| 99 |
# ================================
|
| 100 |
# Logging Configuration
|
|
@@ -437,12 +440,42 @@ class AudioProcessor:
|
|
| 437 |
# ================================
|
| 438 |
|
| 439 |
class EvaluationService:
|
| 440 |
-
"""Handles evaluation submissions and storage."""
|
| 441 |
|
| 442 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
|
| 444 |
@staticmethod
|
| 445 |
-
def
|
| 446 |
source_lang: str,
|
| 447 |
target_lang: str,
|
| 448 |
user_input: str,
|
|
@@ -451,7 +484,7 @@ class EvaluationService:
|
|
| 451 |
correct_answer: Optional[str] = None
|
| 452 |
) -> str:
|
| 453 |
"""
|
| 454 |
-
Save evaluation to CSV file.
|
| 455 |
|
| 456 |
Args:
|
| 457 |
source_lang: Source language name
|
|
@@ -465,37 +498,56 @@ class EvaluationService:
|
|
| 465 |
Status message
|
| 466 |
"""
|
| 467 |
try:
|
| 468 |
-
#
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
with open(EvaluationService.EVALUATION_CSV_PATH, mode='a', newline='', encoding='utf-8') as file:
|
| 472 |
-
writer = csv.writer(file)
|
| 473 |
-
|
| 474 |
-
# Write headers if file is new
|
| 475 |
-
if not file_exists:
|
| 476 |
-
writer.writerow([
|
| 477 |
-
"source_language_name",
|
| 478 |
-
"target_language_name",
|
| 479 |
-
"user_input",
|
| 480 |
-
"model_output",
|
| 481 |
-
"notation_value",
|
| 482 |
-
"correct_answer"
|
| 483 |
-
])
|
| 484 |
-
|
| 485 |
-
# Write evaluation data
|
| 486 |
-
writer.writerow([
|
| 487 |
-
source_lang,
|
| 488 |
-
target_lang,
|
| 489 |
-
user_input,
|
| 490 |
-
model_output,
|
| 491 |
-
notation if notation else "",
|
| 492 |
-
correct_answer if correct_answer else ""
|
| 493 |
-
])
|
| 494 |
|
| 495 |
-
|
|
|
|
| 496 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
except Exception as e:
|
| 498 |
-
logger.error(f"Failed to save evaluation: {e}")
|
| 499 |
return f"❌ Error saving evaluation: {str(e)}"
|
| 500 |
|
| 501 |
# ================================
|
|
@@ -559,8 +611,14 @@ class TranslationApp:
|
|
| 559 |
notation: Optional[str],
|
| 560 |
correct_answer: Optional[str]
|
| 561 |
) -> str:
|
| 562 |
-
"""Submit evaluation data."""
|
| 563 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
source_lang, target_lang, user_input, model_output, notation, correct_answer
|
| 565 |
)
|
| 566 |
|
|
@@ -635,6 +693,10 @@ class TranslationApp:
|
|
| 635 |
interactive=False
|
| 636 |
)
|
| 637 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
# Evaluation section
|
| 639 |
gr.Markdown("### 📝 Model Evaluation")
|
| 640 |
with gr.Group():
|
|
@@ -673,7 +735,7 @@ class TranslationApp:
|
|
| 673 |
text_input: str,
|
| 674 |
audio_file: Optional[str],
|
| 675 |
file_obj: Optional[gr.FileData]
|
| 676 |
-
) -> Tuple[str, str]:
|
| 677 |
"""Handle initial input processing."""
|
| 678 |
try:
|
| 679 |
processed_text = self.process_input(
|
|
@@ -683,28 +745,29 @@ class TranslationApp:
|
|
| 683 |
audio_file,
|
| 684 |
file_obj
|
| 685 |
)
|
| 686 |
-
return processed_text, ""
|
| 687 |
except Exception as e:
|
| 688 |
logger.error(f"Processing error: {e}")
|
| 689 |
-
return "", f"❌ Error: {str(e)}"
|
| 690 |
|
| 691 |
def handle_translate(
|
| 692 |
extracted_text: str,
|
| 693 |
source_lang: str,
|
| 694 |
target_lang: str
|
| 695 |
-
) -> str:
|
| 696 |
"""Handle translation of processed text."""
|
| 697 |
if not extracted_text.strip():
|
| 698 |
-
return "📝 No text to translate."
|
| 699 |
try:
|
| 700 |
-
|
| 701 |
extracted_text,
|
| 702 |
Language(source_lang),
|
| 703 |
Language(target_lang)
|
| 704 |
)
|
|
|
|
| 705 |
except Exception as e:
|
| 706 |
logger.error(f"Translation error: {e}")
|
| 707 |
-
return f"❌ Translation error: {str(e)}"
|
| 708 |
|
| 709 |
def handle_evaluation(
|
| 710 |
source_lang: str,
|
|
@@ -715,9 +778,6 @@ class TranslationApp:
|
|
| 715 |
correct_answer: Optional[str]
|
| 716 |
) -> str:
|
| 717 |
"""Handle evaluation submission."""
|
| 718 |
-
if not user_input.strip() or not model_output.strip():
|
| 719 |
-
return "⚠️ Please translate text before submitting evaluation."
|
| 720 |
-
|
| 721 |
return self.submit_evaluation(
|
| 722 |
source_lang,
|
| 723 |
target_lang,
|
|
@@ -727,14 +787,6 @@ class TranslationApp:
|
|
| 727 |
correct_answer
|
| 728 |
)
|
| 729 |
|
| 730 |
-
def clear_evaluation_fields() -> Dict:
|
| 731 |
-
"""Clear evaluation fields after submission."""
|
| 732 |
-
return {
|
| 733 |
-
notation: gr.update(value=None),
|
| 734 |
-
correct_translation: gr.update(value=""),
|
| 735 |
-
evaluation_status: gr.update(value="Evaluation cleared. Ready for next submission.")
|
| 736 |
-
}
|
| 737 |
-
|
| 738 |
# Connect events
|
| 739 |
input_mode.change(
|
| 740 |
fn=update_visibility,
|
|
@@ -742,14 +794,14 @@ class TranslationApp:
|
|
| 742 |
outputs=[input_text, audio_input, file_input, extracted_text, output_text]
|
| 743 |
)
|
| 744 |
|
| 745 |
-
translate_btn.click(
|
| 746 |
fn=handle_process,
|
| 747 |
inputs=[input_mode, input_lang, input_text, audio_input, file_input],
|
| 748 |
-
outputs=[extracted_text, output_text]
|
| 749 |
).then(
|
| 750 |
fn=handle_translate,
|
| 751 |
inputs=[extracted_text, input_lang, output_lang],
|
| 752 |
-
outputs=output_text
|
| 753 |
)
|
| 754 |
|
| 755 |
submit_evaluation_btn.click(
|
|
@@ -757,16 +809,12 @@ class TranslationApp:
|
|
| 757 |
inputs=[
|
| 758 |
input_lang,
|
| 759 |
output_lang,
|
| 760 |
-
|
| 761 |
-
|
| 762 |
notation,
|
| 763 |
correct_translation
|
| 764 |
],
|
| 765 |
outputs=evaluation_status
|
| 766 |
-
).then(
|
| 767 |
-
fn=clear_evaluation_fields,
|
| 768 |
-
inputs=[],
|
| 769 |
-
outputs=[notation, correct_translation, evaluation_status]
|
| 770 |
)
|
| 771 |
|
| 772 |
return interface
|
|
@@ -777,6 +825,12 @@ class TranslationApp:
|
|
| 777 |
|
| 778 |
def main():
|
| 779 |
"""Main application entry point."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 780 |
try:
|
| 781 |
app = TranslationApp()
|
| 782 |
interface = app.create_interface()
|
|
|
|
| 12 |
import tempfile
|
| 13 |
import csv
|
| 14 |
import requests
|
| 15 |
+
import json
|
| 16 |
from typing import Optional, Dict, Tuple, Any, Union
|
| 17 |
from pathlib import Path
|
| 18 |
from dataclasses import dataclass
|
|
|
|
| 94 |
# Audio file extensions
|
| 95 |
AUDIO_EXTENSIONS = [".wav", ".mp3", ".m4a"]
|
| 96 |
|
| 97 |
+
# GitHub repository ("owner/name") that stores submitted evaluations
GITHUB_REPO = "mgolomanta/Models_Evaluation"
# Path of the evaluations CSV file inside that repository
EVALUATION_FILE = "evaluation.csv"
# Access token for the GitHub API, read from the environment.
# `git_tk` keeps precedence (it is the name this app has always read);
# `GITHUB_TOKEN` is accepted as a fallback because the app's user-facing
# messages instruct operators to set a variable with that name.
GITHUB_TOKEN = os.getenv("git_tk") or os.getenv("GITHUB_TOKEN")
|
| 101 |
|
| 102 |
# ================================
|
| 103 |
# Logging Configuration
|
|
|
|
| 440 |
# ================================
|
| 441 |
|
| 442 |
class EvaluationService:
|
| 443 |
+
"""Handles evaluation submissions and GitHub storage."""
|
| 444 |
|
| 445 |
+
@staticmethod
def get_github_file_sha() -> Optional[str]:
    """Return the SHA of the evaluation file stored on GitHub.

    Sends a GET request to the repository contents endpoint for
    ``EVALUATION_FILE`` in ``GITHUB_REPO``. The ``Authorization`` header is
    attached only when ``GITHUB_TOKEN`` is set.

    Returns:
        The ``sha`` field of the JSON response when the status code is 200;
        ``None`` for any other status code or when the request or the
        response decoding raises.
    """
    url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"
    headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
    try:
        # requests applies no timeout by default; without one a stalled
        # connection would block the caller indefinitely.
        response = requests.get(url, headers=headers, timeout=10)

        if response.status_code == 200:
            return response.json().get("sha")
        return None
    except Exception as e:
        # Deliberately best-effort: any failure is logged and reported to
        # the caller as "no SHA available".
        logger.error(f"Error getting file SHA: {e}")
        return None
|
| 459 |
+
|
| 460 |
+
@staticmethod
def read_existing_csv_content() -> str:
    """Read the current text of the evaluation CSV from GitHub.

    Sends a GET request to the repository contents endpoint for
    ``EVALUATION_FILE`` in ``GITHUB_REPO`` and base64-decodes the
    ``content`` field of the JSON response as UTF-8. The ``Authorization``
    header is attached only when ``GITHUB_TOKEN`` is set.

    Returns:
        The decoded file text when the status code is 200; an empty string
        for any other status code or when the request or decoding raises.

    Note:
        A missing file and a failed read both yield ``""``, so a caller
        cannot tell the two cases apart from the return value alone.
    """
    import base64

    url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"
    headers = {"Authorization": f"token {GITHUB_TOKEN}"} if GITHUB_TOKEN else {}
    try:
        # requests applies no timeout by default; without one a stalled
        # connection would block the caller indefinitely.
        response = requests.get(url, headers=headers, timeout=10)

        if response.status_code == 200:
            content = response.json().get("content", "")
            return base64.b64decode(content).decode('utf-8')
        return ""
    except Exception as e:
        # Deliberately best-effort: any failure is logged and reported to
        # the caller as empty content.
        logger.error(f"Error reading existing CSV: {e}")
        return ""
|
| 476 |
|
| 477 |
@staticmethod
|
| 478 |
+
def save_evaluation_to_github(
|
| 479 |
source_lang: str,
|
| 480 |
target_lang: str,
|
| 481 |
user_input: str,
|
|
|
|
| 484 |
correct_answer: Optional[str] = None
|
| 485 |
) -> str:
|
| 486 |
"""
|
| 487 |
+
Save evaluation to GitHub CSV file.
|
| 488 |
|
| 489 |
Args:
|
| 490 |
source_lang: Source language name
|
|
|
|
| 498 |
Status message
|
| 499 |
"""
|
| 500 |
try:
|
| 501 |
+
# Prepare the new evaluation data
|
| 502 |
+
new_row = f'"{source_lang}","{target_lang}","{user_input.replace(\'"\', \'""\')}","{model_output.replace(\'"\', \'""\')}","{notation if notation else \'\'}","{correct_answer if correct_answer else \'\'}"\n'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
+
# Get existing content
|
| 505 |
+
existing_content = EvaluationService.read_existing_csv_content()
|
| 506 |
|
| 507 |
+
# Check if file exists and has headers
|
| 508 |
+
if existing_content.strip():
|
| 509 |
+
# File exists, append new row
|
| 510 |
+
csv_content = existing_content + new_row
|
| 511 |
+
else:
|
| 512 |
+
# File doesn't exist, create with headers
|
| 513 |
+
headers = "source_language_name,target_language_name,user_input,model_output,notation_value,correct_answer\n"
|
| 514 |
+
csv_content = headers + new_row
|
| 515 |
+
|
| 516 |
+
# Encode content for GitHub API
|
| 517 |
+
import base64
|
| 518 |
+
content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
|
| 519 |
+
|
| 520 |
+
# Prepare GitHub API request
|
| 521 |
+
url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{EVALUATION_FILE}"
|
| 522 |
+
headers = {
|
| 523 |
+
"Authorization": f"token {GITHUB_TOKEN}",
|
| 524 |
+
"Accept": "application/vnd.github.v3+json"
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
# Check if file exists to get SHA
|
| 528 |
+
file_sha = EvaluationService.get_github_file_sha()
|
| 529 |
+
|
| 530 |
+
# Prepare payload
|
| 531 |
+
payload = {
|
| 532 |
+
"message": "Add new evaluation",
|
| 533 |
+
"content": content_encoded
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
# Add SHA if file exists (for update)
|
| 537 |
+
if file_sha:
|
| 538 |
+
payload["sha"] = file_sha
|
| 539 |
+
|
| 540 |
+
# Send request to GitHub API
|
| 541 |
+
response = requests.put(url, headers=headers, json=payload)
|
| 542 |
+
|
| 543 |
+
if response.status_code in [200, 201]:
|
| 544 |
+
return "✅ Evaluation submitted successfully to GitHub!"
|
| 545 |
+
else:
|
| 546 |
+
logger.error(f"GitHub API error: {response.status_code} - {response.text}")
|
| 547 |
+
return f"❌ Error saving evaluation to GitHub: {response.status_code}"
|
| 548 |
+
|
| 549 |
except Exception as e:
|
| 550 |
+
logger.error(f"Failed to save evaluation to GitHub: {e}")
|
| 551 |
return f"❌ Error saving evaluation: {str(e)}"
|
| 552 |
|
| 553 |
# ================================
|
|
|
|
| 611 |
notation: Optional[str],
|
| 612 |
correct_answer: Optional[str]
|
| 613 |
) -> str:
|
| 614 |
+
"""Submit evaluation data to GitHub."""
|
| 615 |
+
if not GITHUB_TOKEN:
|
| 616 |
+
return "❌ GitHub token not configured. Please set GITHUB_TOKEN environment variable."
|
| 617 |
+
|
| 618 |
+
if not user_input.strip() or not model_output.strip():
|
| 619 |
+
return "⚠️ Please translate text before submitting evaluation."
|
| 620 |
+
|
| 621 |
+
return self.evaluation_service.save_evaluation_to_github(
|
| 622 |
source_lang, target_lang, user_input, model_output, notation, correct_answer
|
| 623 |
)
|
| 624 |
|
|
|
|
| 693 |
interactive=False
|
| 694 |
)
|
| 695 |
|
| 696 |
+
# Store the last translation data for evaluation
|
| 697 |
+
last_input_state = gr.State("")
|
| 698 |
+
last_output_state = gr.State("")
|
| 699 |
+
|
| 700 |
# Evaluation section
|
| 701 |
gr.Markdown("### 📝 Model Evaluation")
|
| 702 |
with gr.Group():
|
|
|
|
| 735 |
text_input: str,
|
| 736 |
audio_file: Optional[str],
|
| 737 |
file_obj: Optional[gr.FileData]
|
| 738 |
+
) -> Tuple[str, str, str, str]:
|
| 739 |
"""Handle initial input processing."""
|
| 740 |
try:
|
| 741 |
processed_text = self.process_input(
|
|
|
|
| 745 |
audio_file,
|
| 746 |
file_obj
|
| 747 |
)
|
| 748 |
+
return processed_text, "", processed_text, ""
|
| 749 |
except Exception as e:
|
| 750 |
logger.error(f"Processing error: {e}")
|
| 751 |
+
return "", f"❌ Error: {str(e)}", "", ""
|
| 752 |
|
| 753 |
def handle_translate(
    extracted_text: str,
    source_lang: str,
    target_lang: str
) -> Tuple[str, str, str]:
    """Translate the processed text and report values for the UI.

    Returns:
        A 3-tuple ``(output_text, last_input, last_output)``:
        the text to display, the text that was submitted for translation,
        and the translation itself (empty when nothing was translated or
        the translation failed).
    """
    # Guard clause: whitespace-only input is never sent to the translator.
    if extracted_text.strip() == "":
        return "📝 No text to translate.", extracted_text, ""

    try:
        translate = self.translation_service.translate
        source = Language(source_lang)
        target = Language(target_lang)
        translation = translate(extracted_text, source, target)
    except Exception as e:
        logger.error(f"Translation error: {e}")
        return f"❌ Translation error: {str(e)}", extracted_text, ""
    else:
        # The translation is both shown to the user and kept as last output.
        return translation, extracted_text, translation
|
| 771 |
|
| 772 |
def handle_evaluation(
|
| 773 |
source_lang: str,
|
|
|
|
| 778 |
correct_answer: Optional[str]
|
| 779 |
) -> str:
|
| 780 |
"""Handle evaluation submission."""
|
|
|
|
|
|
|
|
|
|
| 781 |
return self.submit_evaluation(
|
| 782 |
source_lang,
|
| 783 |
target_lang,
|
|
|
|
| 787 |
correct_answer
|
| 788 |
)
|
| 789 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 790 |
# Connect events
|
| 791 |
input_mode.change(
|
| 792 |
fn=update_visibility,
|
|
|
|
| 794 |
outputs=[input_text, audio_input, file_input, extracted_text, output_text]
|
| 795 |
)
|
| 796 |
|
| 797 |
+
process_result = translate_btn.click(
|
| 798 |
fn=handle_process,
|
| 799 |
inputs=[input_mode, input_lang, input_text, audio_input, file_input],
|
| 800 |
+
outputs=[extracted_text, output_text, last_input_state, last_output_state]
|
| 801 |
).then(
|
| 802 |
fn=handle_translate,
|
| 803 |
inputs=[extracted_text, input_lang, output_lang],
|
| 804 |
+
outputs=[output_text, last_input_state, last_output_state]
|
| 805 |
)
|
| 806 |
|
| 807 |
submit_evaluation_btn.click(
|
|
|
|
| 809 |
inputs=[
|
| 810 |
input_lang,
|
| 811 |
output_lang,
|
| 812 |
+
last_input_state,
|
| 813 |
+
last_output_state,
|
| 814 |
notation,
|
| 815 |
correct_translation
|
| 816 |
],
|
| 817 |
outputs=evaluation_status
|
|
|
|
|
|
|
|
|
|
|
|
|
| 818 |
)
|
| 819 |
|
| 820 |
return interface
|
|
|
|
| 825 |
|
| 826 |
def main():
|
| 827 |
"""Main application entry point."""
|
| 828 |
+
# Check if GitHub token is set
|
| 829 |
+
if not os.getenv("git_tk"):
|
| 830 |
+
logger.warning("GITHUB_TOKEN environment variable not set. Evaluation submissions will fail.")
|
| 831 |
+
print("⚠️ WARNING: GITHUB_TOKEN environment variable not set!")
|
| 832 |
+
print(" Please set it to enable evaluation submissions to GitHub.")
|
| 833 |
+
|
| 834 |
try:
|
| 835 |
app = TranslationApp()
|
| 836 |
interface = app.create_interface()
|