Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
"""
|
| 2 |
LocaleNLP Translation Service
|
| 3 |
============================
|
|
|
|
| 4 |
A multi-language translation application supporting English, Wolof, Hausa, and Darija.
|
| 5 |
Features text, audio, and document translation with automatic chaining for all language pairs.
|
| 6 |
-
|
| 7 |
Author: LocaleNLP
|
| 8 |
"""
|
| 9 |
|
|
@@ -15,8 +16,6 @@ from typing import Optional, Dict, Tuple, Any, Union
|
|
| 15 |
from pathlib import Path
|
| 16 |
from dataclasses import dataclass
|
| 17 |
from enum import Enum
|
| 18 |
-
import csv
|
| 19 |
-
from datetime import datetime
|
| 20 |
|
| 21 |
import gradio as gr
|
| 22 |
import torch
|
|
@@ -27,7 +26,7 @@ from bs4 import BeautifulSoup
|
|
| 27 |
from markdown import markdown
|
| 28 |
import chardet
|
| 29 |
from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
|
| 30 |
-
from huggingface_hub import login
|
| 31 |
|
| 32 |
# ================================
|
| 33 |
# Configuration & Constants
|
|
@@ -133,8 +132,7 @@ class ModelManager:
|
|
| 133 |
logger.info(f"Loading translation model: {config.model_name}")
|
| 134 |
|
| 135 |
# Authenticate with Hugging Face if token provided
|
| 136 |
-
hf_token
|
| 137 |
-
if hf_token:
|
| 138 |
login(token=hf_token)
|
| 139 |
|
| 140 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
|
@@ -468,72 +466,6 @@ class TranslationApp:
|
|
| 468 |
|
| 469 |
return ""
|
| 470 |
|
| 471 |
-
def log_feedback(
|
| 472 |
-
self,
|
| 473 |
-
source_lang: Language,
|
| 474 |
-
target_lang: Language,
|
| 475 |
-
user_input: str,
|
| 476 |
-
model_output: str,
|
| 477 |
-
notation: Optional[float] = None,
|
| 478 |
-
correct_translation: Optional[str] = None
|
| 479 |
-
):
|
| 480 |
-
"""
|
| 481 |
-
Log user feedback to a CSV file and push to Hugging Face repo if running in Space.
|
| 482 |
-
"""
|
| 483 |
-
# Define filename
|
| 484 |
-
src = source_lang.value.lower()
|
| 485 |
-
tgt = target_lang.value.lower()
|
| 486 |
-
filename = f"{src}_{tgt}.csv"
|
| 487 |
-
|
| 488 |
-
headers = [
|
| 489 |
-
"timestamp",
|
| 490 |
-
"source_language",
|
| 491 |
-
"target_language",
|
| 492 |
-
"user_input",
|
| 493 |
-
"model_output",
|
| 494 |
-
"notation",
|
| 495 |
-
"correct_translation"
|
| 496 |
-
]
|
| 497 |
-
row = {
|
| 498 |
-
"timestamp": datetime.now().isoformat(),
|
| 499 |
-
"source_language": source_lang.value,
|
| 500 |
-
"target_language": target_lang.value,
|
| 501 |
-
"user_input": user_input.strip(),
|
| 502 |
-
"model_output": model_output.strip(),
|
| 503 |
-
"notation": notation,
|
| 504 |
-
"correct_translation": correct_translation.strip() if correct_translation else ""
|
| 505 |
-
}
|
| 506 |
-
|
| 507 |
-
file_exists = Path(filename).exists()
|
| 508 |
-
|
| 509 |
-
try:
|
| 510 |
-
# Write to local CSV
|
| 511 |
-
with open(filename, mode="a", encoding="utf-8", newline="") as f:
|
| 512 |
-
writer = csv.DictWriter(f, fieldnames=headers)
|
| 513 |
-
if not file_exists:
|
| 514 |
-
writer.writeheader()
|
| 515 |
-
writer.writerow(row)
|
| 516 |
-
logger.info(f"Feedback saved locally to {filename}")
|
| 517 |
-
|
| 518 |
-
# Try to push to HF repo if HF_TOKEN exists
|
| 519 |
-
hf_token = os.getenv("hffff")
|
| 520 |
-
space_id = os.getenv("SPACE_ID") # e.g., "yourusername/your-space-name"
|
| 521 |
-
if hf_token and space_id:
|
| 522 |
-
try:
|
| 523 |
-
upload_file(
|
| 524 |
-
path_or_fileobj=filename,
|
| 525 |
-
path_in_repo=filename,
|
| 526 |
-
repo_id=space_id,
|
| 527 |
-
token=hf_token,
|
| 528 |
-
repo_type="space",
|
| 529 |
-
commit_message=f"Add/update feedback: {filename}"
|
| 530 |
-
)
|
| 531 |
-
logger.info(f"Successfully pushed {filename} to Hugging Face Space repo.")
|
| 532 |
-
except Exception as e:
|
| 533 |
-
logger.warning(f"Failed to push feedback to HF Hub: {e}")
|
| 534 |
-
except Exception as e:
|
| 535 |
-
logger.error(f"Failed to save feedback to {filename}: {e}")
|
| 536 |
-
|
| 537 |
def create_interface(self) -> gr.Blocks:
|
| 538 |
"""Create and return the Gradio interface."""
|
| 539 |
|
|
@@ -604,35 +536,7 @@ class TranslationApp:
|
|
| 604 |
lines=10,
|
| 605 |
interactive=False
|
| 606 |
)
|
| 607 |
-
|
| 608 |
-
# --- FEEDBACK SECTION ---
|
| 609 |
-
gr.Markdown("### 📝 Provide Feedback on Translation")
|
| 610 |
-
|
| 611 |
-
notation = gr.Slider(
|
| 612 |
-
minimum=1,
|
| 613 |
-
maximum=5,
|
| 614 |
-
step=1,
|
| 615 |
-
label="Rate translation quality (1-5 stars)",
|
| 616 |
-
value=None,
|
| 617 |
-
interactive=True
|
| 618 |
-
)
|
| 619 |
-
|
| 620 |
-
correct_translation = gr.Textbox(
|
| 621 |
-
label="Correct Translation (if incorrect)",
|
| 622 |
-
placeholder="Please provide the correct version if inaccurate...",
|
| 623 |
-
lines=4,
|
| 624 |
-
value=""
|
| 625 |
-
)
|
| 626 |
-
|
| 627 |
-
submit_feedback = gr.Button("📤 Submit Feedback", variant="primary")
|
| 628 |
-
feedback_status = gr.Textbox(label="Feedback Status", value="", interactive=False)
|
| 629 |
-
|
| 630 |
-
# Hidden states to preserve context for feedback
|
| 631 |
-
user_input_state = gr.State()
|
| 632 |
-
model_output_state = gr.State()
|
| 633 |
-
source_lang_state = gr.State()
|
| 634 |
-
target_lang_state = gr.State()
|
| 635 |
-
|
| 636 |
# Event handlers
|
| 637 |
def update_visibility(mode: str) -> Dict[str, Any]:
|
| 638 |
"""Update component visibility based on input mode."""
|
|
@@ -665,70 +569,24 @@ class TranslationApp:
|
|
| 665 |
logger.error(f"Processing error: {e}")
|
| 666 |
return "", f"❌ Error: {str(e)}"
|
| 667 |
|
| 668 |
-
def
|
| 669 |
extracted_text: str,
|
| 670 |
source_lang: str,
|
| 671 |
target_lang: str
|
| 672 |
-
) ->
|
| 673 |
-
"""Handle translation
|
| 674 |
if not extracted_text.strip():
|
| 675 |
-
return "📝 No text to translate."
|
| 676 |
try:
|
| 677 |
-
|
| 678 |
extracted_text,
|
| 679 |
Language(source_lang),
|
| 680 |
Language(target_lang)
|
| 681 |
)
|
| 682 |
-
return translated, extracted_text, source_lang
|
| 683 |
except Exception as e:
|
| 684 |
logger.error(f"Translation error: {e}")
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
def set_states_for_feedback(
|
| 689 |
-
output: str,
|
| 690 |
-
inp: str,
|
| 691 |
-
src: str,
|
| 692 |
-
tgt: str
|
| 693 |
-
):
|
| 694 |
-
"""Set hidden states for feedback submission."""
|
| 695 |
-
return inp, output, src, tgt
|
| 696 |
-
|
| 697 |
-
def save_feedback(
|
| 698 |
-
user_input: str,
|
| 699 |
-
model_output: str,
|
| 700 |
-
source_lang_str: str,
|
| 701 |
-
target_lang_str: str,
|
| 702 |
-
notation_val: float,
|
| 703 |
-
correct_trans: str
|
| 704 |
-
):
|
| 705 |
-
"""Save feedback to CSV and clear form."""
|
| 706 |
-
try:
|
| 707 |
-
source_lang = Language(source_lang_str)
|
| 708 |
-
target_lang = Language(target_lang_str)
|
| 709 |
-
|
| 710 |
-
self.log_feedback(
|
| 711 |
-
source_lang=source_lang,
|
| 712 |
-
target_lang=target_lang,
|
| 713 |
-
user_input=user_input,
|
| 714 |
-
model_output=model_output,
|
| 715 |
-
notation=notation_val,
|
| 716 |
-
correct_translation=correct_trans or None
|
| 717 |
-
)
|
| 718 |
-
# Return values to reset UI components
|
| 719 |
-
return (
|
| 720 |
-
"✅ Thank you for your feedback!",
|
| 721 |
-
None, # reset slider
|
| 722 |
-
"" # clear correction box
|
| 723 |
-
)
|
| 724 |
-
except Exception as e:
|
| 725 |
-
logger.error(f"Feedback submission failed: {e}")
|
| 726 |
-
return (
|
| 727 |
-
f"❌ Failed to save feedback: {str(e)}",
|
| 728 |
-
notation_val, # keep value
|
| 729 |
-
correct_trans # keep text
|
| 730 |
-
)
|
| 731 |
-
|
| 732 |
# Connect events
|
| 733 |
input_mode.change(
|
| 734 |
fn=update_visibility,
|
|
@@ -736,37 +594,16 @@ class TranslationApp:
|
|
| 736 |
outputs=[input_text, audio_input, file_input, extracted_text, output_text]
|
| 737 |
)
|
| 738 |
|
| 739 |
-
|
| 740 |
-
process_event = translate_btn.click(
|
| 741 |
fn=handle_process,
|
| 742 |
inputs=[input_mode, input_lang, input_text, audio_input, file_input],
|
| 743 |
outputs=[extracted_text, output_text]
|
| 744 |
-
)
|
| 745 |
-
|
| 746 |
-
process_event.success(
|
| 747 |
-
fn=handle_translate_with_input,
|
| 748 |
inputs=[extracted_text, input_lang, output_lang],
|
| 749 |
-
outputs=
|
| 750 |
-
).success(
|
| 751 |
-
fn=set_states_for_feedback,
|
| 752 |
-
inputs=[output_text, extracted_text, input_lang, output_lang],
|
| 753 |
-
outputs=[user_input_state, model_output_state, source_lang_state, target_lang_state]
|
| 754 |
-
)
|
| 755 |
-
|
| 756 |
-
# Feedback submission
|
| 757 |
-
submit_feedback.click(
|
| 758 |
-
fn=save_feedback,
|
| 759 |
-
inputs=[
|
| 760 |
-
user_input_state,
|
| 761 |
-
model_output_state,
|
| 762 |
-
source_lang_state,
|
| 763 |
-
target_lang_state,
|
| 764 |
-
notation,
|
| 765 |
-
correct_translation
|
| 766 |
-
],
|
| 767 |
-
outputs=[feedback_status, notation, correct_translation]
|
| 768 |
)
|
| 769 |
-
|
| 770 |
return interface
|
| 771 |
|
| 772 |
# ================================
|
|
|
|
| 1 |
"""
|
| 2 |
LocaleNLP Translation Service
|
| 3 |
============================
|
| 4 |
+
|
| 5 |
A multi-language translation application supporting English, Wolof, Hausa, and Darija.
|
| 6 |
Features text, audio, and document translation with automatic chaining for all language pairs.
|
| 7 |
+
|
| 8 |
Author: LocaleNLP
|
| 9 |
"""
|
| 10 |
|
|
|
|
| 16 |
from pathlib import Path
|
| 17 |
from dataclasses import dataclass
|
| 18 |
from enum import Enum
|
|
|
|
|
|
|
| 19 |
|
| 20 |
import gradio as gr
|
| 21 |
import torch
|
|
|
|
| 26 |
from markdown import markdown
|
| 27 |
import chardet
|
| 28 |
from transformers import pipeline, MarianTokenizer, AutoModelForSeq2SeqLM
|
| 29 |
+
from huggingface_hub import login
|
| 30 |
|
| 31 |
# ================================
|
| 32 |
# Configuration & Constants
|
|
|
|
| 132 |
logger.info(f"Loading translation model: {config.model_name}")
|
| 133 |
|
| 134 |
# Authenticate with Hugging Face if token provided
|
| 135 |
+
if hf_token := os.getenv("hffff"):
|
|
|
|
| 136 |
login(token=hf_token)
|
| 137 |
|
| 138 |
model = AutoModelForSeq2SeqLM.from_pretrained(
|
|
|
|
| 466 |
|
| 467 |
return ""
|
| 468 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
def create_interface(self) -> gr.Blocks:
|
| 470 |
"""Create and return the Gradio interface."""
|
| 471 |
|
|
|
|
| 536 |
lines=10,
|
| 537 |
interactive=False
|
| 538 |
)
|
| 539 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
# Event handlers
|
| 541 |
def update_visibility(mode: str) -> Dict[str, Any]:
|
| 542 |
"""Update component visibility based on input mode."""
|
|
|
|
| 569 |
logger.error(f"Processing error: {e}")
|
| 570 |
return "", f"❌ Error: {str(e)}"
|
| 571 |
|
| 572 |
+
def handle_translate(
|
| 573 |
extracted_text: str,
|
| 574 |
source_lang: str,
|
| 575 |
target_lang: str
|
| 576 |
+
) -> str:
|
| 577 |
+
"""Handle translation of processed text."""
|
| 578 |
if not extracted_text.strip():
|
| 579 |
+
return "📝 No text to translate."
|
| 580 |
try:
|
| 581 |
+
return self.translation_service.translate(
|
| 582 |
extracted_text,
|
| 583 |
Language(source_lang),
|
| 584 |
Language(target_lang)
|
| 585 |
)
|
|
|
|
| 586 |
except Exception as e:
|
| 587 |
logger.error(f"Translation error: {e}")
|
| 588 |
+
return f"❌ Translation error: {str(e)}"
|
| 589 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 590 |
# Connect events
|
| 591 |
input_mode.change(
|
| 592 |
fn=update_visibility,
|
|
|
|
| 594 |
outputs=[input_text, audio_input, file_input, extracted_text, output_text]
|
| 595 |
)
|
| 596 |
|
| 597 |
+
translate_btn.click(
|
|
|
|
| 598 |
fn=handle_process,
|
| 599 |
inputs=[input_mode, input_lang, input_text, audio_input, file_input],
|
| 600 |
outputs=[extracted_text, output_text]
|
| 601 |
+
).then(
|
| 602 |
+
fn=handle_translate,
|
|
|
|
|
|
|
| 603 |
inputs=[extracted_text, input_lang, output_lang],
|
| 604 |
+
outputs=output_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
)
|
| 606 |
+
|
| 607 |
return interface
|
| 608 |
|
| 609 |
# ================================
|