Spaces:
Runtime error
Runtime error
| """ | |
| File: app.py | |
| Description: Translate text... | |
| Author: Didier Guillevic | |
| Date: 2024-09-07 | |
| """ | |
# NOTE(review): `spaces` is not referenced anywhere in the visible code;
# presumably it is imported for its import-time effect on the hosting
# platform -- confirm before removing or reordering it.
import spaces
import logging
# Module-level logger; basicConfig sets the root logger level to INFO.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
import gradio as gr
import langdetect
from deep_translator import GoogleTranslator
# NOTE(review): `nlp_xx` is not used in the visible code -- confirm whether
# importing model_spacy is still required.
from model_spacy import nlp_xx
import model_translation
# Single M2M100 wrapper instance, created once at import time and used by
# translate_text() for every request.
m2m100 = model_translation.ModelM2M100()
def translate_with_Helsinki(
        chunks, src_lang, tgt_lang, input_max_length, output_max_length) -> str:
    """Translate the chunks with the Helsinki model selected by source language.

    Args:
        chunks: sequence of text chunks (strings); each one is translated
            separately.
        src_lang: source language code used to pick the tokenizer and model.
        tgt_lang: currently unused; kept so the signature stays compatible.
        input_max_length: passed as ``max_length`` to the tokenizer, with
            truncation enabled.
        output_max_length: passed as ``max_length`` to ``model.generate``.

    Returns:
        The translated chunks joined with newlines. A message starting with
        "ISSUE:" is returned when there is no model for ``src_lang``, and an
        empty string is returned when ``chunks`` is empty.
    """
    # NOTE(review): assumes model_translation exposes `src_langs` and
    # `get_tokenizer_model_for_src_lang` -- confirm against that module.
    if src_lang not in model_translation.src_langs:
        return (
            f"ISSUE: currently no model for language '{src_lang}'. "
            "If wrong language, please specify language."
        )

    # Nothing to translate: avoid indexing chunks[0] below and skip model
    # loading altogether.
    if not chunks:
        return ''

    logger.info("LANG: %s, TEXT: %s...", src_lang, chunks[0][:50])

    tokenizer, model = model_translation.get_tokenizer_model_for_src_lang(
        src_lang)

    # NOTE: The 'fa' (Persian) model has multiple target languages to choose
    # from. We need to specify the desired language among: fra ita por ron spa
    #   https://huggingface.co/Helsinki-NLP/opus-mt-tc-big-fa-itc
    # Prepend text with >>fra<< in order to translate into French.
    prefix = ">>fra<< " if src_lang == 'fa' else ""

    translated_chunks = []
    for chunk in chunks:
        inputs = tokenizer(
            prefix + chunk, return_tensors="pt", max_length=input_max_length,
            truncation=True, padding="longest").to(model.device)
        outputs = model.generate(**inputs, max_length=output_max_length)
        # One input sequence per call, hence a single decoded string.
        translated_chunks.append(
            tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

    return '\n'.join(translated_chunks)
def translate_text(
        text: str,
        src_lang: str,
        tgt_lang: str
) -> "tuple[str, str]":
    """Translate the given text with M2M100 and with Google Translate.

    Args:
        text: the text to translate.
        src_lang: source language code. When it is None, empty or "auto",
            the language is detected from ``text`` with langdetect.
        tgt_lang: target language code. Falls back to 'en' when it is not one
            of the values of ``model_translation.tgt_language_codes``.

    Returns:
        A pair ``(m2m100_translation, google_translation)``. Both entries are
        empty strings when ``text`` is empty or contains only whitespace.
    """
    # Nothing to translate. This also keeps blank input away from
    # langdetect.detect, which raises when the text has no usable features.
    if not text or not text.strip():
        return ("", "")

    # src_lang: make sure it is set; detect it when not given explicitly.
    if not src_lang or src_lang == "auto":
        src_lang = langdetect.detect(text)
    if src_lang not in model_translation.language_codes.values():
        # Only reported: the translation below is still attempted.
        logger.error(
            "Language detected %s not among supported language", src_lang)

    # tgt_lang: make sure it is not None. Default to 'en' if not set.
    if tgt_lang not in model_translation.tgt_language_codes.values():
        tgt_lang = 'en'

    # Translate with both engines, using the same target language for each.
    translated_text_m2m100 = m2m100.translate(text, src_lang, tgt_lang)
    translated_text_google_translate = GoogleTranslator(
        source='auto', target=tgt_lang).translate(text=text)

    return (
        translated_text_m2m100,
        translated_text_google_translate
    )
| # | |
| # User interface | |
| # | |
# Build the Gradio interface. Components are created in the order they should
# appear on the page: title, input box, the two output boxes, the language
# selectors, the submit button and finally the documentation accordion.
with gr.Blocks() as demo:
    gr.Markdown("""
    ## Text translation v0.0.3
    """)

    # Input
    input_text = gr.Textbox(
        lines=5,
        placeholder="Enter text to translate",
        label="Text to translate",
        render=True
    )

    # Output: one box per translation engine returned by translate_text().
    output_text_m2m100 = gr.Textbox(
        lines=4,
        label="Facebook m2m100 (418M)",
        render=True
    )
    output_text_google_translate = gr.Textbox(
        lines=4,
        label="Google Translate",
        render=True
    )

    # Source and target languages, shown side by side in one row.
    # The choices come from the (key, value) pairs of the language tables
    # defined in model_translation.
    with gr.Row():
        src_lang = gr.Dropdown(
            choices=model_translation.language_codes.items(),
            value="auto",  # translate_text() detects the language for "auto"
            label="Source language",
            render=True
        )
        tgt_lang = gr.Dropdown(
            choices=model_translation.tgt_language_codes.items(),
            value="en",
            label="Target language",
            render=True
        )

    # Submit button: passes the text and both language selections to
    # translate_text() and routes its two return values to the output boxes.
    translate_btn = gr.Button("Translate")
    translate_btn.click(
        fn=translate_text,
        inputs=[input_text, src_lang, tgt_lang],
        outputs=[output_text_m2m100, output_text_google_translate]
    )

    # Collapsed-by-default section listing the engines being served.
    with gr.Accordion("Documentation", open=False):
        gr.Markdown("""
        - Models: serving Facebook M2M100 (418M) and Google Translate.
        """)
# Start the Gradio app only when this file is run as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()