# Spaces: Sleeping
| import streamlit as st | |
| from transformers import MarianMTModel, MarianTokenizer | |
# Human-readable language names offered in the UI, mapped to the short
# language codes that are used as keys in `language_pairs`.
languages = {
    "English": "en",
    "Urdu": "ur",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
    "Chinese": "zh",
    "Italian": "it",
    "Russian": "ru",
    "Japanese": "ja",
    "Arabic": "ar",
    "Hindi": "hi",
}
# (source code, target code) -> pretrained checkpoint name for that direction.
# Every entry has English on one side; both directions are listed per language.
# NOTE(review): the checkpoint names are not validated here -- confirm each
# one exists on the model hub before relying on it.
language_pairs = {
    # English <-> Urdu
    ("en", "ur"): "Helsinki-NLP/opus-mt-en-ur",
    ("ur", "en"): "Helsinki-NLP/opus-mt-ur-en",
    # English <-> French
    ("en", "fr"): "Helsinki-NLP/opus-mt-en-fr",
    ("fr", "en"): "Helsinki-NLP/opus-mt-fr-en",
    # English <-> Spanish
    ("en", "es"): "Helsinki-NLP/opus-mt-en-es",
    ("es", "en"): "Helsinki-NLP/opus-mt-es-en",
    # English <-> German
    ("en", "de"): "Helsinki-NLP/opus-mt-en-de",
    ("de", "en"): "Helsinki-NLP/opus-mt-de-en",
    # English <-> Chinese
    ("en", "zh"): "Helsinki-NLP/opus-mt-en-zh",
    ("zh", "en"): "Helsinki-NLP/opus-mt-zh-en",
    # English <-> Italian
    ("en", "it"): "Helsinki-NLP/opus-mt-en-it",
    ("it", "en"): "Helsinki-NLP/opus-mt-it-en",
    # English <-> Russian
    ("en", "ru"): "Helsinki-NLP/opus-mt-en-ru",
    ("ru", "en"): "Helsinki-NLP/opus-mt-ru-en",
    # English <-> Japanese
    ("en", "ja"): "Helsinki-NLP/opus-mt-en-ja",
    ("ja", "en"): "Helsinki-NLP/opus-mt-ja-en",
    # English <-> Arabic
    ("en", "ar"): "Helsinki-NLP/opus-mt-en-ar",
    ("ar", "en"): "Helsinki-NLP/opus-mt-ar-en",
    # English <-> Hindi
    ("en", "hi"): "Helsinki-NLP/opus-mt-en-hi",
    ("hi", "en"): "Helsinki-NLP/opus-mt-hi-en",
    # Add more pairs as available
}
@st.cache_resource
def _load_pretrained(model_name):
    """Instantiate the tokenizer and model for one checkpoint, cached by name.

    Streamlit re-executes the script on every widget interaction, so an
    uncached loader would rebuild the model on each button click.
    `st.cache_resource` keeps one shared instance per checkpoint name.

    Returns:
        A ``(model, tokenizer)`` tuple. The objects are shared between calls,
        so callers must not mutate them.
    """
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def load_model(src_lang, tgt_lang):
    """Return the ``(model, tokenizer)`` pair for a translation direction.

    Args:
        src_lang: Source language code (a key component of `language_pairs`).
        tgt_lang: Target language code.

    Returns:
        A ``(model, tokenizer)`` tuple for the checkpoint registered in
        `language_pairs` for ``(src_lang, tgt_lang)``.

    Raises:
        ValueError: If no checkpoint is registered for that direction.
    """
    model_name = language_pairs.get((src_lang, tgt_lang))
    # The lookup stays outside the cached helper so the ValueError contract
    # that callers catch does not depend on the caching layer.
    if not model_name:
        raise ValueError(f"No available model for {src_lang} to {tgt_lang}")
    return _load_pretrained(model_name)
def translate(text, src_lang, tgt_lang):
    """Translate `text` directly from `src_lang` to `tgt_lang`.

    Args:
        text: The text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The decoded translation of the first generated sequence. Blank input
        (empty or whitespace only) is returned unchanged without loading a
        model.

    Raises:
        ValueError: Propagated from `load_model` when no model is registered
            for the requested direction.
    """
    # Nothing to translate: skip the (expensive) model load entirely.
    if not text.strip():
        return text

    model, tokenizer = load_model(src_lang, tgt_lang)
    # Calling the tokenizer (rather than `encode`) also yields the attention
    # mask; `truncation=True` clips over-length input to the tokenizer's
    # maximum length instead of letting it fail inside the model.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
def translate_chain(text, src_lang, tgt_lang):
    """Translate `text` by pivoting through English.

    A hop into English is performed when the source is not English, and a hop
    out of English when the target is not English. If both are English the
    text is returned unchanged.

    Args:
        text: The text to translate.
        src_lang: Source language code.
        tgt_lang: Target language code.

    Returns:
        The text after all required hops have been applied in order.
    """
    pivot = 'en'
    hops = []
    if src_lang != pivot:
        hops.append((src_lang, pivot))
    if tgt_lang != pivot:
        hops.append((pivot, tgt_lang))

    result = text
    for hop_src, hop_tgt in hops:
        result = translate(result, hop_src, hop_tgt)
    return result
def translate_ui(text, source_language, target_language):
    """Translate `text` between two languages chosen by display name.

    Args:
        text: The text to translate.
        source_language: Display name of the source language (key of
            `languages`).
        target_language: Display name of the target language (key of
            `languages`).

    Returns:
        The translated text. When source and target are the same language the
        input is returned unchanged.

    Raises:
        KeyError: If the two names differ and either is not in `languages`.
    """
    # Same language on both sides: a direct model never exists for this case,
    # and the English-pivot fallback would needlessly round-trip the text
    # through two models (and possibly alter it).
    if source_language == target_language:
        return text

    src_lang = languages[source_language]
    tgt_lang = languages[target_language]
    try:
        return translate(text, src_lang, tgt_lang)
    except ValueError:
        # No direct model for this pair; fall back to pivoting via English.
        return translate_chain(text, src_lang, tgt_lang)
# Streamlit App UI: page header and short description.
st.title("Multilingual Translator")
st.write(
    "Translate text between various languages including Urdu, French, Spanish, and more."
)

# Free-form input for the text to translate.
text = st.text_area("Enter text to translate", height=100)

# Source and target pickers both offer every display name in `languages`.
source_language = st.selectbox("Select Source Language", list(languages))
target_language = st.selectbox("Select Target Language", list(languages))

# Run a translation only when the button is pressed; blank input gets a
# warning instead of a model call.
if st.button("Translate"):
    if not text.strip():
        st.warning("Please enter text to translate.")
    else:
        translation = translate_ui(text, source_language, target_language)
        st.text_area("Translated Text", translation, height=100)

# About section, rendered in the sidebar.
st.sidebar.title("About")
st.sidebar.info(
    """
This app allows you to translate text between multiple languages using the MarianMT model from Hugging Face's Helsinki-NLP collection.
"""
)