import gradio as gr
from transformers import pipeline
import torch

# 1. Load models (this happens only once, when the app starts)
print("Loading models...")

# ASR pipeline
# float16 speeds up inference on GPU, but half precision is largely
# unsupported on CPU, so pick the dtype and device together.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if torch.cuda.is_available() else torch.float32

asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    torch_dtype=dtype,
    device=device,
)
# Translation pipelines, keyed by "source-target" language pair
translators = {
    "en-zh": pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh"),
    "zh-en": pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en"),
    "en-ja": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ja"),
    "ja-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en"),
    "en-ko": pipeline("translation", model="Helsinki-NLP/opus-mt-en-ko"),
    "ko-en": pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en"),
}
print("Models loaded successfully.")
# 2. Define processing functions
def transcribe_audio(audio_file):
    print(f"Received audio file: {audio_file}")
    if audio_file is None:
        return ""
    try:
        # The ASR pipeline accepts a file path directly
        text = asr_pipeline(audio_file)["text"]
        print(f"ASR result: {text}")
        return text
    except Exception as e:
        print(f"Error in ASR: {e}")
        return f"Error in ASR: {e}"

def translate_text(text, source_lang, target_lang):
    print(f"Translating '{text}' from {source_lang} to {target_lang}")
    if not text:
        return ""
    if source_lang == target_lang:
        # No-op instead of a pointless round trip through English
        return text

    pair = f"{source_lang}-{target_lang}"
    if pair in translators:
        # Direct translation when a model for the pair exists
        translated_text = translators[pair](text)[0]["translation_text"]
    elif source_lang != "en" and target_lang != "en":
        # Bridge translation via English (e.g. zh -> en -> ja)
        english_text = translators[f"{source_lang}-en"](text)[0]["translation_text"]
        translated_text = translators[f"en-{target_lang}"](english_text)[0]["translation_text"]
    else:
        return "Translation route not supported"

    print(f"Translation result: {translated_text}")
    return translated_text

# 3. Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## All-in-One ASR and Translation API")
    with gr.Tab("ASR"):
        audio_input = gr.Audio(type="filepath", label="Upload Audio")
        asr_output = gr.Textbox(label="Transcript")
        asr_button = gr.Button("Transcribe")
        asr_button.click(transcribe_audio, inputs=audio_input, outputs=asr_output, api_name="asr")
    with gr.Tab("Translate"):
        text_input = gr.Textbox(label="Input Text")
        source_lang_input = gr.Dropdown(["en", "zh", "ja", "ko"], label="Source Language")
        target_lang_input = gr.Dropdown(["en", "zh", "ja", "ko"], label="Target Language")
        translation_output = gr.Textbox(label="Translation")
        translate_button = gr.Button("Translate")
        translate_button.click(
            translate_text,
            inputs=[text_input, source_lang_input, target_lang_input],
            outputs=translation_output,
            api_name="translate",
        )

# 4. Launch the app
if __name__ == "__main__":
    demo.launch()
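
Because both click handlers set api_name, a running Space exposes the same two functions over Gradio's HTTP API as /asr and /translate. Below is a minimal client-side sketch using a recent version of the gradio_client package; the Space ID and audio filename are placeholders for illustration, not part of the app above.

from gradio_client import Client, handle_file

# "your-username/asr-translation-demo" and "sample.wav" are hypothetical
client = Client("your-username/asr-translation-demo")

# Transcribe a local audio file via /asr, then translate the transcript via /translate
transcript = client.predict(handle_file("sample.wav"), api_name="/asr")
translation = client.predict(transcript, "en", "zh", api_name="/translate")
print(transcript)
print(translation)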