Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from transformers import VitsModel, AutoTokenizer | |
# Display name shown in the UI -> Hugging Face checkpoint id of the matching
# MMS text-to-speech model. Insertion order is the order of the dropdown.
LANG_MODEL_MAP = dict(
    [
        ("English", "facebook/mms-tts-eng"),
        ("Hindi", "facebook/mms-tts-hin"),
        ("Tamil", "facebook/mms-tts-tam"),
        ("Malayalam", "facebook/mms-tts-mal"),
        ("Kannada", "facebook/mms-tts-kan"),
        ("Telugu", "facebook/mms-tts-tel"),
    ]
)

# Run inference on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Checkpoint id -> (tokenizer, model); filled lazily by load_model_and_tokenizer.
cache = {}
def load_model_and_tokenizer(language):
    """Return the ``(tokenizer, model)`` pair for ``language``, loading it once.

    ``language`` must be a key of ``LANG_MODEL_MAP``; any other value raises
    ``KeyError`` from the lookup. The first request for a language loads the
    tokenizer and the VITS model, moves the model to ``device``, and stores the
    pair in ``cache`` under the checkpoint id. Later requests reuse that entry.
    """
    checkpoint = LANG_MODEL_MAP[language]

    # Fast path: this checkpoint has already been loaded.
    if checkpoint in cache:
        return cache[checkpoint]

    text_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    vits = VitsModel.from_pretrained(checkpoint)
    cache[checkpoint] = (text_tokenizer, vits.to(device))
    return cache[checkpoint]
def tts(language, text):
    """Synthesize speech for ``text`` with the model selected by ``language``.

    Args:
        language: Display name of the language; expected to be a key of
            ``LANG_MODEL_MAP``.
        text: Text to speak. ``None``, empty, or whitespace-only text returns
            one sample of silence without loading or running a model.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a 1-D NumPy
        array.

    Raises:
        gr.Error: If ``language`` is not a key of ``LANG_MODEL_MAP`` (for
            example ``None`` when nothing is selected in the dropdown).
    """
    # `not text` also covers None, which has no .strip().
    if not text or not text.strip():
        # Integer silence needs no float -> int16 conversion downstream.
        # NOTE(review): Gradio is understood to peak-normalise float audio,
        # which divides by zero for an all-zero array - confirm against the
        # installed Gradio version.
        return 16000, np.zeros(1, dtype=np.int16)

    # Report a missing/unknown selection as a readable UI message instead of
    # letting a bare KeyError escape from the checkpoint lookup.
    if language not in LANG_MODEL_MAP:
        raise gr.Error("Please select a language.")

    tokenizer, model = load_model_and_tokenizer(language)
    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        output = model(**inputs)

    # One text in, so take the only batch item. Unlike squeeze(), indexing
    # cannot collapse a very short result to a 0-d array.
    waveform = output.waveform[0].cpu().numpy()

    # Report the rate the checkpoint was trained at rather than assuming 16 kHz.
    return model.config.sampling_rate, waveform
# Gradio UI: a language dropdown and a text box feed `tts`; the returned
# (sample_rate, waveform) tuple is played by the audio component.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.Dropdown(
            choices=list(LANG_MODEL_MAP),
            # Preselect the first language so `tts` never receives None as the
            # language when the user submits without touching the dropdown.
            value=next(iter(LANG_MODEL_MAP)),
            label="Select Language",
        ),
        gr.Textbox(label="Enter Text"),
    ],
    outputs=gr.Audio(label="Synthesized Speech", type="numpy"),
    title="Multilingual Text-to-Speech (MMS)",
    description="Generate speech from text using Meta's MMS models for English, Hindi, Tamil, Malayalam, Kannada and Telugu.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()