Spaces:
Runtime error
Runtime error
import math

import gradio as gr
import numpy as np
import torch
from espnet2.bin.asr_inference import Speech2Text
from espnet_model_zoo.downloader import ModelDownloader
from scipy.signal import resample_poly
# Resolve the pretrained model by name, then build the recognizer from the
# keyword arguments that the downloader returns.
d = ModelDownloader()
_model_files = d.download_and_unpack(
    "espnet/simple_asr_train_asr_transformer_e18_raw_bpe_sp_valid.acc.best"
)
# Inference is pinned to the CPU; pass device="cuda" instead when a GPU is used.
asr_model = Speech2Text(device="cpu", **_model_files)
def transcribe(audio, target_sample_rate=16000):
    """Transcribe a Gradio audio recording to text using ESPnet.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when the user submitted
            without recording anything.
        target_sample_rate: Rate, in Hz, the waveform is resampled to before
            decoding. NOTE(review): 16 kHz is assumed to be the rate the
            pretrained model expects -- confirm against the model card.

    Returns:
        The text of the first hypothesis returned by ``asr_model``, or an
        empty string when there is no audio to decode.
    """
    # Gradio passes None when the form is submitted with no recording.
    if audio is None:
        return ""

    sample_rate, samples = audio
    waveform = np.asarray(samples)
    if waveform.size == 0:
        return ""

    # Integer PCM (Gradio's numpy format) is rescaled to floats in [-1, 1);
    # anything else is only cast to float32.
    if np.issubdtype(waveform.dtype, np.signedinteger):
        full_scale = -float(np.iinfo(waveform.dtype).min)
        waveform = waveform.astype(np.float32) / full_scale
    else:
        waveform = waveform.astype(np.float32)

    # Multi-channel recordings arrive as (samples, channels); the recognizer
    # is given a single 1-D waveform, so down-mix by averaging the channels.
    if waveform.ndim > 1:
        waveform = waveform.mean(axis=1)

    # Browsers usually record at 44.1/48 kHz; resample (with anti-aliasing)
    # instead of silently decoding audio at the wrong rate.
    sample_rate = int(sample_rate)
    target_sample_rate = int(target_sample_rate)
    if sample_rate != target_sample_rate:
        divisor = math.gcd(sample_rate, target_sample_rate)
        waveform = resample_poly(
            waveform,
            target_sample_rate // divisor,
            sample_rate // divisor,
        )

    speech = torch.from_numpy(np.ascontiguousarray(waveform, dtype=np.float32))
    result = asr_model(speech)
    text, *_ = result[0]  # first hypothesis; remaining tuple fields are unused
    return text
# Build the microphone input. Gradio 4 replaced the `source` keyword with the
# list-valued `sources`; try the original spelling first so older Gradio
# releases behave exactly as before, and fall back only when it is rejected.
try:
    audio_input = gr.Audio(source="microphone", type="numpy")
except TypeError:
    audio_input = gr.Audio(sources=["microphone"], type="numpy")

# Create a simple Gradio interface
interface = gr.Interface(
    fn=transcribe,  # Function to call
    inputs=audio_input,  # Audio input from microphone
    outputs="text",  # Output type (text transcription)
    title="ESPnet ASR Demo",  # Title of the UI
    description="Simple ESPnet-based speech recognition",  # Description of the app
)

# Launch the app
interface.launch()