import gradio as gr
import logging
from transformers import pipeline

# Set up logging
logging.basicConfig(level=logging.INFO)

# Load Whisper model (tiny version for speed)
asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-tiny.en")
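# Note: the first run downloads the model weights from the Hugging Face Hub; later runs reuse the local cache.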

# Function to transcribe audio from a file path
def transcribe_speech(audio_path):
    if audio_path is None:
        logging.error("No audio provided.")
        return "No audio found, please retry."

    try:
        logging.info(f"Received audio file path: {audio_path}")
        output = asr(audio_path)
        return output["text"]
    except Exception as e:
        logging.error(f"Error during transcription: {str(e)}")
        return f"Error processing the audio file: {str(e)}"

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎀 Simple Speech Recognition App")
    gr.Markdown("Record or upload audio, then click **Transcribe Audio**")

    mic = gr.Audio(label="πŸŽ™οΈ Microphone or Upload", type="filepath")  # type="filepath" passes a temp-file path to transcribe_speech rather than raw audio data
    transcribe_button = gr.Button("πŸ“ Transcribe Audio")
    transcription = gr.Textbox(label="πŸ—’οΈ Transcription", lines=3, placeholder="Transcription will appear here...")

    transcribe_button.click(fn=transcribe_speech, inputs=mic, outputs=transcription)

# share=True also exposes a temporary public gradio.live link alongside the local URL.
demo.launch(share=True)