import os

import gradio as gr
import torch
from faster_whisper import WhisperModel
import pandas as pd

# Model size selection
model_size = "large-v2"

# Get device
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Initialize model based on device
if device == "cuda:0":
    # Run on GPU with FP16
    model_whisper = WhisperModel(model_size, device="cuda", compute_type="float16")
else:
    # Run on CPU with INT8
    model_whisper = WhisperModel(model_size, device="cpu", compute_type="int8")

# Helper: extract the bare filename from an uploaded file object
def get_filename(file_obj):
    return os.path.basename(file_obj.name)

# Transcribe an audio file into time-stamped text segments
def audio_to_transcript(file_obj):
    try:
        # Gradio may pass a temporary file object exposing a .name attribute
        filename = get_filename(file_obj)
        segments, _ = model_whisper.transcribe(file_obj.name, beam_size=5, vad_filter=True)
    except AttributeError:
        # ...or a plain filepath string, depending on the Gradio version
        filename = os.path.basename(file_obj)
        segments, _ = model_whisper.transcribe(file_obj, beam_size=5, vad_filter=True)

    # Initialize lists to store transcription data
    start_segments, end_segments, text_segments = list(), list(), list()

    # Process each segment for start time, end time, and text
    for segment in segments:
        start, end, text = segment.start, segment.end, segment.text
        start_segments.append(start)
        end_segments.append(end)
        text_segments.append(text)

    # Collect the transcript in a DataFrame
    df = pd.DataFrame()
    df["start"] = start_segments
    df["end"] = end_segments
    df["text"] = text_segments

    # Save the transcript to a CSV file named after the audio file
    csv_file = os.path.splitext(filename)[0] + ".csv"
    df.to_csv(csv_file, encoding="utf-8", index=False)

    # Return the CSV path directly; the gr.File output component accepts a file path
    return filename, csv_file, df

# Gradio interface setup
headers = ["start", "end", "text"]

iface = gr.Interface(
    fn=audio_to_transcript,
    inputs=gr.File(label="Upload an Audio File", type="filepath"),
    outputs=[
        gr.Textbox(label="Audio file name"),
        gr.File(label="Transcript CSV file"),
        gr.DataFrame(label="Transcript", headers=headers),
    ],
    allow_flagging="never",
    title="Audio to Transcript",
    description="Upload an audio file, and this tool will return a transcript with time-stamped segments.",
    theme="compact",  # Compact theme for a simpler UI
)

iface.launch(debug=True)
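
# --- Optional quick check (a sketch, not part of the Space itself) ---
# Assuming a local audio file such as "sample.wav" exists (hypothetical path),
# the transcription function can be exercised directly, without the Gradio UI:
#
#     filename, csv_path, df = audio_to_transcript("sample.wav")
#     print(filename)    # e.g. "sample.wav"
#     print(csv_path)    # e.g. "sample.csv"
#     print(df.head())   # first few time-stamped segments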