Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import pipeline, Wav2Vec2ProcessorWithLM | |
| from pyannote.audio import Pipeline | |
| from librosa import load, resample | |
| from rpunct import RestorePuncts | |
# Checkpoint shared by the processor and the ASR pipeline below.
asr_model = "patrickvonplaten/wav2vec2-base-960h-4-gram"

# The processor supplies the tokenizer, feature extractor and decoder that
# are passed to the pipeline explicitly.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)
asr = pipeline(
    "automatic-speech-recognition",
    model=asr_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,
)

# Speaker segmentation pipeline; called with the audio file path in transcribe().
speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")

# Punctuation restorer applied to each speaker turn's text in transcribe().
rpunct = RestorePuncts()
def transcribe(filepath):
    """Transcribe an audio file and split the transcript by speaker turn.

    Args:
        filepath: Path of the audio file to process.

    Returns:
        A ``(diarized_output, full_text)`` tuple of strings.
        ``diarized_output`` has one line per speaker turn that received at
        least one word, formatted as
        ``<speaker>: ''<punctuated text>'' from <start>-<end>``.
        ``full_text`` is the lower-cased transcript of the whole recording.

    Notes:
        Words are consumed in order and assigned to the first turn whose end
        time is not earlier than the word's end time. Words that end after
        the last turn are therefore absent from ``diarized_output`` (they
        still appear in ``full_text``).
    """
    speaker_output = speaker_segmentation(filepath)

    # Resample while loading instead of calling resample() afterwards:
    # load() defaults to 22050 Hz, so the old code always resampled a second
    # time, and it passed the rates positionally, which newer librosa
    # releases (0.10+) reject because those arguments are keyword-only.
    speech, _ = load(filepath, sr=16000)

    text = asr(speech, return_timestamps="word")
    full_text = text['text'].lower()
    chunks = text['chunks']

    turn_lines = []
    i = 0  # index of the next word chunk not yet assigned to a turn
    for turn, _, speaker in speaker_output.itertracks(yield_label=True):
        words = []
        while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
            words.append(chunks[i]['text'].lower())
            i += 1
        if words:
            # Trailing space kept so the punctuator receives exactly the
            # same string as before this rewrite.
            diarized = rpunct.punctuate(' '.join(words) + ' ')
            turn_lines.append(
                "{}: ''{}'' from {:.3f}-{:.3f}\n".format(
                    speaker, diarized, turn.start, turn.end
                )
            )

    diarized_output = "".join(turn_lines)
    return diarized_output, full_text
# Input component: microphone recording, delivered as a file path.
mic = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Speech input",
    optional=False,
)

# Output components, in the order transcribe() returns its two strings.
diarized_transcript = gr.outputs.Textbox(type="auto", label="Diarized Output")
full_transcript = gr.outputs.Textbox(type="auto", label="Full Transcript")

# Example input offered in the UI.
examples = [["meeting_audio.wav"]]

iface = gr.Interface(
    fn=transcribe,
    inputs=[mic],
    outputs=[diarized_transcript, full_transcript],
    examples=examples,
    theme="huggingface",
    description="Testing transcription",
)

# Start the app as soon as the script runs.
iface.launch()