Spaces:
Running
Running
| import gradio as gr | |
| import os | |
| import random2 | |
| from spleeter.separator import Separator | |
| from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM | |
# Source separation: Spleeter's pre-trained 2-stems (vocals + accompaniment)
# 16 kHz model. It is loaded once at import time and reused for every request.
separator = Separator("spleeter:2stems-16kHz")

# Speech-to-text: Hugging Face ASR pipeline backed by an English Wav2Vec2 model.
# Model card: https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
pipe = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
)
| # Gradio function to split audio stems, transcribe vocals and return their filepaths | |
def extract_stems(audio):
    """Split an audio file into two stems and transcribe the vocal stem.

    Args:
        audio: Path of the audio file to process, as provided by the Gradio
            ``Audio`` input (declared with ``type="filepath"``).

    Returns:
        A ``(vocals, accompaniment, transcript)`` tuple: the paths of the two
        separated WAV files and the text recognised in the vocal stem.
    """
    # Function-scope import so this block does not depend on edits elsewhere.
    import uuid

    # One output folder per request. A UUID avoids the collisions a small
    # random integer can produce, which would let one request overwrite the
    # stems of another.
    foldername = uuid.uuid4().hex

    # synchronous=True makes the call wait until the stems are written, so the
    # files exist before they are transcribed and returned.
    separator.separate_to_file(
        audio,
        "output/",
        filename_format=foldername + "/{instrument}.wav",
        synchronous=True,
    )

    vocals = f"./output/{foldername}/vocals.wav"
    accompaniment = f"./output/{foldername}/accompaniment.wav"

    # The ASR pipeline returns a dict; only its "text" entry belongs in the
    # output Textbox (otherwise the whole dict would be displayed).
    result = pipe(vocals, chunk_length_s=10)
    return vocals, accompaniment, result["text"]
# Gradio UI: one uploaded audio file in; two audio stems and a transcript out.
title = "Demo: Deezer Spleeter + english Automatic Speech Recognition"
description = (
    "<p>This demo is a basic interface for <a href='https://research.deezer.com/projects/spleeter.html' target='_blank'>Deezer Spleeter</a>.</p>"
    "<p>It uses the Spleeter library for separate audio file in two stems : accompaniments and vocals.</p>"
    "<p>Once splitted, it performs ASR (Automatic Speech Recognition) based on a Wav2vec2 english model.</p>"
)

# Every file found in the examples/ folder is offered as a one-click example.
examples = [[f"examples/{name}"] for name in os.listdir("examples/")]

demo = gr.Interface(
    fn=extract_stems,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=[
        gr.Audio(label="Vocals stem", source="upload", type="filepath"),
        gr.Audio(label="Accompaniment stem", source="upload", type="filepath"),
        gr.Textbox(label="Wave2vec2 Automatic Speech Recognition (English)"),
    ],
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
)

# Start the web server for the interface.
demo.launch()