Spleeter_and_ASR

Running

App Files Files Community

Spleeter_and_ASR / app.py

trysem

Update app.py

ac993f1 almost 3 years ago

raw

history blame contribute delete

2.39 kB

	import gradio as gr
	import os
	import random2
	from spleeter.separator import Separator
	from transformers import pipeline, AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM

	# Spleeter configuration: the 2-stem model (vocals + accompaniment), "-16kHz" variant.
	SPLEETER_CONFIG = 'spleeter:2stems-16kHz'
	separator = Separator(SPLEETER_CONFIG)

	# English Wav2Vec2 checkpoint used for speech-to-text.
	# Model card: https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-english
	ASR_MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
	pipe = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)

	# Gradio function to split audio stems, transcribe vocals and return their filepaths
	def extract_stems(audio):
	    """Split an audio file into two stems and transcribe the vocal stem.

	    Args:
	        audio: Path of the audio file to process.

	    Returns:
	        A ``(vocals, accompaniment, transcript)`` tuple: the paths of the two
	        separated ``.wav`` files followed by the transcript of the vocals.

	    Raises:
	        ValueError: If no audio file was provided.
	    """
	    # Function-scope import: keeps the new stdlib dependency local to this block.
	    import uuid

	    # Fail early with a readable message instead of an obscure error from Spleeter.
	    if not audio:
	        raise ValueError("No audio file provided.")

	    # A UUID-based folder name keeps concurrent requests from overwriting
	    # each other's output files (a small random integer can collide).
	    foldername = uuid.uuid4().hex

	    # Separate the input; synchronous=True waits for the files to be written
	    # before they are read back below.
	    separator.separate_to_file(
	        audio,
	        "output/",
	        filename_format=foldername + "/{instrument}.wav",
	        synchronous=True,
	    )

	    # Build the file paths of the two stems written by the separator.
	    vocals = f"./output/{foldername}/vocals.wav"
	    accompaniment = f"./output/{foldername}/accompaniment.wav"

	    # Transcribe the vocals in 10-second chunks with the Hugging Face pipeline.
	    result = pipe(vocals, chunk_length_s=10)

	    # The pipeline result is a dict carrying the transcript under "text";
	    # return the plain string so a text field does not display the dict repr.
	    transcript = result["text"] if isinstance(result, dict) else result

	    return vocals, accompaniment, transcript

	# Build and launch the Gradio interface.
	# Input is one audio file; outputs are two audio files and a transcript.

	title = "Demo: Deezer Spleeter + english Automatic Speech Recognition"
	description = "<p>This demo is a basic interface for <a href='https://research.deezer.com/projects/spleeter.html' target='_blank'>Deezer Spleeter</a>.</p><p>It uses the Spleeter library for separate audio file in two stems : accompaniments and vocals.</p><p>Once splitted, it performs ASR (Automatic Speech Recognition) based on a Wav2vec2 english model.</p>"

	# Example inputs: every entry of the examples directory, one per row.
	# Sorted because os.listdir() returns entries in arbitrary order; a missing
	# directory yields no examples instead of crashing at startup.
	examples_dir = "examples/"
	if os.path.isdir(examples_dir):
	    examples = [[examples_dir + mp3] for mp3 in sorted(os.listdir(examples_dir))]
	else:
	    examples = []

	demo = gr.Interface(
	    fn=extract_stems,
	    inputs=gr.Audio(source="upload", type="filepath"),
	    outputs=[
	        gr.Audio(label="Vocals stem", source="upload", type="filepath"),
	        gr.Audio(label="Accompaniment stem", source="upload", type="filepath"),
	        gr.Textbox(label="Wave2vec2 Automatic Speech Recognition (English)"),
	    ],
	    title=title,
	    description=description,
	    # Pass None rather than an empty list so no empty examples panel is built.
	    examples=examples or None,
	    allow_flagging="never",
	)

	# Only start the server when run as a script, so importing this module
	# (e.g. for reuse of `demo`) does not launch it as a side effect.
	if __name__ == "__main__":
	    demo.launch()