import torch
import soundfile as sf
import gradio as gr
from clearvoice import ClearVoice

def fn_clearvoice_se(input_wav):
    # Enhance the uploaded audio with the 16 kHz FRCRN speech-enhancement model.
    myClearVoice = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])
    output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)
    if isinstance(output_wav_dict, dict):
        # A dict maps model name -> waveform; take the first (only) entry.
        key = next(iter(output_wav_dict))
        output_wav = output_wav_dict[key]
    else:
        output_wav = output_wav_dict
    sf.write('enhanced.wav', output_wav, 16000)
    return 'enhanced.wav'
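
# Added sketch (not wired into the demo below): fn_clearvoice_se rebuilds the
# ClearVoice pipeline, and therefore reloads the FRCRN checkpoint, on every
# request. If that turns out to be slow, one option is to build the pipeline
# once and reuse it across requests, assuming the ClearVoice object can be
# called repeatedly (which the per-call usage above already relies on).
_se_model = None

def fn_clearvoice_se_cached(input_wav):
    global _se_model
    if _se_model is None:
        # Built lazily on the first request, then shared by later ones.
        _se_model = ClearVoice(task='speech_enhancement', model_names=['FRCRN_SE_16K'])
    output_wav = _se_model(input_path=input_wav, online_write=False)
    if isinstance(output_wav, dict):
        output_wav = next(iter(output_wav.values()))
    sf.write('enhanced.wav', output_wav, 16000)
    return 'enhanced.wav'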

def fn_clearvoice_ss(input_wav):
    # Separate the uploaded mixture into two speakers with MossFormer2 (16 kHz).
    myClearVoice = ClearVoice(task='speech_separation', model_names=['MossFormer2_SS_16K'])
    output_wav_dict = myClearVoice(input_path=input_wav, online_write=False)
    if isinstance(output_wav_dict, dict):
        # A dict maps model name -> list of separated waveforms; take the first entry.
        key = next(iter(output_wav_dict))
        output_wav_list = output_wav_dict[key]
    else:
        output_wav_list = output_wav_dict
    output_wav_s1 = output_wav_list[0]
    output_wav_s2 = output_wav_list[1]
    sf.write('separated_s1.wav', output_wav_s1, 16000)
    sf.write('separated_s2.wav', output_wav_s2, 16000)
    return "separated_s1.wav", "separated_s2.wav"

demo = gr.Blocks()

se_demo = gr.Interface(
    fn=fn_clearvoice_se,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
    ],
    outputs=[
        gr.Audio(label="Output Audio", type="filepath"),
    ],
    title="ClearVoice: Speech Enhancement",
    description=("Gradio demo for speech enhancement with ClearVoice. To use it, simply upload your audio or click one of the examples to load it. Read more at the links below."),
    article=("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab' target='_blank'>Github Repo</a></p>"),
    examples=[
        ['mandarin_speech.wav'],
    ],
    cache_examples=True,
)

ss_demo = gr.Interface(
    fn=fn_clearvoice_ss,
    inputs=[
        gr.Audio(label="Input Audio", type="filepath"),
    ],
    outputs=[
        gr.Audio(label="Output Audio 1", type="filepath"),
        gr.Audio(label="Output Audio 2", type="filepath"),
    ],
    title="ClearVoice: Speech Separation",
    description=("Gradio demo for speech separation with ClearVoice. To use it, simply upload your audio or click one of the examples to load it. Read more at the links below."),
    article=("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab' target='_blank'>Github Repo</a></p>"
             "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab' target='_blank'>Github Repo</a></p>"),
    examples=[
        ['mandarin_speech.wav'],
    ],
    cache_examples=True,
)

with demo:
    gr.TabbedInterface([se_demo, ss_demo], ["Speech Enhancement", "Speech Separation"])

demo.launch(share=True)
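
# Added note: gr.TabbedInterface is itself a Blocks app, so the wrapper Blocks
# above is optional; an equivalent top level would be
#
#     demo = gr.TabbedInterface([se_demo, ss_demo], ["Speech Enhancement", "Speech Separation"])
#     demo.launch()
#
# share=True only matters for local runs (it requests a temporary public
# gradio.live link); on Hugging Face Spaces the app is already publicly hosted.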