Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -77,11 +77,10 @@ se_demo = gr.Interface(
|
|
| 77 |
gr.Audio(label="Output Audio", type="filepath"),
|
| 78 |
],
|
| 79 |
title = "ClearVoice: Speech Enhancement",
|
| 80 |
-
description = ("
|
| 81 |
-
"
|
| 82 |
-
"To test it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
|
| 83 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a></p>"
|
| 84 |
-
),
|
| 85 |
examples = [
|
| 86 |
["examples/mandarin_speech_16kHz.wav", "16000"],
|
| 87 |
["examples/english_speech_48kHz.wav", "48000"],
|
|
@@ -99,9 +98,8 @@ ss_demo = gr.Interface(
|
|
| 99 |
gr.Audio(label="Output Audio", type="filepath"),
|
| 100 |
],
|
| 101 |
title = "ClearVoice: Speech Separation",
|
| 102 |
-
description = ("
|
| 103 |
-
|
| 104 |
-
"To test it, simply upload your audio, or click one of the examples to load them. Read more at the links below."),
|
| 105 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
| 106 |
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
| 107 |
examples = [
|
|
@@ -119,9 +117,9 @@ tse_demo = gr.Interface(
|
|
| 119 |
outputs = [
|
| 120 |
gr.Gallery(label="Output Video List")
|
| 121 |
],
|
| 122 |
-
title = "ClearVoice: Audio-
|
| 123 |
-
description = ("
|
| 124 |
-
"To
|
| 125 |
# article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
| 126 |
# "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
| 127 |
examples = [
|
|
@@ -132,7 +130,6 @@ tse_demo = gr.Interface(
|
|
| 132 |
)
|
| 133 |
|
| 134 |
with demo:
|
| 135 |
-
|
| 136 |
-
gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Demo_1: Speech Enhancement", "Demo_2: Speech Separation", "Demo_3: Audio-visual Speaker Extraction"])
|
| 137 |
|
| 138 |
demo.launch()
|
|
|
|
| 77 |
gr.Audio(label="Output Audio", type="filepath"),
|
| 78 |
],
|
| 79 |
title = "ClearVoice: Speech Enhancement",
|
| 80 |
+
description = ("ClearVoice is AI-powered and extracts clear speech from background noise for enhanced speech quality. It supports both 16 kHz and 48 kHz audio outputs. "
|
| 81 |
+
"To try it, simply upload your audio, or click one of the examples. "),
|
|
|
|
| 82 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2206.07293' target='_blank'>FRCRN: Boosting Feature Representation Using Frequency Recurrence for Monaural Speech Enhancement</a> | <a href='https://github.com/alibabasglab/FRCRN' target='_blank'>Github Repo</a></p>"
|
| 83 |
+
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
| 84 |
examples = [
|
| 85 |
["examples/mandarin_speech_16kHz.wav", "16000"],
|
| 86 |
["examples/english_speech_48kHz.wav", "48000"],
|
|
|
|
| 98 |
gr.Audio(label="Output Audio", type="filepath"),
|
| 99 |
],
|
| 100 |
title = "ClearVoice: Speech Separation",
|
| 101 |
+
description = ("ClearVoice is powered by AI and separates individual speech from mixed audio. It supports 16 kHz and two output streams. "
|
| 102 |
+
"To try it, simply upload your audio, or click one of the examples. "),
|
|
|
|
| 103 |
article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
| 104 |
"<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
| 105 |
examples = [
|
|
|
|
| 117 |
outputs = [
|
| 118 |
gr.Gallery(label="Output Video List")
|
| 119 |
],
|
| 120 |
+
title = "ClearVoice: Audio-Visual Speaker Extraction",
|
| 121 |
+
description = ("ClearVoice is AI-powered and extracts each speaker's voice from a multi-speaker video using facial recognition. "
|
| 122 |
+
"To try it, simply upload your video, or click one of the examples. "),
|
| 123 |
# article = ("<p style='text-align: center'><a href='https://arxiv.org/abs/2302.11824' target='_blank'>MossFormer: Pushing the Performance Limit of Monaural Speech Separation using Gated Single-Head Transformer with Convolution-Augmented Joint Self-Attentions</a> | <a href='https://github.com/alibabasglab/MossFormer' target='_blank'>Github Repo</a></p>"
|
| 124 |
# "<p style='text-align: center'><a href='https://arxiv.org/abs/2312.11825' target='_blank'>MossFormer2: Combining Transformer and RNN-Free Recurrent Network for Enhanced Time-Domain Monaural Speech Separation</a> | <a href='https://github.com/alibabasglab/MossFormer2' target='_blank'>Github Repo</a></p>"),
|
| 125 |
examples = [
|
|
|
|
| 130 |
)
|
| 131 |
|
| 132 |
with demo:
|
| 133 |
+
gr.TabbedInterface([se_demo, ss_demo, tse_demo], ["Task 1: Speech Enhancement", "Task 2: Speech Separation", "Task 3: Audio-Visual Speaker Extraction"])
|
|
|
|
| 134 |
|
| 135 |
demo.launch()
|