Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,23 @@
|
|
| 1 |
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import time
|
|
@@ -20,17 +39,30 @@ def MyPrint(s):
|
|
| 20 |
title = "# Next-gen Kaldi: Text-to-speech (TTS)"
|
| 21 |
|
| 22 |
description = """
|
| 23 |
-
This space shows how to convert text to speech with Next-gen Kaldi
|
| 24 |
|
| 25 |
-
|
| 26 |
|
| 27 |
-
|
| 28 |
|
| 29 |
- <https://github.com/k2-fsa/sherpa-onnx>
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
"""
|
| 33 |
|
|
|
|
|
|
|
| 34 |
css = """
|
| 35 |
.result {display:flex;flex-direction:column}
|
| 36 |
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
|
|
@@ -41,28 +73,43 @@ css = """
|
|
| 41 |
examples = [
|
| 42 |
[
|
| 43 |
"English",
|
| 44 |
-
"csukuangfj/vits-
|
| 45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
0,
|
| 47 |
1.0,
|
| 48 |
],
|
| 49 |
[
|
| 50 |
"English",
|
| 51 |
-
"csukuangfj/vits-
|
| 52 |
-
"
|
| 53 |
0,
|
| 54 |
1.0,
|
| 55 |
],
|
| 56 |
[
|
| 57 |
"English",
|
| 58 |
-
"csukuangfj/vits-
|
| 59 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
0,
|
| 61 |
1.0,
|
| 62 |
],
|
| 63 |
]
|
| 64 |
|
| 65 |
|
|
|
|
| 66 |
def update_model_dropdown(language: str):
|
| 67 |
if language in language_to_models:
|
| 68 |
choices = language_to_models[language]
|
|
@@ -127,6 +174,7 @@ def process(language: str, repo_id: str, text: str, sid: str, speed: float):
|
|
| 127 |
|
| 128 |
demo = gr.Blocks(css=css)
|
| 129 |
|
|
|
|
| 130 |
with demo:
|
| 131 |
gr.Markdown(title)
|
| 132 |
language_choices = list(language_to_models.keys())
|
|
@@ -134,13 +182,13 @@ with demo:
|
|
| 134 |
language_radio = gr.Radio(
|
| 135 |
label="Language",
|
| 136 |
choices=language_choices,
|
| 137 |
-
value=
|
| 138 |
)
|
| 139 |
|
| 140 |
model_dropdown = gr.Dropdown(
|
| 141 |
-
choices=language_to_models[
|
| 142 |
label="Select a model",
|
| 143 |
-
value=language_to_models[
|
| 144 |
)
|
| 145 |
|
| 146 |
language_radio.change(
|
|
@@ -164,7 +212,7 @@ with demo:
|
|
| 164 |
lines=1,
|
| 165 |
max_lines=1,
|
| 166 |
value="0",
|
| 167 |
-
placeholder="Speaker ID. Valid only for
|
| 168 |
)
|
| 169 |
|
| 170 |
input_speed = gr.Slider(
|
|
@@ -227,4 +275,6 @@ def download_espeak_ng_data():
|
|
| 227 |
|
| 228 |
if __name__ == "__main__":
|
| 229 |
download_espeak_ng_data()
|
|
|
|
|
|
|
| 230 |
demo.launch()
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
#
|
| 3 |
+
# Copyright 2022-2023 Xiaomi Corp. (authors: Fangjun Kuang)
|
| 4 |
+
#
|
| 5 |
+
# See LICENSE for clarification regarding multiple authors
|
| 6 |
+
#
|
| 7 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 8 |
+
# you may not use this file except in compliance with the License.
|
| 9 |
+
# You may obtain a copy of the License at
|
| 10 |
+
#
|
| 11 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 12 |
+
#
|
| 13 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 14 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 15 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 16 |
+
# See the License for the specific language governing permissions and
|
| 17 |
+
# limitations under the License.
|
| 18 |
+
|
| 19 |
+
# References:
|
| 20 |
+
# https://gradio.app/docs/#dropdown
|
| 21 |
|
| 22 |
import os
|
| 23 |
import time
|
|
|
|
| 39 |
title = "# Next-gen Kaldi: Text-to-speech (TTS)"
|
| 40 |
|
| 41 |
description = """
|
| 42 |
+
This space shows how to convert text to speech with Next-gen Kaldi.
|
| 43 |
|
| 44 |
+
It is running on CPU within a docker container provided by Hugging Face.
|
| 45 |
|
| 46 |
+
See more information by visiting the following links:
|
| 47 |
|
| 48 |
- <https://github.com/k2-fsa/sherpa-onnx>
|
| 49 |
+
|
| 50 |
+
If you want to deploy it locally, please see
|
| 51 |
+
<https://k2-fsa.github.io/sherpa/>
|
| 52 |
+
|
| 53 |
+
If you want to use Android APKs, please see
|
| 54 |
+
<https://k2-fsa.github.io/sherpa/onnx/tts/apk.html>
|
| 55 |
+
|
| 56 |
+
If you want to use Android text-to-speech engine APKs, please see
|
| 57 |
+
<https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html>
|
| 58 |
+
|
| 59 |
+
If you want to download an all-in-one exe for Windows, please see
|
| 60 |
+
<https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models>
|
| 61 |
+
|
| 62 |
"""
|
| 63 |
|
| 64 |
+
# css style is copied from
|
| 65 |
+
# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
|
| 66 |
css = """
|
| 67 |
.result {display:flex;flex-direction:column}
|
| 68 |
.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
|
|
|
|
| 73 |
examples = [
|
| 74 |
[
|
| 75 |
"English",
|
| 76 |
+
"csukuangfj/vits-en-us",
|
| 77 |
+
"Welcome to our next-generation text-to-speech demo. This is powered by Sherpa-ONNX.",
|
| 78 |
+
0,
|
| 79 |
+
1.0,
|
| 80 |
+
],
|
| 81 |
+
[
|
| 82 |
+
"English",
|
| 83 |
+
"csukuangfj/vits-en-us",
|
| 84 |
+
"Artificial intelligence is transforming industries with innovative voice technologies.",
|
| 85 |
0,
|
| 86 |
1.0,
|
| 87 |
],
|
| 88 |
[
|
| 89 |
"English",
|
| 90 |
+
"csukuangfj/vits-en-us",
|
| 91 |
+
"The quick brown fox jumps over the lazy dog. 1234567890.",
|
| 92 |
0,
|
| 93 |
1.0,
|
| 94 |
],
|
| 95 |
[
|
| 96 |
"English",
|
| 97 |
+
"csukuangfj/vits-en-us",
|
| 98 |
+
"Today is a great day to explore machine learning and natural language processing.",
|
| 99 |
+
0,
|
| 100 |
+
1.0,
|
| 101 |
+
],
|
| 102 |
+
[
|
| 103 |
+
"English",
|
| 104 |
+
"csukuangfj/vits-en-us",
|
| 105 |
+
"Call 911 in case of emergency. The meeting is scheduled for August 15th, 2025.",
|
| 106 |
0,
|
| 107 |
1.0,
|
| 108 |
],
|
| 109 |
]
|
| 110 |
|
| 111 |
|
| 112 |
+
|
| 113 |
def update_model_dropdown(language: str):
|
| 114 |
if language in language_to_models:
|
| 115 |
choices = language_to_models[language]
|
|
|
|
| 174 |
|
| 175 |
demo = gr.Blocks(css=css)
|
| 176 |
|
| 177 |
+
|
| 178 |
with demo:
|
| 179 |
gr.Markdown(title)
|
| 180 |
language_choices = list(language_to_models.keys())
|
|
|
|
| 182 |
language_radio = gr.Radio(
|
| 183 |
label="Language",
|
| 184 |
choices=language_choices,
|
| 185 |
+
value=language_choices[0],
|
| 186 |
)
|
| 187 |
|
| 188 |
model_dropdown = gr.Dropdown(
|
| 189 |
+
choices=language_to_models[language_choices[0]],
|
| 190 |
label="Select a model",
|
| 191 |
+
value=language_to_models[language_choices[0]][0],
|
| 192 |
)
|
| 193 |
|
| 194 |
language_radio.change(
|
|
|
|
| 212 |
lines=1,
|
| 213 |
max_lines=1,
|
| 214 |
value="0",
|
| 215 |
+
placeholder="Speaker ID. Valid only for multi-speaker model",
|
| 216 |
)
|
| 217 |
|
| 218 |
input_speed = gr.Slider(
|
|
|
|
| 275 |
|
| 276 |
if __name__ == "__main__":
|
| 277 |
download_espeak_ng_data()
|
| 278 |
+
formatter = "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
|
| 279 |
+
|
| 280 |
demo.launch()
|