Spaces:
Paused
Paused
alan
committed on
Commit
·
d40aa10
1
Parent(s):
4c2d67c
publish
Browse files
app.py
CHANGED
|
@@ -232,8 +232,6 @@ DESCR = """
|
|
| 232 |
# Japanese TTS Arena: Benchmarking Japanese TTS Models in the Wild
|
| 233 |
|
| 234 |
Vote to help the community find the best available text-to-speech model!
|
| 235 |
-
|
| 236 |
-
_This arena is inspired and built on [TTS Arena](https://huggingface.co/spaces/TTS-AGI/TTS-Arena)._
|
| 237 |
""".strip()
|
| 238 |
# INSTR = """
|
| 239 |
# ## Instructions
|
|
@@ -264,7 +262,7 @@ Please [create a Discussion](https://huggingface.co/spaces/{SPACE_ID}/discussion
|
|
| 264 |
ABOUT = f"""
|
| 265 |
## 📄 About
|
| 266 |
|
| 267 |
-
The TTS Arena evaluates leading speech synthesis models. It is inspired by LMsys's [Chatbot Arena](https://chat.lmsys.org/).
|
| 268 |
|
| 269 |
### Motivation
|
| 270 |
|
|
@@ -276,16 +274,7 @@ The leaderboard allows a user to enter text, which will be synthesized by two mo
|
|
| 276 |
|
| 277 |
### Credits
|
| 278 |
|
| 279 |
-
Thank you to the
|
| 280 |
-
|
| 281 |
-
* VB ([Twitter](https://twitter.com/reach_vb) / [Hugging Face](https://huggingface.co/reach-vb))
|
| 282 |
-
* Clémentine Fourrier ([Twitter](https://twitter.com/clefourrier) / [Hugging Face](https://huggingface.co/clefourrier))
|
| 283 |
-
* Lucain Pouget ([Twitter](https://twitter.com/Wauplin) / [Hugging Face](https://huggingface.co/Wauplin))
|
| 284 |
-
* Yoach Lacombe ([Twitter](https://twitter.com/yoachlacombe) / [Hugging Face](https://huggingface.co/ylacombe))
|
| 285 |
-
* Main Horse ([Twitter](https://twitter.com/main_horse) / [Hugging Face](https://huggingface.co/main-horse))
|
| 286 |
-
* Sanchit Gandhi ([Twitter](https://twitter.com/sanchitgandhi99) / [Hugging Face](https://huggingface.co/sanchit-gandhi))
|
| 287 |
-
* Apolinário Passos ([Twitter](https://twitter.com/multimodalart) / [Hugging Face](https://huggingface.co/multimodalart))
|
| 288 |
-
* Pedro Cuenca ([Twitter](https://twitter.com/pcuenq) / [Hugging Face](https://huggingface.co/pcuenq))
|
| 289 |
|
| 290 |
{request}
|
| 291 |
|
|
@@ -296,13 +285,11 @@ We may store text you enter and generated audio. We store a unique ID for each s
|
|
| 296 |
### License
|
| 297 |
|
| 298 |
Generated audio clips cannot be redistributed and may be used for personal, non-commercial use only.
|
| 299 |
-
|
| 300 |
-
Random sentences are sourced from a filtered subset of the [Harvard Sentences](https://www.cs.columbia.edu/~hgs/audio/harvard.html).
|
| 301 |
""".strip()
|
| 302 |
LDESC = """
|
| 303 |
## 🏆 Leaderboard
|
| 304 |
|
| 305 |
-
Vote to help the community determine the best text-to-speech (TTS) models.
|
| 306 |
|
| 307 |
The leaderboard displays models in descending order of how natural they sound (based on votes cast by the community).
|
| 308 |
|
|
@@ -445,7 +432,8 @@ def get_leaderboard(reveal_prelim = False):
|
|
| 445 |
cursor = conn.cursor()
|
| 446 |
sql = 'SELECT name, upvote, downvote FROM model'
|
| 447 |
# if not reveal_prelim: sql += ' WHERE EXISTS (SELECT 1 FROM model WHERE (upvote + downvote) > 750)'
|
| 448 |
-
if not reveal_prelim: sql += ' WHERE (upvote + downvote) > 500'
|
|
|
|
| 449 |
cursor.execute(sql)
|
| 450 |
data = cursor.fetchall()
|
| 451 |
df = pd.DataFrame(data, columns=['name', 'upvote', 'downvote'])
|
|
@@ -676,7 +664,7 @@ def synthandreturn(text):
|
|
| 676 |
# Get two random models
|
| 677 |
mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)
|
| 678 |
log_text(text)
|
| 679 |
-
print("[debug] Using", mdl1, mdl2)
|
| 680 |
def predict_and_update_result(text, model, result_storage):
|
| 681 |
try:
|
| 682 |
# if model in AVAILABLE_MODELS:
|
|
@@ -693,10 +681,6 @@ def synthandreturn(text):
|
|
| 693 |
# result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
| 694 |
if model in model_kwargs:
|
| 695 |
router = Client(model_links[model])
|
| 696 |
-
# debug
|
| 697 |
-
print(model_args[model])
|
| 698 |
-
print(model_kwargs[model])
|
| 699 |
-
|
| 700 |
result = router.predict(*model_args[model], **model_kwargs[model])
|
| 701 |
else:
|
| 702 |
result = get_tts_file(text, model)
|
|
@@ -933,7 +917,7 @@ with gr.Blocks() as about:
|
|
| 933 |
# dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
| 934 |
# ddb = gr.Button("Delete DB")
|
| 935 |
# ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
| 936 |
-
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="TTS Arena") as demo:
|
| 937 |
gr.Markdown(DESCR)
|
| 938 |
# gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|
| 939 |
gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
|
|
|
|
| 232 |
# Japanese TTS Arena: Benchmarking Japanese TTS Models in the Wild
|
| 233 |
|
| 234 |
Vote to help the community find the best available text-to-speech model!
|
|
|
|
|
|
|
| 235 |
""".strip()
|
| 236 |
# INSTR = """
|
| 237 |
# ## Instructions
|
|
|
|
| 262 |
ABOUT = f"""
|
| 263 |
## 📄 About
|
| 264 |
|
| 265 |
+
The Japanese TTS Arena evaluates leading speech synthesis models. It is inspired by LMsys's [Chatbot Arena](https://chat.lmsys.org/) and [TTS Arena](https://huggingface.co/spaces/TTS-AGI/TTS-Arena).
|
| 266 |
|
| 267 |
### Motivation
|
| 268 |
|
|
|
|
| 274 |
|
| 275 |
### Credits
|
| 276 |
|
| 277 |
+
Thank you to the open-source code from TTS Arena which helped make this project possible.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
{request}
|
| 280 |
|
|
|
|
| 285 |
### License
|
| 286 |
|
| 287 |
Generated audio clips cannot be redistributed and may be used for personal, non-commercial use only.
|
|
|
|
|
|
|
| 288 |
""".strip()
|
| 289 |
LDESC = """
|
| 290 |
## 🏆 Leaderboard
|
| 291 |
|
| 292 |
+
Vote to help the community determine the best Japanese text-to-speech (TTS) models.
|
| 293 |
|
| 294 |
The leaderboard displays models in descending order of how natural they sound (based on votes cast by the community).
|
| 295 |
|
|
|
|
| 432 |
cursor = conn.cursor()
|
| 433 |
sql = 'SELECT name, upvote, downvote FROM model'
|
| 434 |
# if not reveal_prelim: sql += ' WHERE EXISTS (SELECT 1 FROM model WHERE (upvote + downvote) > 750)'
|
| 435 |
+
# if not reveal_prelim: sql += ' WHERE (upvote + downvote) > 500'
|
| 436 |
+
if not reveal_prelim: sql += ' WHERE (upvote + downvote) > 2'
|
| 437 |
cursor.execute(sql)
|
| 438 |
data = cursor.fetchall()
|
| 439 |
df = pd.DataFrame(data, columns=['name', 'upvote', 'downvote'])
|
|
|
|
| 664 |
# Get two random models
|
| 665 |
mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)
|
| 666 |
log_text(text)
|
| 667 |
+
# print("[debug] Using", mdl1, mdl2)
|
| 668 |
def predict_and_update_result(text, model, result_storage):
|
| 669 |
try:
|
| 670 |
# if model in AVAILABLE_MODELS:
|
|
|
|
| 681 |
# result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
|
| 682 |
if model in model_kwargs:
|
| 683 |
router = Client(model_links[model])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
result = router.predict(*model_args[model], **model_kwargs[model])
|
| 685 |
else:
|
| 686 |
result = get_tts_file(text, model)
|
|
|
|
| 917 |
# dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
|
| 918 |
# ddb = gr.Button("Delete DB")
|
| 919 |
# ddb.click(del_db, inputs=dbtext, outputs=ddb)
|
| 920 |
+
with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="JA TTS Arena") as demo:
|
| 921 |
gr.Markdown(DESCR)
|
| 922 |
# gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
|
| 923 |
gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
|