TTS-Arena-JA

Paused

App Files Files Community

alan committed on Jul 18, 2024

Commit

05d581b

1 Parent(s): cc744ff

added blane-tts

Browse files

Files changed (1) hide show

app.py +49 -36

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from gradio_client import Client
 import pyloudnorm as pyln
 import soundfile as sf
 import librosa
-from detoxify import Detoxify
 import os
 import tempfile
 from pydub import AudioSegment
@@ -27,7 +27,7 @@ def match_target_amplitude(sound, target_dBFS):
-toxicity = Detoxify('original')
 # with open('harvard_sentences.txt') as f:
 with open('ja_sentences.txt') as f:
     sents = f.read().strip().splitlines()
@@ -50,7 +50,8 @@ AVAILABLE_MODELS = {
     # 'Parler TTS': 'parler'
     'MOE': 'moe',
     'BARK': 'bark',
-    'KOTOBA-SPEECH': 'kotoba-speech'
 }
 SPACE_ID = os.getenv('SPACE_ID')
@@ -63,12 +64,12 @@ DB_NAME = "database.db"
 DB_PATH = f"/data/{DB_NAME}" if os.path.isdir("/data") else DB_NAME
 print(f"Using {DB_PATH}")
 # AUDIO_DATASET_ID = "ttseval/tts-arena-new"
-CITATION_TEXT = """@misc{tts-arena,
-	title        = {Text to Speech Arena},
-	author       = {mrfakename and Srivastav, Vaibhav and Fourrier, Clémentine and Pouget, Lucain and Lacombe, Yoach and main and Gandhi, Sanchit},
 	year         = 2024,
 	publisher    = {Hugging Face},
-	howpublished = "\\url{https://huggingface.co/spaces/TTS-AGI/TTS-Arena}"
 }"""
 ####################################
@@ -114,25 +115,36 @@ def create_db_if_missing():
 def get_db():
     return sqlite3.connect(DB_PATH)
-def kotoba_speech_tts(text):
-    url = "https://kotoba-tech-kotoba-speech.hf.space/call/tts"
     headers = {
         "Content-Type": "application/json"
     }
     data = {
-        "data": [
-            text,
-            5,
-            5,
-            "Preset voices",
-            "Ava",
-            {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
-            {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
-        ]
     }
     # Send POST request
-    response = requests.post(url, headers=headers, data=json.dumps(data))
     response.raise_for_status()  # Raise an error for bad status codes
     # Print the response to inspect its structure
@@ -147,7 +159,7 @@ def kotoba_speech_tts(text):
     event_id = response_json['event_id']
     # Send GET request to the next URL
-    stream_url = f"https://kotoba-tech-kotoba-speech.hf.space/call/tts/{event_id}"
     stream_response = requests.get(stream_url, stream=True)
     stream_response.raise_for_status()  # Raise an error for bad status codes
@@ -220,6 +232,8 @@ DESCR = """
 # Japanese TTS Arena: Benchmarking Japanese TTS Models in the Wild
 Vote to help the community find the best available text-to-speech model!
 """.strip()
 # INSTR = """
 # ## Instructions
@@ -344,7 +358,8 @@ model_names = {
     # 'metavoice': 'MetaVoice-1B',
     'bark': 'BARK',
     'moe': 'MOE',
-    'kotoba-speech': 'KOTOBA-SPEECH'
     # 'styletts2': 'StyleTTS 2',
 }
 model_licenses = {
@@ -395,7 +410,8 @@ model_links = {
     # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
     'bark': 'https://suno-bark.hf.space/',
     'moe': 'skytnt/moe-tts',
-    # 'kotoba-speech': 'kotoba-tech/kotoba-speech'
 }
 model_kwargs = {
     'moe': {
@@ -404,9 +420,6 @@ model_kwargs = {
     'bark': {
         'fn_index': 3
     },
-    # 'kotoba-speech': {
-    #     'api_name': '/tts'
-    # }
 }
 # def get_random_split(existing_split=None):
 #     choice = random.choice(list(audio_dataset.keys()))
@@ -686,9 +699,9 @@ def synthandreturn(text):
                     result = router.predict(*model_args[model], **model_kwargs[model])
                 else:
-                    result = kotoba_speech_tts(text)
                     # URL to download the file from
-                    url = f"https://kotoba-tech-kotoba-speech.hf.space/file={result}"
                     # Local filename to save the downloaded file
                     local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
@@ -913,17 +926,17 @@ with gr.Blocks() as vote:
 with gr.Blocks() as about:
     gr.Markdown(ABOUT)
-with gr.Blocks() as admin:
-    rdb = gr.Button("Reload Audio Dataset")
-    # rdb.click(reload_audio_dataset, outputs=rdb)
-    with gr.Group():
-        dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
-        ddb = gr.Button("Delete DB")
-    ddb.click(del_db, inputs=dbtext, outputs=ddb)
 with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="TTS Arena copy") as demo:
     gr.Markdown(DESCR)
-    gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
-    # gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
     if CITATION_TEXT:
         with gr.Row():
             with gr.Accordion("Citation", open=False):

 import pyloudnorm as pyln
 import soundfile as sf
 import librosa
+# from detoxify import Detoxify
 import os
 import tempfile
 from pydub import AudioSegment
+# toxicity = Detoxify('original')
 # with open('harvard_sentences.txt') as f:
 with open('ja_sentences.txt') as f:
     sents = f.read().strip().splitlines()
     # 'Parler TTS': 'parler'
     'MOE': 'moe',
     'BARK': 'bark',
+    'KOTOBA-SPEECH': 'kotoba-speech',
+    'BLANE-TTS': 'blane-tts'
 }
 SPACE_ID = os.getenv('SPACE_ID')
 DB_PATH = f"/data/{DB_NAME}" if os.path.isdir("/data") else DB_NAME
 print(f"Using {DB_PATH}")
 # AUDIO_DATASET_ID = "ttseval/tts-arena-new"
+CITATION_TEXT = """@misc{tts-arena-ja,
+	title        = {Japanese Text to Speech Arena},
+	author       = {Kotoba Technologies.},
 	year         = 2024,
 	publisher    = {Hugging Face},
+	howpublished = "\\url{https://huggingface.co/spaces/kotoba-speech/TTS-Arena-copy}"
 }"""
 ####################################
 def get_db():
     return sqlite3.connect(DB_PATH)
+def get_tts_file(text: str, model: str):
+    url = {
+        "kotoba-speech": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
+        "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
+    }
     headers = {
         "Content-Type": "application/json"
     }
     data = {
+        "kotoba-speech": {
+            "data": [
+                text,
+                5,
+                5,
+                "Preset voices",
+                "Ava",
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
+                {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
+            ]
+        },
+        "blane-tts": {
+            "data": [
+                text,
+                "Japanese"
+            ]
+        }
     }
     # Send POST request
+    response = requests.post(url[model], headers=headers, data=json.dumps(data[model]))
     response.raise_for_status()  # Raise an error for bad status codes
     # Print the response to inspect its structure
     event_id = response_json['event_id']
     # Send GET request to the next URL
+    stream_url = f"{url[model]}/{event_id}"
     stream_response = requests.get(stream_url, stream=True)
     stream_response.raise_for_status()  # Raise an error for bad status codes
 # Japanese TTS Arena: Benchmarking Japanese TTS Models in the Wild
 Vote to help the community find the best available text-to-speech model!
+_This arena is inspired and built on [TTS Arena](https://huggingface.co/spaces/TTS-AGI/TTS-Arena)._
 """.strip()
 # INSTR = """
 # ## Instructions
     # 'metavoice': 'MetaVoice-1B',
     'bark': 'BARK',
     'moe': 'MOE',
+    'kotoba-speech': 'KOTOBA-SPEECH',
+    'blane-tts': 'BLANE-TTS'
     # 'styletts2': 'StyleTTS 2',
 }
 model_licenses = {
     # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
     'bark': 'https://suno-bark.hf.space/',
     'moe': 'skytnt/moe-tts',
+    'kotoba-speech': 'https://kotoba-tech-kotoba-speech.hf.space/',
+    'blane-tts': 'https://blane187-blane-tts.hf.space/'
 }
 model_kwargs = {
     'moe': {
     'bark': {
         'fn_index': 3
     },
 }
 # def get_random_split(existing_split=None):
 #     choice = random.choice(list(audio_dataset.keys()))
                     result = router.predict(*model_args[model], **model_kwargs[model])
                 else:
+                    result = get_tts_file(text)
                     # URL to download the file from
+                    url = f"{model_links[model]}file={result}"
                     # Local filename to save the downloaded file
                     local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
 with gr.Blocks() as about:
     gr.Markdown(ABOUT)
+# with gr.Blocks() as admin:
+#     rdb = gr.Button("Reload Audio Dataset")
+#     # rdb.click(reload_audio_dataset, outputs=rdb)
+#     with gr.Group():
+#         dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
+#         ddb = gr.Button("Delete DB")
+#     ddb.click(del_db, inputs=dbtext, outputs=ddb)
 with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="TTS Arena copy") as demo:
     gr.Markdown(DESCR)
+    # gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
+    gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
     if CITATION_TEXT:
         with gr.Row():
             with gr.Accordion("Citation", open=False):