alan committed on
Commit
05d581b
·
1 Parent(s): cc744ff

added blane-tts

Browse files
Files changed (1) hide show
  1. app.py +49 -36
app.py CHANGED
@@ -9,7 +9,7 @@ from gradio_client import Client
9
  import pyloudnorm as pyln
10
  import soundfile as sf
11
  import librosa
12
- from detoxify import Detoxify
13
  import os
14
  import tempfile
15
  from pydub import AudioSegment
@@ -27,7 +27,7 @@ def match_target_amplitude(sound, target_dBFS):
27
 
28
 
29
 
30
- toxicity = Detoxify('original')
31
  # with open('harvard_sentences.txt') as f:
32
  with open('ja_sentences.txt') as f:
33
  sents = f.read().strip().splitlines()
@@ -50,7 +50,8 @@ AVAILABLE_MODELS = {
50
  # 'Parler TTS': 'parler'
51
  'MOE': 'moe',
52
  'BARK': 'bark',
53
- 'KOTOBA-SPEECH': 'kotoba-speech'
 
54
  }
55
 
56
  SPACE_ID = os.getenv('SPACE_ID')
@@ -63,12 +64,12 @@ DB_NAME = "database.db"
63
  DB_PATH = f"/data/{DB_NAME}" if os.path.isdir("/data") else DB_NAME
64
  print(f"Using {DB_PATH}")
65
  # AUDIO_DATASET_ID = "ttseval/tts-arena-new"
66
- CITATION_TEXT = """@misc{tts-arena,
67
- title = {Text to Speech Arena},
68
- author = {mrfakename and Srivastav, Vaibhav and Fourrier, Clémentine and Pouget, Lucain and Lacombe, Yoach and main and Gandhi, Sanchit},
69
  year = 2024,
70
  publisher = {Hugging Face},
71
- howpublished = "\\url{https://huggingface.co/spaces/TTS-AGI/TTS-Arena}"
72
  }"""
73
 
74
  ####################################
@@ -114,25 +115,36 @@ def create_db_if_missing():
114
  def get_db():
115
  return sqlite3.connect(DB_PATH)
116
 
117
- def kotoba_speech_tts(text):
118
- url = "https://kotoba-tech-kotoba-speech.hf.space/call/tts"
 
 
 
119
  headers = {
120
  "Content-Type": "application/json"
121
  }
122
  data = {
123
- "data": [
124
- text,
125
- 5,
126
- 5,
127
- "Preset voices",
128
- "Ava",
129
- {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
130
- {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
131
- ]
 
 
 
 
 
 
 
 
132
  }
133
 
134
  # Send POST request
135
- response = requests.post(url, headers=headers, data=json.dumps(data))
136
  response.raise_for_status() # Raise an error for bad status codes
137
 
138
  # Print the response to inspect its structure
@@ -147,7 +159,7 @@ def kotoba_speech_tts(text):
147
  event_id = response_json['event_id']
148
 
149
  # Send GET request to the next URL
150
- stream_url = f"https://kotoba-tech-kotoba-speech.hf.space/call/tts/{event_id}"
151
  stream_response = requests.get(stream_url, stream=True)
152
  stream_response.raise_for_status() # Raise an error for bad status codes
153
 
@@ -220,6 +232,8 @@ DESCR = """
220
  # Japanese TTS Arena: Benchmarking Japanese TTS Models in the Wild
221
 
222
  Vote to help the community find the best available text-to-speech model!
 
 
223
  """.strip()
224
  # INSTR = """
225
  # ## Instructions
@@ -344,7 +358,8 @@ model_names = {
344
  # 'metavoice': 'MetaVoice-1B',
345
  'bark': 'BARK',
346
  'moe': 'MOE',
347
- 'kotoba-speech': 'KOTOBA-SPEECH'
 
348
  # 'styletts2': 'StyleTTS 2',
349
  }
350
  model_licenses = {
@@ -395,7 +410,8 @@ model_links = {
395
  # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
396
  'bark': 'https://suno-bark.hf.space/',
397
  'moe': 'skytnt/moe-tts',
398
- # 'kotoba-speech': 'kotoba-tech/kotoba-speech'
 
399
  }
400
  model_kwargs = {
401
  'moe': {
@@ -404,9 +420,6 @@ model_kwargs = {
404
  'bark': {
405
  'fn_index': 3
406
  },
407
- # 'kotoba-speech': {
408
- # 'api_name': '/tts'
409
- # }
410
  }
411
  # def get_random_split(existing_split=None):
412
  # choice = random.choice(list(audio_dataset.keys()))
@@ -686,9 +699,9 @@ def synthandreturn(text):
686
 
687
  result = router.predict(*model_args[model], **model_kwargs[model])
688
  else:
689
- result = kotoba_speech_tts(text)
690
  # URL to download the file from
691
- url = f"https://kotoba-tech-kotoba-speech.hf.space/file={result}"
692
  # Local filename to save the downloaded file
693
  local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
694
 
@@ -913,17 +926,17 @@ with gr.Blocks() as vote:
913
 
914
  with gr.Blocks() as about:
915
  gr.Markdown(ABOUT)
916
- with gr.Blocks() as admin:
917
- rdb = gr.Button("Reload Audio Dataset")
918
- # rdb.click(reload_audio_dataset, outputs=rdb)
919
- with gr.Group():
920
- dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
921
- ddb = gr.Button("Delete DB")
922
- ddb.click(del_db, inputs=dbtext, outputs=ddb)
923
  with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="TTS Arena copy") as demo:
924
  gr.Markdown(DESCR)
925
- gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
926
- # gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
927
  if CITATION_TEXT:
928
  with gr.Row():
929
  with gr.Accordion("Citation", open=False):
 
9
  import pyloudnorm as pyln
10
  import soundfile as sf
11
  import librosa
12
+ # from detoxify import Detoxify
13
  import os
14
  import tempfile
15
  from pydub import AudioSegment
 
27
 
28
 
29
 
30
+ # toxicity = Detoxify('original')
31
  # with open('harvard_sentences.txt') as f:
32
  with open('ja_sentences.txt') as f:
33
  sents = f.read().strip().splitlines()
 
50
  # 'Parler TTS': 'parler'
51
  'MOE': 'moe',
52
  'BARK': 'bark',
53
+ 'KOTOBA-SPEECH': 'kotoba-speech',
54
+ 'BLANE-TTS': 'blane-tts'
55
  }
56
 
57
  SPACE_ID = os.getenv('SPACE_ID')
 
64
  DB_PATH = f"/data/{DB_NAME}" if os.path.isdir("/data") else DB_NAME
65
  print(f"Using {DB_PATH}")
66
  # AUDIO_DATASET_ID = "ttseval/tts-arena-new"
67
+ CITATION_TEXT = """@misc{tts-arena-ja,
68
+ title = {Japanese Text to Speech Arena},
69
+ author = {Kotoba Technologies.},
70
  year = 2024,
71
  publisher = {Hugging Face},
72
+ howpublished = "\\url{https://huggingface.co/spaces/kotoba-speech/TTS-Arena-copy}"
73
  }"""
74
 
75
  ####################################
 
115
  def get_db():
116
  return sqlite3.connect(DB_PATH)
117
 
118
+ def get_tts_file(text: str, model: str):
119
+ url = {
120
+ "kotoba-speech": "https://kotoba-tech-kotoba-speech.hf.space/call/tts",
121
+ "blane-tts": "https://blane187-blane-tts.hf.space/call/get_audio_file"
122
+ }
123
  headers = {
124
  "Content-Type": "application/json"
125
  }
126
  data = {
127
+ "kotoba-speech": {
128
+ "data": [
129
+ text,
130
+ 5,
131
+ 5,
132
+ "Preset voices",
133
+ "Ava",
134
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"},
135
+ {"path": "https://keikaku-hoso.com/sample_voice/voice01_A.mp3"}
136
+ ]
137
+ },
138
+ "blane-tts": {
139
+ "data": [
140
+ text,
141
+ "Japanese"
142
+ ]
143
+ }
144
  }
145
 
146
  # Send POST request
147
+ response = requests.post(url[model], headers=headers, data=json.dumps(data[model]))
148
  response.raise_for_status() # Raise an error for bad status codes
149
 
150
  # Print the response to inspect its structure
 
159
  event_id = response_json['event_id']
160
 
161
  # Send GET request to the next URL
162
+ stream_url = f"{url[model]}/{event_id}"
163
  stream_response = requests.get(stream_url, stream=True)
164
  stream_response.raise_for_status() # Raise an error for bad status codes
165
 
 
232
  # Japanese TTS Arena: Benchmarking Japanese TTS Models in the Wild
233
 
234
  Vote to help the community find the best available text-to-speech model!
235
+
236
+ _This arena is inspired and built on [TTS Arena](https://huggingface.co/spaces/TTS-AGI/TTS-Arena)._
237
  """.strip()
238
  # INSTR = """
239
  # ## Instructions
 
358
  # 'metavoice': 'MetaVoice-1B',
359
  'bark': 'BARK',
360
  'moe': 'MOE',
361
+ 'kotoba-speech': 'KOTOBA-SPEECH',
362
+ 'blane-tts': 'BLANE-TTS'
363
  # 'styletts2': 'StyleTTS 2',
364
  }
365
  model_licenses = {
 
410
  # 'metavoice': 'https://github.com/metavoiceio/metavoice-src',
411
  'bark': 'https://suno-bark.hf.space/',
412
  'moe': 'skytnt/moe-tts',
413
+ 'kotoba-speech': 'https://kotoba-tech-kotoba-speech.hf.space/',
414
+ 'blane-tts': 'https://blane187-blane-tts.hf.space/'
415
  }
416
  model_kwargs = {
417
  'moe': {
 
420
  'bark': {
421
  'fn_index': 3
422
  },
 
 
 
423
  }
424
  # def get_random_split(existing_split=None):
425
  # choice = random.choice(list(audio_dataset.keys()))
 
699
 
700
  result = router.predict(*model_args[model], **model_kwargs[model])
701
  else:
702
+ result = get_tts_file(text)
703
  # URL to download the file from
704
+ url = f"{model_links[model]}file={result}"
705
  # Local filename to save the downloaded file
706
  local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
707
 
 
926
 
927
  with gr.Blocks() as about:
928
  gr.Markdown(ABOUT)
929
+ # with gr.Blocks() as admin:
930
+ # rdb = gr.Button("Reload Audio Dataset")
931
+ # # rdb.click(reload_audio_dataset, outputs=rdb)
932
+ # with gr.Group():
933
+ # dbtext = gr.Textbox(label="Type \"delete db\" to confirm", placeholder="delete db")
934
+ # ddb = gr.Button("Delete DB")
935
+ # ddb.click(del_db, inputs=dbtext, outputs=ddb)
936
  with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}", title="TTS Arena copy") as demo:
937
  gr.Markdown(DESCR)
938
+ # gr.TabbedInterface([vote, leaderboard, about, admin], ['Vote', 'Leaderboard', 'About', 'Admin (ONLY IN BETA)'])
939
+ gr.TabbedInterface([vote, leaderboard, about], ['🗳️ Vote', '🏆 Leaderboard', '📄 About'])
940
  if CITATION_TEXT:
941
  with gr.Row():
942
  with gr.Accordion("Citation", open=False):