alan committed on
Commit 0414b49 · 1 Parent(s): 947b8c3

added google api

Files changed (2)
  1. app.py +11 -4
  2. utils.py +47 -0
app.py CHANGED
@@ -15,7 +15,11 @@ import tempfile
 from pydub import AudioSegment
 import requests
 import json
+from google.cloud import texttospeech

+from utils import get_credentials, get_google_tts
+
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_credentials()

 def match_target_amplitude(sound, target_dBFS):
     change_in_dBFS = target_dBFS - sound.dBFS
@@ -25,8 +29,6 @@ def match_target_amplitude(sound, target_dBFS):

 # enable_space_ci()

-
-
 # toxicity = Detoxify('original')
 # with open('harvard_sentences.txt') as f:
 with open('ja_sentences.txt') as f:
@@ -55,7 +57,8 @@ AVAILABLE_MODELS = {
     'KOTOBA-SPEECH-ALEX': 'kotoba-speech-alex',
     'KOTOBA-SPEECH-JACOB': 'kotoba-speech-jacob',
     'BLANE-TTS': 'blane-tts',
-    'AMITARO-VITS': 'amitaro-vits'
+    'AMITARO-VITS': 'amitaro-vits',
+    'GOOGLE-API': 'google-api'
 }

 SPACE_ID = os.getenv('SPACE_ID')
@@ -392,7 +395,8 @@ model_names = {
     'kotoba-speech-alex': 'KOTOBA-SPEECH-v0.1-ALEX',
     'kotoba-speech-jacob': 'KOTOBA-SPEECH-v0.1-JACOB',
     'blane-tts': 'BLANE-TTS',
-    'amitaro-vits': 'AMITARO-VITS'
+    'amitaro-vits': 'AMITARO-VITS',
+    'google-api': 'google-api'
     # 'styletts2': 'StyleTTS 2',
 }
 model_licenses = {
@@ -740,6 +744,9 @@ def synthandreturn(text):
         print(model_args[model])
         print(model_kwargs[model])
         result = router.predict(*model_args[model], **model_kwargs[model])
+    elif model == "google-api":
+        local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
+        result = get_google_tts(text, local_filename=local_filename)
     else:
         result = get_tts_file(text, model)
     # URL to download the file from
utils.py ADDED
@@ -0,0 +1,47 @@
+import os
+import json
+import tempfile
+from google.cloud import texttospeech
+
+
+def get_credentials():
+    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")  # get json credentials stored as a string
+
+    # create a temporary file
+    with tempfile.NamedTemporaryFile(mode="w+", delete=False, suffix=".json") as temp:
+        temp.write(creds_json_str)  # write in json format
+        temp_filename = temp.name
+
+    return temp_filename
+
+def get_google_tts(text, local_filename):
+    # Instantiates a client
+    client = texttospeech.TextToSpeechClient()
+
+    # Set the text input to be synthesized
+    synthesis_input = texttospeech.SynthesisInput(text=text)
+
+    # Build the voice request, select the language code ("ja-JP") and the ssml
+    # voice gender ("neutral")
+    voice = texttospeech.VoiceSelectionParams(
+        language_code="ja-JP", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
+    )
+
+    # Select the type of audio file you want returned
+    audio_config = texttospeech.AudioConfig(
+        audio_encoding=texttospeech.AudioEncoding.MP3
+    )
+
+    # Perform the text-to-speech request on the text input with the selected
+    # voice parameters and audio file type
+    response = client.synthesize_speech(
+        input=synthesis_input, voice=voice, audio_config=audio_config
+    )
+
+    # The response's audio_content is binary.
+    with open(local_filename, "wb") as out:
+        # Write the response to the output file.
+        out.write(response.audio_content)
+        print(f'Audio content written to file {local_filename}')
+
+    return local_filename
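For reference, a minimal sketch of how the two new helpers can be exercised on their own, assuming the GCP_CREDENTIAL_JSON environment variable holds a service-account key as a JSON string and google-cloud-texttospeech is installed; the sample text and output path below are illustrative, not part of the commit:

import os
from utils import get_credentials, get_google_tts

# Point the Google client at a temp file holding the service-account key,
# mirroring what app.py now does at import time.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_credentials()

# Synthesize a short Japanese sentence and write the returned bytes to disk.
audio_path = get_google_tts("こんにちは。", local_filename="/tmp/google_tts_sample.mp3")
print(audio_path)

Note that utils.py requests MP3 encoding in audio_config while the new app.py branch saves the result under a .wav extension; anything downstream that keys off the file suffix may want those kept consistent.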