Spaces:
Paused
Paused
alan
committed on
Commit
·
0414b49
1
Parent(s):
947b8c3
added google api
Browse files
app.py
CHANGED
|
@@ -15,7 +15,11 @@ import tempfile
|
|
| 15 |
from pydub import AudioSegment
|
| 16 |
import requests
|
| 17 |
import json
|
|
|
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def match_target_amplitude(sound, target_dBFS):
|
| 21 |
change_in_dBFS = target_dBFS - sound.dBFS
|
|
@@ -25,8 +29,6 @@ def match_target_amplitude(sound, target_dBFS):
|
|
| 25 |
|
| 26 |
# enable_space_ci()
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
# toxicity = Detoxify('original')
|
| 31 |
# with open('harvard_sentences.txt') as f:
|
| 32 |
with open('ja_sentences.txt') as f:
|
|
@@ -55,7 +57,8 @@ AVAILABLE_MODELS = {
|
|
| 55 |
'KOTOBA-SPEECH-ALEX': 'kotoba-speech-alex',
|
| 56 |
'KOTOBA-SPEECH-JACOB': 'kotoba-speech-jacob',
|
| 57 |
'BLANE-TTS': 'blane-tts',
|
| 58 |
-
'AMITARO-VITS': 'amitaro-vits'
|
|
|
|
| 59 |
}
|
| 60 |
|
| 61 |
SPACE_ID = os.getenv('SPACE_ID')
|
|
@@ -392,7 +395,8 @@ model_names = {
|
|
| 392 |
'kotoba-speech-alex': 'KOTOBA-SPEECH-v0.1-ALEX',
|
| 393 |
'kotoba-speech-jacob': 'KOTOBA-SPEECH-v0.1-JACOB',
|
| 394 |
'blane-tts': 'BLANE-TTS',
|
| 395 |
-
'amitaro-vits': 'AMITARO-VITS'
|
|
|
|
| 396 |
# 'styletts2': 'StyleTTS 2',
|
| 397 |
}
|
| 398 |
model_licenses = {
|
|
@@ -740,6 +744,9 @@ def synthandreturn(text):
|
|
| 740 |
print(model_args[model])
|
| 741 |
print(model_kwargs[model])
|
| 742 |
result = router.predict(*model_args[model], **model_kwargs[model])
|
|
|
|
|
|
|
|
|
|
| 743 |
else:
|
| 744 |
result = get_tts_file(text, model)
|
| 745 |
# URL to download the file from
|
|
|
|
| 15 |
from pydub import AudioSegment
|
| 16 |
import requests
|
| 17 |
import json
|
| 18 |
+
from google.cloud import texttospeech
|
| 19 |
|
| 20 |
+
from utils import get_credentials, get_google_tts
|
| 21 |
+
|
| 22 |
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = get_credentials()
|
| 23 |
|
| 24 |
def match_target_amplitude(sound, target_dBFS):
|
| 25 |
change_in_dBFS = target_dBFS - sound.dBFS
|
|
|
|
| 29 |
|
| 30 |
# enable_space_ci()
|
| 31 |
|
|
|
|
|
|
|
| 32 |
# toxicity = Detoxify('original')
|
| 33 |
# with open('harvard_sentences.txt') as f:
|
| 34 |
with open('ja_sentences.txt') as f:
|
|
|
|
| 57 |
'KOTOBA-SPEECH-ALEX': 'kotoba-speech-alex',
|
| 58 |
'KOTOBA-SPEECH-JACOB': 'kotoba-speech-jacob',
|
| 59 |
'BLANE-TTS': 'blane-tts',
|
| 60 |
+
'AMITARO-VITS': 'amitaro-vits',
|
| 61 |
+
'GOOGLE-API': 'google-api'
|
| 62 |
}
|
| 63 |
|
| 64 |
SPACE_ID = os.getenv('SPACE_ID')
|
|
|
|
| 395 |
'kotoba-speech-alex': 'KOTOBA-SPEECH-v0.1-ALEX',
|
| 396 |
'kotoba-speech-jacob': 'KOTOBA-SPEECH-v0.1-JACOB',
|
| 397 |
'blane-tts': 'BLANE-TTS',
|
| 398 |
+
'amitaro-vits': 'AMITARO-VITS',
|
| 399 |
+
'google-api': 'google-api'
|
| 400 |
# 'styletts2': 'StyleTTS 2',
|
| 401 |
}
|
| 402 |
model_licenses = {
|
|
|
|
| 744 |
print(model_args[model])
|
| 745 |
print(model_kwargs[model])
|
| 746 |
result = router.predict(*model_args[model], **model_kwargs[model])
|
| 747 |
+
elif model == "google-api":
|
| 748 |
+
local_filename = '/tmp/' + str(mkuuid(None)) + '.wav'
|
| 749 |
+
result = get_google_tts(text, local_filename=local_filename)
|
| 750 |
else:
|
| 751 |
result = get_tts_file(text, model)
|
| 752 |
# URL to download the file from
|
utils.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import tempfile
|
| 4 |
+
from google.cloud import texttospeech
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def get_credentials():
    """Write the GCP credentials JSON from the environment to a temp file.

    Reads the ``GCP_CREDENTIAL_JSON`` environment variable (the JSON
    credentials stored as a string) and writes it verbatim to a newly
    created ``.json`` temporary file that is kept on disk after closing.

    Returns:
        str: Path of the temporary file containing the credentials.

    Raises:
        RuntimeError: If ``GCP_CREDENTIAL_JSON`` is not set.
    """
    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")
    if creds_json_str is None:
        # Check before creating the file: otherwise write(None) fails with an
        # opaque TypeError and, because of delete=False, leaves an orphan
        # temp file behind.
        raise RuntimeError(
            "GCP_CREDENTIAL_JSON environment variable is not set"
        )

    # delete=False keeps the file after the handle closes so the returned
    # path stays usable; UTF-8 is explicit so the JSON is not written in a
    # platform-dependent default encoding.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)

    return temp.name
|
| 16 |
+
|
| 17 |
+
def get_google_tts(text, local_filename):
    """Synthesize Japanese speech for ``text`` via Google Cloud TTS and save it.

    The requested audio encoding follows the extension of ``local_filename``
    so the bytes on disk match the file name: ``.wav`` requests LINEAR16
    (which the API returns with a WAV header), ``.ogg`` requests OGG_OPUS,
    and any other extension keeps the previous MP3 behavior.

    Args:
        text: Plain text to synthesize.
        local_filename: Path the returned audio is written to.

    Returns:
        The ``local_filename`` that was passed in.
    """
    # Previously MP3 was always requested, even when the caller asked for a
    # ".wav" path, leaving MP3 data inside a file named as WAV.
    encodings_by_extension = {
        ".wav": texttospeech.AudioEncoding.LINEAR16,
        ".ogg": texttospeech.AudioEncoding.OGG_OPUS,
    }
    extension = os.path.splitext(local_filename)[1].lower()
    audio_encoding = encodings_by_extension.get(
        extension, texttospeech.AudioEncoding.MP3
    )

    # Instantiates a client
    client = texttospeech.TextToSpeechClient()

    # Set the text input to be synthesized
    synthesis_input = texttospeech.SynthesisInput(text=text)

    # Build the voice request: Japanese ("ja-JP") with a neutral SSML gender
    voice = texttospeech.VoiceSelectionParams(
        language_code="ja-JP",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )

    # Select the type of audio file to be returned
    audio_config = texttospeech.AudioConfig(audio_encoding=audio_encoding)

    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # The response's audio_content is binary.
    with open(local_filename, "wb") as out:
        out.write(response.audio_content)
    print(f'Audio content written to file {local_filename}')

    return local_filename
|