TTS-Arena-JA

Paused

TTS-Arena-JA / utils.py

alan

added openai

c5c2329 over 1 year ago

2 kB

	import os
	import json
	import tempfile
	from google.cloud import texttospeech

	from pathlib import Path
	from openai import OpenAI

	def get_openai_tts(text, local_filename, *, model="tts-1", voice="alloy"):
	    """Synthesize *text* with the OpenAI speech API and save it to a file.

	    Args:
	        text: Text to convert to speech.
	        local_filename: Path the generated audio is streamed to.
	        model: Name of the OpenAI speech model. Defaults to ``"tts-1"``,
	            the value this function previously hard-coded.
	        voice: Name of the OpenAI voice. Defaults to ``"alloy"``, the
	            value this function previously hard-coded.

	    Returns:
	        ``local_filename``, unchanged, so the call can be used inline.
	    """
	    # The API key is read from the OPENAI_KEY environment variable and
	    # handed to the client explicitly.
	    client = OpenAI(api_key=os.getenv("OPENAI_KEY"))

	    # No response format is requested, so the audio is returned in
	    # whatever format the API uses by default.
	    response = client.audio.speech.create(
	        model=model,
	        voice=voice,
	        input=text,
	    )
	    response.stream_to_file(local_filename)
	    return local_filename

	def get_google_credentials():
	    """Write the credentials JSON from the environment to a temporary file.

	    The JSON document is read, as a string, from the ``GCP_CREDENTIAL_JSON``
	    environment variable and written verbatim to a new ``.json`` temporary
	    file.

	    Returns:
	        Path of the temporary file. The file is created with
	        ``delete=False``, so it is not removed automatically; the caller
	        is responsible for deleting it when it is no longer needed.

	    Raises:
	        RuntimeError: If ``GCP_CREDENTIAL_JSON`` is unset or empty.
	    """
	    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")

	    # Validate before creating the file: otherwise a missing variable
	    # fails inside write() with an opaque TypeError and leaves an orphaned
	    # temporary file behind.
	    if not creds_json_str:
	        raise RuntimeError(
	            "GCP_CREDENTIAL_JSON environment variable is not set or is empty"
	        )

	    with tempfile.NamedTemporaryFile(
	        mode="w", delete=False, suffix=".json", encoding="utf-8"
	    ) as temp:
	        temp.write(creds_json_str)
	        temp_filename = temp.name

	    return temp_filename

	def get_google_tts(text, local_filename):
	    """Synthesize *text* with Google Cloud Text-to-Speech and save it.

	    The request asks for a "ja-JP" voice with neutral SSML gender and MP3
	    audio encoding. The returned audio bytes are written to
	    ``local_filename`` and a confirmation line is printed.

	    Args:
	        text: Text to convert to speech.
	        local_filename: Path the audio bytes are written to.

	    Returns:
	        ``local_filename``, unchanged.
	    """
	    # The client is built with no arguments, so it relies on its own
	    # default configuration.
	    tts_client = texttospeech.TextToSpeechClient()

	    # One request carries the text, the voice selection (Japanese, neutral
	    # gender) and the desired output encoding (MP3).
	    result = tts_client.synthesize_speech(
	        input=texttospeech.SynthesisInput(text=text),
	        voice=texttospeech.VoiceSelectionParams(
	            language_code="ja-JP",
	            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
	        ),
	        audio_config=texttospeech.AudioConfig(
	            audio_encoding=texttospeech.AudioEncoding.MP3,
	        ),
	    )

	    # audio_content is raw bytes, hence the binary write mode.
	    with open(local_filename, "wb") as audio_file:
	        audio_file.write(result.audio_content)
	        print(f'Audio content written to file {local_filename}')

	    return local_filename