import json
import os
import tempfile
from pathlib import Path

import requests
from google.cloud import texttospeech
from openai import OpenAI


def get_openai_tts(text: str, local_filename):
    """Synthesize *text* with the OpenAI speech endpoint and save it to a file.

    Args:
        text: The text to convert to speech.
        local_filename: Path the generated audio is written to.

    Returns:
        ``local_filename``, unchanged, for caller convenience.
    """
    # The key is read from OPENAI_KEY; when that variable is unset, None is
    # handed to the client as-is (no validation happens here).
    api_key = os.getenv("OPENAI_KEY")
    client = OpenAI(api_key=api_key)

    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text,
    )
    # NOTE(review): newer openai SDK releases reportedly deprecate
    # stream_to_file in favour of with_streaming_response -- confirm against
    # the pinned SDK version before changing this call.
    response.stream_to_file(local_filename)
    return local_filename


def get_google_credentials() -> str:
    """Write the GCP credentials from the environment to a temporary file.

    The JSON document is read, as a string, from the ``GCP_CREDENTIAL_JSON``
    environment variable and written to a ``.json`` temporary file.

    Returns:
        The path of the temporary file. The file is created with
        ``delete=False``, so it outlives this call and the caller is
        responsible for removing it.

    Raises:
        TypeError: If ``GCP_CREDENTIAL_JSON`` is not set. (The exception type
            matches what the failed write raised before this check existed,
            so existing handlers keep working.)
    """
    creds_json_str = os.getenv("GCP_CREDENTIAL_JSON")
    # Validate *before* creating the file: otherwise a missing variable would
    # leave an empty, never-deleted temp file behind when the write fails.
    if creds_json_str is None:
        raise TypeError(
            "GCP_CREDENTIAL_JSON environment variable is not set; "
            "cannot write Google credentials file"
        )

    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".json", encoding="utf-8"
    ) as temp:
        temp.write(creds_json_str)
        temp_filename = temp.name

    return temp_filename


def get_google_tts(text: str, local_filename, *, language_code: str = "ja-JP"):
    """Synthesize *text* with Google Cloud Text-to-Speech and save it as MP3.

    Args:
        text: The text to convert to speech.
        local_filename: Path the MP3 audio is written to.
        language_code: BCP-47 language code of the voice to request.
            Defaults to ``"ja-JP"``, the value this function has always used.

    Returns:
        ``local_filename``, unchanged, for caller convenience.
    """
    # No explicit credentials are passed here; the client presumably resolves
    # them from the environment -- TODO confirm how get_google_credentials()
    # is wired in by callers.
    client = texttospeech.TextToSpeechClient()

    # The text input to be synthesized.
    synthesis_input = texttospeech.SynthesisInput(text=text)

    # Voice request: the requested language and a neutral SSML voice gender.
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )

    # Ask for MP3-encoded audio back.
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    response = client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )

    # The response's audio_content is binary, hence the "wb" mode.
    with open(local_filename, "wb") as out:
        out.write(response.audio_content)
        print(f'Audio content written to file {local_filename}')

    return local_filename


def get_elevenlabs_tts(
    text: str,
    local_filename,
    *,
    voice_id: str = "21m00Tcm4TlvDq8ikWAM",
    timeout: float = 120,
):
    """Call the Eleven Labs API to generate speech from text.

    Args:
        text: The text to convert to speech.
        local_filename: Path to save the generated audio file.
        voice_id: Eleven Labs voice identifier appended to the endpoint URL.
            Defaults to the previously hard-coded voice (Rachel).
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block indefinitely.

    Returns:
        Path to the generated audio file (``local_filename``).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    api_key = os.getenv("ELEVENLABS_API_KEY")

    # API endpoint for text-to-speech.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": api_key,
    }

    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    response = requests.post(url, json=data, headers=headers, timeout=timeout)

    # Fail loudly on 4xx/5xx instead of writing an error body to the file.
    response.raise_for_status()

    with open(local_filename, "wb") as f:
        f.write(response.content)

    return local_filename