Spaces:
Running
Running
zach
committed on
Commit
·
e91a94a
1
Parent(s):
7854f13
Clean up integration code
Browse files
src/integrations/elevenlabs_api.py
CHANGED
|
@@ -15,6 +15,7 @@ Key Features:
|
|
| 15 |
# Standard Library Imports
|
| 16 |
import logging
|
| 17 |
import random
|
|
|
|
| 18 |
from dataclasses import dataclass, field
|
| 19 |
from typing import Optional, Tuple
|
| 20 |
|
|
@@ -102,28 +103,26 @@ async def text_to_speech_with_elevenlabs(
|
|
| 102 |
"""
|
| 103 |
logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
|
| 104 |
elevenlabs_config = config.elevenlabs_config
|
| 105 |
-
|
|
|
|
| 106 |
try:
|
| 107 |
-
|
| 108 |
-
response = await elevenlabs_config.client.text_to_voice.create_previews(
|
| 109 |
voice_description=character_description,
|
| 110 |
text=text,
|
| 111 |
output_format=elevenlabs_config.output_format,
|
| 112 |
)
|
| 113 |
|
|
|
|
|
|
|
|
|
|
| 114 |
previews = response.previews
|
| 115 |
if not previews:
|
| 116 |
-
|
| 117 |
-
logger.error(msg)
|
| 118 |
-
raise ElevenLabsError(message=msg)
|
| 119 |
|
| 120 |
-
# Extract the base64 encoded audio and generated voice ID from the preview
|
| 121 |
preview = random.choice(previews)
|
| 122 |
generated_voice_id = preview.generated_voice_id
|
| 123 |
base64_audio = preview.audio_base_64
|
| 124 |
filename = f"{generated_voice_id}.mp3"
|
| 125 |
-
|
| 126 |
-
# Write audio to file and return the relative path
|
| 127 |
audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
|
| 128 |
|
| 129 |
return None, audio_file_path
|
|
|
|
| 15 |
# Standard Library Imports
|
| 16 |
import logging
|
| 17 |
import random
|
| 18 |
+
import time
|
| 19 |
from dataclasses import dataclass, field
|
| 20 |
from typing import Optional, Tuple
|
| 21 |
|
|
|
|
| 103 |
"""
|
| 104 |
logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
|
| 105 |
elevenlabs_config = config.elevenlabs_config
|
| 106 |
+
client = elevenlabs_config.client
|
| 107 |
+
start_time = time.time()
|
| 108 |
try:
|
| 109 |
+
response = await client.text_to_voice.create_previews(
|
|
|
|
| 110 |
voice_description=character_description,
|
| 111 |
text=text,
|
| 112 |
output_format=elevenlabs_config.output_format,
|
| 113 |
)
|
| 114 |
|
| 115 |
+
elapsed_time = time.time() - start_time
|
| 116 |
+
logger.info(f"Elevenlabs API request completed in {elapsed_time:.2f} seconds")
|
| 117 |
+
|
| 118 |
previews = response.previews
|
| 119 |
if not previews:
|
| 120 |
+
raise ElevenLabsError(message="No previews returned by ElevenLabs API.")
|
|
|
|
|
|
|
| 121 |
|
|
|
|
| 122 |
preview = random.choice(previews)
|
| 123 |
generated_voice_id = preview.generated_voice_id
|
| 124 |
base64_audio = preview.audio_base_64
|
| 125 |
filename = f"{generated_voice_id}.mp3"
|
|
|
|
|
|
|
| 126 |
audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
|
| 127 |
|
| 128 |
return None, audio_file_path
|
src/integrations/hume_api.py
CHANGED
|
@@ -21,8 +21,7 @@ from typing import Tuple, Union
|
|
| 21 |
from hume import AsyncHumeClient
|
| 22 |
from hume.core.api_error import ApiError
|
| 23 |
from hume.tts import PostedUtterance
|
| 24 |
-
from hume.tts.types import
|
| 25 |
-
from hume.tts.types.format import Format, FormatMp3
|
| 26 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
| 27 |
|
| 28 |
# Local Application Imports
|
|
@@ -47,6 +46,19 @@ class HumeConfig:
|
|
| 47 |
computed_api_key = validate_env_var("HUME_API_KEY")
|
| 48 |
object.__setattr__(self, "api_key", computed_api_key)
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
class HumeError(Exception):
|
| 52 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
@@ -100,27 +112,13 @@ async def text_to_speech_with_hume(
|
|
| 100 |
HumeError: For errors communicating with the Hume API.
|
| 101 |
UnretryableHumeError: For client-side HTTP errors (status code 4xx).
|
| 102 |
"""
|
| 103 |
-
logger.debug(
|
| 104 |
-
"Processing TTS with Hume. "
|
| 105 |
-
f"Character description length: {len(character_description)}. "
|
| 106 |
-
f"Text length: {len(text)}."
|
| 107 |
-
)
|
| 108 |
-
|
| 109 |
hume_config = config.hume_config
|
| 110 |
-
|
| 111 |
start_time = time.time()
|
| 112 |
try:
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
timeout=hume_config.request_timeout
|
| 116 |
-
)
|
| 117 |
-
|
| 118 |
-
utterance = PostedUtterance(
|
| 119 |
-
text=text,
|
| 120 |
-
description=character_description or None
|
| 121 |
-
)
|
| 122 |
-
|
| 123 |
-
response: ReturnTts = await hume_client.tts.synthesize_json(
|
| 124 |
utterances=[utterance],
|
| 125 |
format=hume_config.file_format,
|
| 126 |
)
|
|
@@ -130,14 +128,15 @@ async def text_to_speech_with_hume(
|
|
| 130 |
|
| 131 |
generations = response.generations
|
| 132 |
if not generations:
|
| 133 |
-
|
| 134 |
-
logger.error(msg)
|
| 135 |
-
raise HumeError(msg)
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
return
|
| 141 |
|
| 142 |
except ApiError as e:
|
| 143 |
elapsed_time = time.time() - start_time
|
|
@@ -145,11 +144,7 @@ async def text_to_speech_with_hume(
|
|
| 145 |
clean_message = _extract_hume_api_error_message(e)
|
| 146 |
logger.error(f"Full Hume API error: {e!s}")
|
| 147 |
|
| 148 |
-
if
|
| 149 |
-
hasattr(e, 'status_code')
|
| 150 |
-
and e.status_code is not None
|
| 151 |
-
and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
|
| 152 |
-
):
|
| 153 |
raise UnretryableHumeError(message=clean_message, original_exception=e) from e
|
| 154 |
|
| 155 |
raise HumeError(message=clean_message, original_exception=e) from e
|
|
@@ -158,33 +153,9 @@ async def text_to_speech_with_hume(
|
|
| 158 |
error_type = type(e).__name__
|
| 159 |
error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
|
| 160 |
logger.error("Error during Hume API call: %s - %s", error_type, error_message)
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
def _parse_hume_tts_generation(generation: ReturnGeneration, config: Config) -> Tuple[str, str]:
|
| 165 |
-
"""
|
| 166 |
-
Parses a Hume TTS generation response and saves the decoded audio as an MP3 file.
|
| 167 |
-
|
| 168 |
-
Args:
|
| 169 |
-
generation (ReturnGeneration): TTS generation response containing generation_id and audio.
|
| 170 |
-
config (Config): Application configuration for saving the audio file.
|
| 171 |
-
|
| 172 |
-
Returns:
|
| 173 |
-
Tuple[str, str]: (generation_id, audio_path)
|
| 174 |
|
| 175 |
-
|
| 176 |
-
KeyError: If expected attributes are missing.
|
| 177 |
-
Exception: Propagates exceptions from saving the audio file.
|
| 178 |
-
"""
|
| 179 |
-
if not generation.generation_id:
|
| 180 |
-
raise KeyError("The generation is missing the generation_id.")
|
| 181 |
-
|
| 182 |
-
if not generation.audio:
|
| 183 |
-
raise KeyError("The generation is missing the audio data.")
|
| 184 |
-
|
| 185 |
-
filename = f"{generation.generation_id}.mp3"
|
| 186 |
-
audio_file_path = save_base64_audio_to_file(generation.audio, filename, config)
|
| 187 |
-
return generation.generation_id, audio_file_path
|
| 188 |
|
| 189 |
|
| 190 |
def _extract_hume_api_error_message(e: ApiError) -> str:
|
|
|
|
| 21 |
from hume import AsyncHumeClient
|
| 22 |
from hume.core.api_error import ApiError
|
| 23 |
from hume.tts import PostedUtterance
|
| 24 |
+
from hume.tts.types import Format, FormatMp3, ReturnTts
|
|
|
|
| 25 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
| 26 |
|
| 27 |
# Local Application Imports
|
|
|
|
| 46 |
computed_api_key = validate_env_var("HUME_API_KEY")
|
| 47 |
object.__setattr__(self, "api_key", computed_api_key)
|
| 48 |
|
| 49 |
+
@property
|
| 50 |
+
def client(self) -> AsyncHumeClient:
|
| 51 |
+
"""
|
| 52 |
+
Lazy initialization of the asynchronous Hume client.
|
| 53 |
+
|
| 54 |
+
Returns:
|
| 55 |
+
AsyncHumeClient: Configured async client instance.
|
| 56 |
+
"""
|
| 57 |
+
return AsyncHumeClient(
|
| 58 |
+
api_key=self.api_key,
|
| 59 |
+
timeout=self.request_timeout
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
|
| 63 |
class HumeError(Exception):
|
| 64 |
"""Custom exception for errors related to the Hume TTS API."""
|
|
|
|
| 112 |
HumeError: For errors communicating with the Hume API.
|
| 113 |
UnretryableHumeError: For client-side HTTP errors (status code 4xx).
|
| 114 |
"""
|
| 115 |
+
logger.debug(f"Synthesizing speech with Hume. Text length: {len(text)} characters.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
hume_config = config.hume_config
|
| 117 |
+
client = hume_config.client
|
| 118 |
start_time = time.time()
|
| 119 |
try:
|
| 120 |
+
utterance = PostedUtterance(text=text, description=character_description)
|
| 121 |
+
response: ReturnTts = await client.tts.synthesize_json(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
utterances=[utterance],
|
| 123 |
format=hume_config.file_format,
|
| 124 |
)
|
|
|
|
| 128 |
|
| 129 |
generations = response.generations
|
| 130 |
if not generations:
|
| 131 |
+
raise HumeError("No generations returned by Hume API.")
|
|
|
|
|
|
|
| 132 |
|
| 133 |
+
generation = generations[0]
|
| 134 |
+
generation_id = generation.generation_id
|
| 135 |
+
base64_audio = generation.audio
|
| 136 |
+
filename = f"{generation_id}.mp3"
|
| 137 |
+
audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
|
| 138 |
|
| 139 |
+
return generation_id, audio_file_path
|
| 140 |
|
| 141 |
except ApiError as e:
|
| 142 |
elapsed_time = time.time() - start_time
|
|
|
|
| 144 |
clean_message = _extract_hume_api_error_message(e)
|
| 145 |
logger.error(f"Full Hume API error: {e!s}")
|
| 146 |
|
| 147 |
+
if e.status_code is not None and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
raise UnretryableHumeError(message=clean_message, original_exception=e) from e
|
| 149 |
|
| 150 |
raise HumeError(message=clean_message, original_exception=e) from e
|
|
|
|
| 153 |
error_type = type(e).__name__
|
| 154 |
error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
|
| 155 |
logger.error("Error during Hume API call: %s - %s", error_type, error_message)
|
| 156 |
+
clean_message = "An unexpected error occurred while processing your speech request. Please try again later."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
+
raise HumeError(message=clean_message, original_exception=e) from e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
|
| 160 |
|
| 161 |
def _extract_hume_api_error_message(e: ApiError) -> str:
|