Spaces:
Running
Running
zach
committed on
Commit
·
fc85b67
1
Parent(s):
9dc43bf
Fix types in integrations package
Browse files- src/integrations/anthropic_api.py +85 -71
- src/integrations/elevenlabs_api.py +18 -9
- src/integrations/hume_api.py +38 -33
src/integrations/anthropic_api.py
CHANGED
|
@@ -20,12 +20,12 @@ Functions:
|
|
| 20 |
|
| 21 |
# Standard Library Imports
|
| 22 |
import logging
|
| 23 |
-
from dataclasses import dataclass
|
| 24 |
-
from typing import List, Optional, Union
|
| 25 |
|
| 26 |
# Third-Party Library Imports
|
| 27 |
from anthropic import Anthropic, APIError
|
| 28 |
-
from anthropic.types import Message, ModelParam, TextBlock
|
| 29 |
from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
|
| 30 |
|
| 31 |
# Local Application Imports
|
|
@@ -33,65 +33,65 @@ from src.config import Config, logger
|
|
| 33 |
from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
|
| 34 |
from src.utils import truncate_text, validate_env_var
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
@dataclass(frozen=True)
|
| 38 |
class AnthropicConfig:
|
| 39 |
"""Immutable configuration for interacting with the Anthropic API."""
|
| 40 |
|
| 41 |
-
api_key:
|
|
|
|
| 42 |
model: ModelParam = "claude-3-5-sonnet-latest"
|
| 43 |
max_tokens: int = 150
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
def __post_init__(self):
|
| 49 |
-
# Validate that required attributes are set
|
| 50 |
-
if not self.api_key:
|
| 51 |
-
api_key = validate_env_var("ANTHROPIC_API_KEY")
|
| 52 |
-
object.__setattr__(self, "api_key", api_key)
|
| 53 |
if not self.model:
|
| 54 |
raise ValueError("Anthropic Model is not set.")
|
| 55 |
if not self.max_tokens:
|
| 56 |
raise ValueError("Anthropic Max Tokens is not set.")
|
| 57 |
-
if self.system_prompt is None:
|
| 58 |
-
system_prompt: str = f"""You are an expert at generating micro-content optimized for text-to-speech
|
| 59 |
-
synthesis. Your absolute priority is delivering complete, untruncated responses within strict length limits.
|
| 60 |
-
|
| 61 |
-
CRITICAL LENGTH CONSTRAINTS:
|
| 62 |
-
|
| 63 |
-
Maximum length: {self.max_tokens} tokens (approximately 400 characters)
|
| 64 |
-
You MUST complete all thoughts and sentences
|
| 65 |
-
Responses should be 25% shorter than you initially plan
|
| 66 |
-
Never exceed 400 characters total
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
Cut it down to 75% of its original length
|
| 72 |
-
Reserve the last 100 characters for a proper conclusion
|
| 73 |
-
If you start running long, immediately wrap up
|
| 74 |
-
End every piece with a clear conclusion
|
| 75 |
-
|
| 76 |
-
Content Requirements:
|
| 77 |
-
|
| 78 |
-
Allow natural emotional progression
|
| 79 |
-
Create an arc of connected moments
|
| 80 |
-
Use efficient but expressive language
|
| 81 |
-
Balance description with emotional depth
|
| 82 |
-
Ensure perfect completion
|
| 83 |
-
No meta-commentary or formatting
|
| 84 |
-
|
| 85 |
-
Structure for Emotional Pieces:
|
| 86 |
-
|
| 87 |
-
Opening hook (50-75 characters)
|
| 88 |
-
Emotional journey (200-250 characters)
|
| 89 |
-
Resolution (75-100 characters)
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
object.__setattr__(self, "system_prompt", system_prompt)
|
| 95 |
|
| 96 |
@property
|
| 97 |
def client(self) -> Anthropic:
|
|
@@ -127,7 +127,7 @@ Remember: A shorter, complete response is ALWAYS better than a longer, truncated
|
|
| 127 |
class AnthropicError(Exception):
|
| 128 |
"""Custom exception for errors related to the Anthropic API."""
|
| 129 |
|
| 130 |
-
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
| 131 |
super().__init__(message)
|
| 132 |
self.original_exception = original_exception
|
| 133 |
self.message = message
|
|
@@ -136,7 +136,7 @@ class AnthropicError(Exception):
|
|
| 136 |
class UnretryableAnthropicError(AnthropicError):
|
| 137 |
"""Custom exception for errors related to the Anthropic API that should not be retried."""
|
| 138 |
|
| 139 |
-
def __init__(self, message: str, original_exception: Optional[Exception] = None):
|
| 140 |
super().__init__(message, original_exception)
|
| 141 |
|
| 142 |
|
|
@@ -151,23 +151,29 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
|
|
| 151 |
"""
|
| 152 |
Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
|
| 153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
Args:
|
| 155 |
-
character_description (str): The input character description used to assist with generating text
|
|
|
|
| 156 |
|
| 157 |
Returns:
|
| 158 |
str: The generated text.
|
| 159 |
|
| 160 |
Raises:
|
| 161 |
-
|
|
|
|
| 162 |
"""
|
| 163 |
-
# Build prompt for claude with character description
|
| 164 |
-
anthropic_config = config.anthropic_config
|
| 165 |
-
prompt = anthropic_config.build_expressive_prompt(character_description)
|
| 166 |
-
logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
|
| 167 |
-
|
| 168 |
-
response = None
|
| 169 |
try:
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
response: Message = anthropic_config.client.messages.create(
|
| 172 |
model=anthropic_config.model,
|
| 173 |
max_tokens=anthropic_config.max_tokens,
|
|
@@ -176,17 +182,17 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
|
|
| 176 |
)
|
| 177 |
logger.debug(f"API response received: {truncate_text(str(response))}")
|
| 178 |
|
| 179 |
-
|
| 180 |
-
if not hasattr(response, "content"):
|
| 181 |
logger.error("Response is missing 'content'. Response: %s", response)
|
| 182 |
raise AnthropicError('Invalid API response: Missing "content".')
|
| 183 |
|
| 184 |
-
|
| 185 |
-
|
| 186 |
if isinstance(blocks, list):
|
| 187 |
result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
|
| 188 |
logger.debug(f"Processed response from list: {truncate_text(result)}")
|
| 189 |
return result
|
|
|
|
| 190 |
if isinstance(blocks, TextBlock):
|
| 191 |
logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
|
| 192 |
return blocks.text
|
|
@@ -195,13 +201,21 @@ def generate_text_with_claude(character_description: str, config: Config) -> str
|
|
| 195 |
return str(blocks or "No content generated.")
|
| 196 |
|
| 197 |
except Exception as e:
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
raise AnthropicError(
|
| 205 |
-
message=(
|
| 206 |
original_exception=e,
|
| 207 |
) from e
|
|
|
|
| 20 |
|
| 21 |
# Standard Library Imports
|
| 22 |
import logging
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from typing import Any, Dict, List, Optional, Union, cast
|
| 25 |
|
| 26 |
# Third-Party Library Imports
|
| 27 |
from anthropic import Anthropic, APIError
|
| 28 |
+
from anthropic.types import Message, ModelParam, TextBlock, ToolUseBlock
|
| 29 |
from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
|
| 30 |
|
| 31 |
# Local Application Imports
|
|
|
|
| 33 |
from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
|
| 34 |
from src.utils import truncate_text, validate_env_var
|
| 35 |
|
| 36 |
+
# System-prompt template for Claude; rendered by AnthropicConfig.__post_init__
# via .format(max_tokens=...). The only placeholder is {max_tokens}.
PROMPT_TEMPLATE: str = (
    """You are an expert at generating micro-content optimized for text-to-speech synthesis.
Your absolute priority is delivering complete, untruncated responses within strict length limits.

CRITICAL LENGTH CONSTRAINTS:
- Maximum length: {max_tokens} tokens (approximately 400 characters)
- You MUST complete all thoughts and sentences
- Responses should be 25% shorter than you initially plan
- Never exceed 400 characters total

Response Generation Process:
- Draft your response mentally first
- Cut it down to 75% of its original length
- Reserve the last 100 characters for a proper conclusion
- If you start running long, immediately wrap up
- End every piece with a clear conclusion

Content Requirements:
- Allow natural emotional progression
- Create an arc of connected moments
- Use efficient but expressive language
- Balance description with emotional depth
- Ensure perfect completion
- No meta-commentary or formatting

Structure for Emotional Pieces:
- Opening hook (50-75 characters)
- Emotional journey (200-250 characters)
- Resolution (75-100 characters)

MANDATORY: If you find yourself reaching 300 characters, immediately begin your conclusion regardless of
where you are in the narrative.

Remember: A shorter, complete response is ALWAYS better than a longer, truncated one."""
)
|
| 71 |
|
| 72 |
@dataclass(frozen=True)
class AnthropicConfig:
    """Immutable configuration for interacting with the Anthropic API.

    The API key and system prompt are not supplied by callers: both are
    computed in ``__post_init__``. The key is read from the
    ``ANTHROPIC_API_KEY`` environment variable and the prompt is rendered
    from ``PROMPT_TEMPLATE`` using ``max_tokens``.
    """

    # Computed fields (excluded from __init__, set in __post_init__).
    api_key: str = field(init=False)
    system_prompt: str = field(init=False)

    # Caller-provided fields.
    model: ModelParam = "claude-3-5-sonnet-latest"
    max_tokens: int = 150

    def __post_init__(self) -> None:
        # Validate required non-computed attributes.
        if not self.model:
            raise ValueError("Anthropic Model is not set.")
        if not self.max_tokens:
            raise ValueError("Anthropic Max Tokens is not set.")

        # Compute the API key from the environment. object.__setattr__ is
        # required because the dataclass is frozen.
        computed_api_key = validate_env_var("ANTHROPIC_API_KEY")
        object.__setattr__(self, "api_key", computed_api_key)

        # Render the system prompt with the configured token budget.
        computed_prompt = PROMPT_TEMPLATE.format(max_tokens=self.max_tokens)
        object.__setattr__(self, "system_prompt", computed_prompt)
|
|
|
|
| 95 |
|
| 96 |
@property
|
| 97 |
def client(self) -> Anthropic:
|
|
|
|
| 127 |
class AnthropicError(Exception):
    """Custom exception for errors related to the Anthropic API.

    Attributes:
        message (str): Human-readable description of the failure.
        original_exception (Optional[Exception]): The underlying exception
            that triggered this error, if any.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
        super().__init__(message)
        self.original_exception = original_exception
        self.message = message
|
|
|
|
| 136 |
class UnretryableAnthropicError(AnthropicError):
    """Custom exception for Anthropic API errors that should not be retried.

    Raised for client-side (4xx) failures where retrying the same request
    cannot succeed; retry wrappers should re-raise this immediately.
    """

    def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
        super().__init__(message, original_exception)
|
| 141 |
|
| 142 |
|
|
|
|
| 151 |
"""
|
| 152 |
Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
|
| 153 |
|
| 154 |
+
This function includes retry logic and error translation. It raises a custom
|
| 155 |
+
UnretryableAnthropicError for API errors deemed unretryable and AnthropicError
|
| 156 |
+
for other errors.
|
| 157 |
+
|
| 158 |
Args:
|
| 159 |
+
character_description (str): The input character description used to assist with generating text.
|
| 160 |
+
config (Config): Application configuration including Anthropic settings.
|
| 161 |
|
| 162 |
Returns:
|
| 163 |
str: The generated text.
|
| 164 |
|
| 165 |
Raises:
|
| 166 |
+
UnretryableAnthropicError: For errors that should not be retried.
|
| 167 |
+
AnthropicError: For other errors communicating with the Anthropic API.
|
| 168 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
try:
|
| 170 |
+
anthropic_config = config.anthropic_config
|
| 171 |
+
prompt = anthropic_config.build_expressive_prompt(character_description)
|
| 172 |
+
logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
|
| 173 |
+
|
| 174 |
+
# Ensure system_prompt is set (guaranteed by __post_init__)
|
| 175 |
+
assert anthropic_config.system_prompt is not None, "system_prompt must be set."
|
| 176 |
+
|
| 177 |
response: Message = anthropic_config.client.messages.create(
|
| 178 |
model=anthropic_config.model,
|
| 179 |
max_tokens=anthropic_config.max_tokens,
|
|
|
|
| 182 |
)
|
| 183 |
logger.debug(f"API response received: {truncate_text(str(response))}")
|
| 184 |
|
| 185 |
+
if not hasattr(response, "content") or response.content is None:
|
|
|
|
| 186 |
logger.error("Response is missing 'content'. Response: %s", response)
|
| 187 |
raise AnthropicError('Invalid API response: Missing "content".')
|
| 188 |
|
| 189 |
+
blocks: Union[List[Union[TextBlock, ToolUseBlock]], TextBlock, None] = response.content
|
| 190 |
+
|
| 191 |
if isinstance(blocks, list):
|
| 192 |
result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
|
| 193 |
logger.debug(f"Processed response from list: {truncate_text(result)}")
|
| 194 |
return result
|
| 195 |
+
|
| 196 |
if isinstance(blocks, TextBlock):
|
| 197 |
logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
|
| 198 |
return blocks.text
|
|
|
|
| 201 |
return str(blocks or "No content generated.")
|
| 202 |
|
| 203 |
except Exception as e:
|
| 204 |
+
# If the error is an APIError, check if it's unretryable.
|
| 205 |
+
if isinstance(e, APIError):
|
| 206 |
+
status_code: Optional[int] = getattr(e, "status_code", None)
|
| 207 |
+
if status_code is not None and CLIENT_ERROR_CODE <= status_code < SERVER_ERROR_CODE:
|
| 208 |
+
error_body: Any = e.body
|
| 209 |
+
error_message: str = "Unknown error"
|
| 210 |
+
if isinstance(error_body, dict):
|
| 211 |
+
error_message = cast(Dict[str, Any], error_body).get("error", {}).get("message", "Unknown error")
|
| 212 |
+
raise UnretryableAnthropicError(
|
| 213 |
+
message=f'"{error_message}"',
|
| 214 |
+
original_exception=e,
|
| 215 |
+
) from e
|
| 216 |
+
|
| 217 |
+
# For all other errors, wrap them in an AnthropicError.
|
| 218 |
raise AnthropicError(
|
| 219 |
+
message=str(e),
|
| 220 |
original_exception=e,
|
| 221 |
) from e
|
src/integrations/elevenlabs_api.py
CHANGED
|
@@ -22,7 +22,7 @@ Functions:
|
|
| 22 |
# Standard Library Imports
|
| 23 |
import logging
|
| 24 |
import random
|
| 25 |
-
from dataclasses import dataclass
|
| 26 |
from typing import Optional, Tuple
|
| 27 |
|
| 28 |
# Third-Party Library Imports
|
|
@@ -40,14 +40,17 @@ from src.utils import save_base64_audio_to_file, validate_env_var
|
|
| 40 |
class ElevenLabsConfig:
|
| 41 |
"""Immutable configuration for interacting with the ElevenLabs TTS API."""
|
| 42 |
|
| 43 |
-
api_key:
|
| 44 |
output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"
|
| 45 |
|
| 46 |
def __post_init__(self):
|
| 47 |
-
# Validate
|
| 48 |
-
if not self.
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
@property
|
| 53 |
def client(self) -> ElevenLabs:
|
|
@@ -83,7 +86,9 @@ class UnretryableElevenLabsError(ElevenLabsError):
|
|
| 83 |
after=after_log(logger, logging.DEBUG),
|
| 84 |
reraise=True,
|
| 85 |
)
|
| 86 |
-
def text_to_speech_with_elevenlabs(
|
|
|
|
|
|
|
| 87 |
"""
|
| 88 |
Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
|
| 89 |
|
|
@@ -94,7 +99,7 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
|
|
| 94 |
Returns:
|
| 95 |
Tuple[None, str]: A tuple containing:
|
| 96 |
- generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
|
| 97 |
-
across TTS integrations
|
| 98 |
- file_path (str): The relative file path to the audio file where the synthesized speech was saved.
|
| 99 |
|
| 100 |
Raises:
|
|
@@ -129,7 +134,11 @@ def text_to_speech_with_elevenlabs(character_description: str, text: str, config
|
|
| 129 |
return None, audio_file_path
|
| 130 |
|
| 131 |
except Exception as e:
|
| 132 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
raise UnretryableElevenLabsError(
|
| 134 |
message=f"{e.body['detail']['message']}",
|
| 135 |
original_exception=e,
|
|
|
|
| 22 |
# Standard Library Imports
|
| 23 |
import logging
|
| 24 |
import random
|
| 25 |
+
from dataclasses import dataclass, field
|
| 26 |
from typing import Optional, Tuple
|
| 27 |
|
| 28 |
# Third-Party Library Imports
|
|
|
|
@dataclass(frozen=True)
class ElevenLabsConfig:
    """Immutable configuration for interacting with the ElevenLabs TTS API.

    The API key is computed in ``__post_init__`` from the
    ``ELEVENLABS_API_KEY`` environment variable rather than supplied by callers.
    """

    # Computed field (excluded from __init__, set in __post_init__).
    api_key: str = field(init=False)

    # Caller-provided field.
    output_format: TextToVoiceCreatePreviewsRequestOutputFormat = "mp3_44100_128"

    def __post_init__(self) -> None:
        # Validate required attributes.
        if not self.output_format:
            raise ValueError("ElevenLabs TTS API output format is not set.")

        # Compute the API key from the environment. object.__setattr__ is
        # required because the dataclass is frozen.
        computed_key = validate_env_var("ELEVENLABS_API_KEY")
        object.__setattr__(self, "api_key", computed_key)
|
| 54 |
|
| 55 |
@property
|
| 56 |
def client(self) -> ElevenLabs:
|
|
|
|
| 86 |
after=after_log(logger, logging.DEBUG),
|
| 87 |
reraise=True,
|
| 88 |
)
|
| 89 |
+
def text_to_speech_with_elevenlabs(
|
| 90 |
+
character_description: str, text: str, config: Config
|
| 91 |
+
) -> Tuple[None, str]:
|
| 92 |
"""
|
| 93 |
Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
|
| 94 |
|
|
|
|
| 99 |
Returns:
|
| 100 |
Tuple[None, str]: A tuple containing:
|
| 101 |
- generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
|
| 102 |
+
across TTS integrations.
|
| 103 |
- file_path (str): The relative file path to the audio file where the synthesized speech was saved.
|
| 104 |
|
| 105 |
Raises:
|
|
|
|
| 134 |
return None, audio_file_path
|
| 135 |
|
| 136 |
except Exception as e:
|
| 137 |
+
if (
|
| 138 |
+
isinstance(e, ApiError)
|
| 139 |
+
and e.status_code is not None
|
| 140 |
+
and CLIENT_ERROR_CODE <= e.status_code < SERVER_ERROR_CODE
|
| 141 |
+
):
|
| 142 |
raise UnretryableElevenLabsError(
|
| 143 |
message=f"{e.body['detail']['message']}",
|
| 144 |
original_exception=e,
|
src/integrations/hume_api.py
CHANGED
|
@@ -20,8 +20,8 @@ Functions:
|
|
| 20 |
|
| 21 |
# Standard Library Imports
|
| 22 |
import logging
|
| 23 |
-
from dataclasses import dataclass
|
| 24 |
-
from typing import Any, Dict, Literal,
|
| 25 |
|
| 26 |
# Third-Party Library Imports
|
| 27 |
import requests
|
|
@@ -34,43 +34,44 @@ from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
|
|
| 34 |
from src.utils import save_base64_audio_to_file, validate_env_var
|
| 35 |
|
| 36 |
HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
|
| 37 |
-
"""
|
| 38 |
|
| 39 |
|
| 40 |
@dataclass(frozen=True)
|
| 41 |
class HumeConfig:
|
| 42 |
"""Immutable configuration for interacting with the Hume TTS API."""
|
| 43 |
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
url: str = "https://test-api.hume.ai/v0/tts/octave"
|
| 46 |
-
headers: dict = None
|
| 47 |
file_format: HumeSupportedFileFormat = "mp3"
|
| 48 |
|
| 49 |
-
def __post_init__(self):
|
| 50 |
-
# Validate required attributes
|
| 51 |
-
if not self.api_key:
|
| 52 |
-
api_key = validate_env_var("HUME_API_KEY")
|
| 53 |
-
object.__setattr__(self, "api_key", api_key)
|
| 54 |
if not self.url:
|
| 55 |
raise ValueError("Hume TTS endpoint URL is not set.")
|
| 56 |
if not self.file_format:
|
| 57 |
raise ValueError("Hume TTS file format is not set.")
|
| 58 |
|
| 59 |
-
#
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
|
|
|
| 68 |
|
| 69 |
|
| 70 |
class HumeError(Exception):
|
| 71 |
"""Custom exception for errors related to the Hume TTS API."""
|
| 72 |
|
| 73 |
-
def __init__(self, message: str, original_exception:
|
| 74 |
super().__init__(message)
|
| 75 |
self.original_exception = original_exception
|
| 76 |
self.message = message
|
|
@@ -79,14 +80,11 @@ class HumeError(Exception):
|
|
| 79 |
class UnretryableHumeError(HumeError):
|
| 80 |
"""Custom exception for errors related to the Hume TTS API that should not be retried."""
|
| 81 |
|
| 82 |
-
def __init__(self, message: str, original_exception:
|
| 83 |
-
super().__init__(message)
|
| 84 |
self.original_exception = original_exception
|
| 85 |
|
| 86 |
|
| 87 |
-
# Initialize the Hume client
|
| 88 |
-
|
| 89 |
-
|
| 90 |
@retry(
|
| 91 |
stop=stop_after_attempt(3),
|
| 92 |
wait=wait_fixed(2),
|
|
@@ -95,7 +93,10 @@ class UnretryableHumeError(HumeError):
|
|
| 95 |
reraise=True,
|
| 96 |
)
|
| 97 |
def text_to_speech_with_hume(
|
| 98 |
-
character_description: str,
|
|
|
|
|
|
|
|
|
|
| 99 |
) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
| 100 |
"""
|
| 101 |
Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
|
|
@@ -110,9 +111,10 @@ def text_to_speech_with_hume(
|
|
| 110 |
character_description (str): A description of the character, which is used as contextual input
|
| 111 |
for generating the voice.
|
| 112 |
text (str): The text to be converted to speech.
|
| 113 |
-
num_generations (int
|
| 114 |
Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
|
| 115 |
-
generation is expected in the API response.
|
|
|
|
| 116 |
|
| 117 |
Returns:
|
| 118 |
Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
|
@@ -137,9 +139,7 @@ def text_to_speech_with_hume(
|
|
| 137 |
hume_config = config.hume_config
|
| 138 |
request_body = {
|
| 139 |
"utterances": [{"text": text, "description": character_description or None}],
|
| 140 |
-
"format": {
|
| 141 |
-
"type": hume_config.file_format,
|
| 142 |
-
},
|
| 143 |
"num_generations": num_generations,
|
| 144 |
}
|
| 145 |
|
|
@@ -159,7 +159,7 @@ def text_to_speech_with_hume(
|
|
| 159 |
logger.error(msg)
|
| 160 |
raise HumeError(msg)
|
| 161 |
|
| 162 |
-
# Extract the base64 encoded audio and generation ID from the generation
|
| 163 |
generation_a = generations[0]
|
| 164 |
generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
|
| 165 |
|
|
@@ -171,7 +171,11 @@ def text_to_speech_with_hume(
|
|
| 171 |
return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
|
| 172 |
|
| 173 |
except Exception as e:
|
| 174 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
raise UnretryableHumeError(
|
| 176 |
message=f"{e.response.text}",
|
| 177 |
original_exception=e,
|
|
@@ -197,6 +201,7 @@ def parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tup
|
|
| 197 |
Expected keys are:
|
| 198 |
- "generation_id" (str): A unique identifier for the generated audio.
|
| 199 |
- "audio" (str): A base64 encoded string of the audio data.
|
|
|
|
| 200 |
|
| 201 |
Returns:
|
| 202 |
Tuple[str, str]: A tuple containing:
|
|
|
|
| 20 |
|
| 21 |
# Standard Library Imports
|
| 22 |
import logging
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from typing import Any, Dict, Literal, Tuple, Union
|
| 25 |
|
| 26 |
# Third-Party Library Imports
|
| 27 |
import requests
|
|
|
|
| 34 |
from src.utils import save_base64_audio_to_file, validate_env_var
|
| 35 |
|
| 36 |
HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
|
| 37 |
+
"""Supported audio file formats for the Hume TTS API"""
|
| 38 |
|
| 39 |
|
| 40 |
@dataclass(frozen=True)
class HumeConfig:
    """Immutable configuration for interacting with the Hume TTS API.

    The API key and the request headers are computed in ``__post_init__``:
    the key is read from the ``HUME_API_KEY`` environment variable and the
    headers are derived from it.
    """

    # Computed fields (excluded from __init__, set in __post_init__).
    api_key: str = field(init=False)
    headers: Dict[str, str] = field(init=False)

    # Caller-provided fields.
    url: str = "https://test-api.hume.ai/v0/tts/octave"
    file_format: HumeSupportedFileFormat = "mp3"

    def __post_init__(self) -> None:
        # Validate required attributes.
        if not self.url:
            raise ValueError("Hume TTS endpoint URL is not set.")
        if not self.file_format:
            raise ValueError("Hume TTS file format is not set.")

        # Compute the API key from the environment. object.__setattr__ is
        # required because the dataclass is frozen.
        computed_api_key = validate_env_var("HUME_API_KEY")
        object.__setattr__(self, "api_key", computed_api_key)

        # Compute the request headers from the API key.
        computed_headers = {
            "X-Hume-Api-Key": f"{computed_api_key}",
            "Content-Type": "application/json",
        }
        object.__setattr__(self, "headers", computed_headers)
|
| 69 |
|
| 70 |
|
| 71 |
class HumeError(Exception):
    """Custom exception for errors related to the Hume TTS API.

    Attributes:
        message (str): Human-readable description of the failure.
        original_exception (Union[Exception, None]): The underlying exception
            that triggered this error, if any.
    """

    def __init__(self, message: str, original_exception: Union[Exception, None] = None):
        super().__init__(message)
        self.original_exception = original_exception
        self.message = message
|
|
|
|
| 80 |
class UnretryableHumeError(HumeError):
|
| 81 |
"""Custom exception for errors related to the Hume TTS API that should not be retried."""
|
| 82 |
|
| 83 |
+
def __init__(self, message: str, original_exception: Union[Exception, None] = None):
|
| 84 |
+
super().__init__(message, original_exception)
|
| 85 |
self.original_exception = original_exception
|
| 86 |
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
@retry(
|
| 89 |
stop=stop_after_attempt(3),
|
| 90 |
wait=wait_fixed(2),
|
|
|
|
| 93 |
reraise=True,
|
| 94 |
)
|
| 95 |
def text_to_speech_with_hume(
|
| 96 |
+
character_description: str,
|
| 97 |
+
text: str,
|
| 98 |
+
num_generations: int,
|
| 99 |
+
config: Config,
|
| 100 |
) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
| 101 |
"""
|
| 102 |
Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
|
|
|
|
| 111 |
character_description (str): A description of the character, which is used as contextual input
|
| 112 |
for generating the voice.
|
| 113 |
text (str): The text to be converted to speech.
|
| 114 |
+
num_generations (int): The number of audio generations to request from the API.
|
| 115 |
Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
|
| 116 |
+
generation is expected in the API response.
|
| 117 |
+
config (Config): The application configuration containing Hume API settings.
|
| 118 |
|
| 119 |
Returns:
|
| 120 |
Union[Tuple[str, str], Tuple[str, str, str, str]]:
|
|
|
|
| 139 |
hume_config = config.hume_config
|
| 140 |
request_body = {
|
| 141 |
"utterances": [{"text": text, "description": character_description or None}],
|
| 142 |
+
"format": {"type": hume_config.file_format},
|
|
|
|
|
|
|
| 143 |
"num_generations": num_generations,
|
| 144 |
}
|
| 145 |
|
|
|
|
| 159 |
logger.error(msg)
|
| 160 |
raise HumeError(msg)
|
| 161 |
|
| 162 |
+
# Extract the base64 encoded audio and generation ID from the generation.
|
| 163 |
generation_a = generations[0]
|
| 164 |
generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
|
| 165 |
|
|
|
|
| 171 |
return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
|
| 172 |
|
| 173 |
except Exception as e:
|
| 174 |
+
if (
|
| 175 |
+
isinstance(e, HTTPError)
|
| 176 |
+
and e.response is not None
|
| 177 |
+
and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
|
| 178 |
+
):
|
| 179 |
raise UnretryableHumeError(
|
| 180 |
message=f"{e.response.text}",
|
| 181 |
original_exception=e,
|
|
|
|
| 201 |
Expected keys are:
|
| 202 |
- "generation_id" (str): A unique identifier for the generated audio.
|
| 203 |
- "audio" (str): A base64 encoded string of the audio data.
|
| 204 |
+
config (Config): The application configuration used for saving the audio file.
|
| 205 |
|
| 206 |
Returns:
|
| 207 |
Tuple[str, str]: A tuple containing:
|