# Source: Run_code_api/src/services/tts_service.py
# Commit 7f15e1c (ABAO77): "feat: text to speech for AI response"
"""
Text-to-Speech (TTS) Service using Deepgram API
"""
import asyncio
import base64
import functools
import os
from typing import Optional

import requests

from src.utils.logger import logger
class TTSService:
    """Service for handling text-to-speech conversion using the Deepgram API.

    The service reads its API key from the environment at construction time
    and exposes a single coroutine, :meth:`text_to_speech`, that returns the
    synthesized audio as a base64 string together with some metadata.
    """

    # Upper bound on the number of characters sent in one request. The
    # truncation marker is counted against this budget, so the text that is
    # actually sent never exceeds it.
    MAX_TEXT_LENGTH = 2000
    TRUNCATION_SUFFIX = "..."

    # Seconds to wait for the HTTP request before giving up.
    REQUEST_TIMEOUT_SECONDS = 30

    # MIME types that do not follow the generic "audio/<format>" pattern.
    _MIME_TYPES = {
        "mp3": "audio/mpeg",
        "wav": "audio/wav",
    }

    # Extra query parameters needed to make the API emit a given format.
    # "mp3" maps to nothing so the default request stays exactly what it was
    # before (model only). WAV is requested as linear16 PCM in a WAV
    # container. Formats not listed here are forwarded as-is as the encoding.
    _FORMAT_QUERY = {
        "mp3": {},
        "wav": {"encoding": "linear16", "container": "wav"},
    }

    def __init__(self):
        """Read the API key from the environment and set request defaults.

        Raises:
            ValueError: If the ``YOUR_DEEPGRAM_API_KEY`` environment variable
                is unset or empty.
        """
        self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY")
        self.base_url = "https://api.deepgram.com/v1/speak"
        self.default_model = "aura-2-thalia-en"

        if not self.api_key:
            logger.error("Deepgram API key not found in environment variables")
            raise ValueError("Deepgram API key is required")

    @classmethod
    def _truncate_text(cls, text: str) -> str:
        """Return *text* cut down so it fits within ``MAX_TEXT_LENGTH``.

        Text already within the limit is returned unchanged. Longer text is
        cut and suffixed with ``TRUNCATION_SUFFIX``; the suffix is included in
        the length budget, so the result is exactly ``MAX_TEXT_LENGTH`` long.
        """
        if len(text) <= cls.MAX_TEXT_LENGTH:
            return text
        keep = cls.MAX_TEXT_LENGTH - len(cls.TRUNCATION_SUFFIX)
        return text[:keep] + cls.TRUNCATION_SUFFIX

    @classmethod
    def _mime_type_for(cls, format: str) -> str:
        """Return the MIME type reported to callers for an audio *format*."""
        return cls._MIME_TYPES.get(format, f"audio/{format}")

    @classmethod
    def _build_query(cls, model: str, format: str) -> dict:
        """Return the query-string parameters for *model* and *format*."""
        query = {"model": model}
        query.update(cls._FORMAT_QUERY.get(format, {"encoding": format}))
        return query

    async def text_to_speech(
        self,
        text: str,
        model: Optional[str] = None,
        format: str = "mp3",
    ) -> Optional[dict]:
        """Convert text to speech using the Deepgram API.

        The blocking HTTP call is run in the event loop's default executor so
        that awaiting this coroutine does not stall other tasks.

        Args:
            text: The text to convert. Leading/trailing whitespace is
                stripped, and text longer than ``MAX_TEXT_LENGTH`` is
                truncated (ellipsis included in the limit).
            model: The TTS model to use. Falls back to ``self.default_model``
                when omitted or empty.
            format: Requested audio format (default ``"mp3"``). It is
                forwarded to the API and also determines the reported MIME
                type. (The name shadows the builtin but is part of the
                public signature.)

        Returns:
            A dict with keys ``audio_data`` (base64 string), ``mime_type``,
            ``format``, ``text`` (the text actually sent), ``model`` and
            ``size_bytes``; or ``None`` if the input was empty or the request
            failed for any reason. Failures are logged, never raised.
        """
        try:
            stripped = text.strip() if text else ""
            if not stripped:
                logger.warning("Empty text provided for TTS conversion")
                return None

            cleaned_text = self._truncate_text(stripped)
            if len(cleaned_text) < len(stripped):
                logger.warning(
                    f"Text truncated to {self.MAX_TEXT_LENGTH} characters for TTS"
                )

            resolved_model = model or self.default_model
            querystring = self._build_query(resolved_model, format)
            payload = {"text": cleaned_text}
            headers = {
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json",
            }

            logger.info(f"Converting text to speech: {cleaned_text[:100]}...")

            # requests is synchronous; hand it to a worker thread so the
            # event loop keeps running while we wait on the network.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                functools.partial(
                    requests.post,
                    self.base_url,
                    json=payload,
                    headers=headers,
                    params=querystring,
                    timeout=self.REQUEST_TIMEOUT_SECONDS,
                ),
            )

            if response.status_code != 200:
                logger.error(
                    f"Deepgram TTS API error: {response.status_code} - {response.text}"
                )
                return None

            # Base64-encode the raw audio bytes so they can travel in a
            # text/JSON response.
            audio_data = response.content
            result = {
                "audio_data": base64.b64encode(audio_data).decode("utf-8"),
                "mime_type": self._mime_type_for(format),
                "format": format,
                "text": cleaned_text,
                "model": resolved_model,
                "size_bytes": len(audio_data),
            }
            logger.info(f"TTS conversion successful: {len(audio_data)} bytes")
            return result

        except requests.exceptions.Timeout:
            logger.error("TTS request timed out")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS request failed: {str(e)}")
            return None
        except Exception as e:
            # Boundary handler: callers rely on None rather than exceptions.
            logger.error(f"Unexpected error in TTS conversion: {str(e)}")
            return None

    def is_available(self) -> bool:
        """Return True when an API key is configured.

        ``__init__`` raises when the key is missing, so this is True for any
        instance that was constructed normally.
        """
        return bool(self.api_key)
# Global TTS service instance.
# NOTE: this is constructed at import time, so importing this module raises
# ValueError (from TTSService.__init__) when the API key env var is not set.
tts_service = TTSService()