Spaces:
Sleeping
Sleeping
"""
Text-to-Speech (TTS) Service using Deepgram API
"""
import asyncio
import base64
import os
from typing import Optional

import requests

from src.utils.logger import logger
class TTSService:
    """Service for handling text-to-speech conversion using Deepgram API.

    The API key is read from the ``YOUR_DEEPGRAM_API_KEY`` environment
    variable when the service is constructed; construction fails with
    ``ValueError`` if it is missing.
    """

    # Upper bound on the number of characters sent in a single request.
    MAX_TEXT_LENGTH = 2000
    # Marker appended to truncated text; it counts toward MAX_TEXT_LENGTH.
    _TRUNCATION_SUFFIX = "..."
    # Seconds to wait for the HTTP request before giving up.
    REQUEST_TIMEOUT = 30
    # Formats whose MIME type is not simply ``audio/<format>``.
    _MIME_TYPES = {"mp3": "audio/mpeg", "wav": "audio/wav"}

    def __init__(self):
        """Read the API key from the environment and set request defaults.

        Raises:
            ValueError: If the API key environment variable is unset or empty.
        """
        self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY")
        self.base_url = "https://api.deepgram.com/v1/speak"
        self.default_model = "aura-2-thalia-en"
        if not self.api_key:
            logger.error("Deepgram API key not found in environment variables")
            raise ValueError("Deepgram API key is required")

    @classmethod
    def _prepare_text(cls, text: str) -> tuple:
        """Strip *text* and cap it at ``MAX_TEXT_LENGTH`` characters.

        Returns:
            tuple: ``(prepared_text, was_truncated)``. When truncated, the
            result ends with the truncation suffix and is exactly
            ``MAX_TEXT_LENGTH`` characters long.
        """
        cleaned = text.strip()
        if len(cleaned) <= cls.MAX_TEXT_LENGTH:
            return cleaned, False
        # Reserve room for the suffix so the total never exceeds the limit.
        keep = cls.MAX_TEXT_LENGTH - len(cls._TRUNCATION_SUFFIX)
        return cleaned[:keep] + cls._TRUNCATION_SUFFIX, True

    @staticmethod
    def _encoding_params(audio_format: str) -> dict:
        """Map the requested audio format to API query parameters.

        NOTE(review): relies on Deepgram's ``encoding`` / ``container`` query
        parameters, with mp3 as the API default -- confirm against the
        Deepgram TTS reference.
        """
        if audio_format == "mp3":
            # Default output; leave the request exactly as it was before.
            return {}
        if audio_format == "wav":
            return {"encoding": "linear16", "container": "wav"}
        return {"encoding": audio_format}

    @classmethod
    def _mime_type_for(cls, audio_format: str) -> str:
        """Return the MIME type reported for *audio_format*."""
        return cls._MIME_TYPES.get(audio_format, f"audio/{audio_format}")

    async def text_to_speech(
        self,
        text: str,
        model: Optional[str] = None,
        format: str = "mp3"
    ) -> Optional[dict]:
        """
        Convert text to speech using Deepgram API

        Args:
            text (str): The text to convert to speech
            model (str): The TTS model to use (default: aura-2-thalia-en)
            format (str): Audio format (default: mp3)

        Returns:
            dict: Contains audio data and metadata, or None if failed.
            Keys: ``audio_data`` (base64 string), ``mime_type``, ``format``,
            ``text`` (the text actually sent), ``model``, ``size_bytes``.
        """
        try:
            if not text or not text.strip():
                logger.warning("Empty text provided for TTS conversion")
                return None

            # Clean and prepare text
            cleaned_text, truncated = self._prepare_text(text)
            if truncated:
                logger.warning(
                    f"Text truncated to {self.MAX_TEXT_LENGTH} characters for TTS"
                )

            # Prepare request
            model_name = model or self.default_model
            querystring = {"model": model_name}
            querystring.update(self._encoding_params(format))
            payload = {"text": cleaned_text}
            headers = {
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json"
            }

            logger.info(f"Converting text to speech: {cleaned_text[:100]}...")

            # requests is blocking; run it in the default executor so this
            # coroutine does not stall the event loop while waiting.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: requests.post(
                    self.base_url,
                    json=payload,
                    headers=headers,
                    params=querystring,
                    timeout=self.REQUEST_TIMEOUT,
                ),
            )

            if response.status_code != 200:
                logger.error(f"Deepgram TTS API error: {response.status_code} - {response.text}")
                return None

            # Encode audio data as base64
            audio_data = response.content
            result = {
                "audio_data": base64.b64encode(audio_data).decode('utf-8'),
                "mime_type": self._mime_type_for(format),
                "format": format,
                "text": cleaned_text,
                "model": model_name,
                "size_bytes": len(audio_data)
            }
            logger.info(f"TTS conversion successful: {len(audio_data)} bytes")
            return result
        except requests.exceptions.Timeout:
            logger.error("TTS request timed out")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS request failed: {str(e)}")
            return None
        except Exception as e:
            # Best-effort API: any other failure is logged and reported as None.
            logger.error(f"Unexpected error in TTS conversion: {str(e)}")
            return None

    def is_available(self) -> bool:
        """Check if TTS service is available (i.e. an API key is set)."""
        return bool(self.api_key)
# Global TTS service instance.
# NOTE: constructed at import time, so importing this module raises
# ValueError when the Deepgram API key environment variable is not set.
tts_service = TTSService()