Spaces:
Sleeping
Sleeping
"""
Text-to-Speech (TTS) Service using Deepgram API
"""
import asyncio
import base64
import functools
import os
import re
from typing import Optional

import requests

from src.utils.logger import logger
class TTSService:
    """Service for handling text-to-speech conversion using Deepgram API.

    The API key is read from the ``YOUR_DEEPGRAM_API_KEY`` environment
    variable when the service is constructed.

    Attributes:
        api_key: Deepgram API key read from the environment.
        base_url: Endpoint that synthesis requests are POSTed to.
        default_model: Voice model used when the caller does not pass one.
    """

    # Longest cleaned text (in characters) sent in a single request; longer
    # input is truncated and suffixed with "...".
    MAX_TEXT_LENGTH = 2000
    # Seconds to wait for the HTTP request before giving up.
    REQUEST_TIMEOUT_SECONDS = 30

    # Markdown constructs reduced to their plain text. Order matters: bold
    # (**x**) must be unwrapped before italic (*x*).
    _MARKDOWN_RULES = (
        (re.compile(r'\*\*(.*?)\*\*'), r'\1'),  # bold **text**
        (re.compile(r'\*(.*?)\*'), r'\1'),  # italic *text*
        (re.compile(r'`(.*?)`'), r'\1'),  # inline code `text`
        # Header markers only count at the start of a line; an unanchored
        # pattern would also eat the '#' in text such as "C# is great".
        (re.compile(r'^[ \t]*#{1,6}[ \t]+', re.MULTILINE), ''),
        (re.compile(r'\[(.*?)\]\(.*?\)'), r'\1'),  # [text](url) -> text
    )

    # Emoji and pictograph ranges that are removed outright.
    _EMOJI = re.compile(
        '['
        '\U0001F600-\U0001F64F'  # emoticons
        '\U0001F300-\U0001F5FF'  # misc symbols and pictographs
        '\U0001F680-\U0001F6FF'  # transport & map
        '\U0001F1E0-\U0001F1FF'  # regional indicators
        '\u2600-\u26FF'  # misc symbols
        '\u2700-\u27BF'  # dingbats
        '\uFE00-\uFE0F'  # variation selectors
        '\U0001F900-\U0001F9FF'  # supplemental symbols
        ']'
    )

    # Typographic punctuation mapped to ASCII equivalents. Written with
    # escapes so the table survives copy/paste and encoding round-trips.
    _PUNCTUATION_MAP = str.maketrans({
        '\u201C': '"',  # left double quotation mark
        '\u201D': '"',  # right double quotation mark
        '\u2018': "'",  # left single quotation mark
        '\u2019': "'",  # right single quotation mark / apostrophe
        '\u2013': '-',  # en dash
        '\u2014': '-',  # em dash
        '\u2026': '...',  # horizontal ellipsis
        '\u00AB': '"',  # left angle quote
        '\u00BB': '"',  # right angle quote
        '\u2039': "'",  # single left angle quote
        '\u203A': "'",  # single right angle quote
    })

    # Zero-width characters plus control characters. Tab, newline, vertical
    # tab, form feed and carriage return (U+0009-U+000D) are deliberately
    # excluded so the whitespace pass can turn them into a single space
    # instead of gluing the neighbouring words together.
    _INVISIBLE = re.compile(
        r'[\u200B-\u200D\uFEFF\u0000-\u0008\u000E-\u001F\u007F-\u009F]'
    )
    _WHITESPACE = re.compile(r'\s+')

    # Runs of repeated punctuation collapsed to a single mark.
    _REPEATED_PUNCTUATION = (
        (re.compile(r'\.{3,}'), '...'),
        (re.compile(r'!{2,}'), '!'),
        (re.compile(r'\?{2,}'), '?'),
    )
    # Sentence-ending punctuation followed by a capital letter gets exactly
    # one space between them.
    _SENTENCE_BREAK = re.compile(r'([.!?])\s*([A-Z])')

    # MIME types that differ from the generic "audio/<format>" form.
    _MIME_TYPES = {"mp3": "audio/mpeg", "wav": "audio/wav"}

    def __init__(self):
        """Read the API key from the environment and set request defaults.

        Raises:
            ValueError: If ``YOUR_DEEPGRAM_API_KEY`` is unset or empty.
        """
        self.api_key = os.getenv("YOUR_DEEPGRAM_API_KEY")
        self.base_url = "https://api.deepgram.com/v1/speak"
        self.default_model = "aura-2-thalia-en"

        if not self.api_key:
            logger.error("Deepgram API key not found in environment variables")
            raise ValueError("Deepgram API key is required")

    def clean_text_for_speech(self, text: str) -> str:
        """
        Clean text for speech synthesis by removing problematic characters

        Markdown markers, emoji, zero-width and control characters are
        removed; typographic quotes, dashes and ellipses are replaced with
        ASCII equivalents; all whitespace runs (including newlines and tabs)
        become a single space; repeated punctuation is collapsed.

        Args:
            text (str): The text to clean

        Returns:
            str: Cleaned text suitable for speech synthesis. Empty string if
                ``text`` is empty or not a string.
        """
        if not text or not isinstance(text, str):
            return ""

        # Markdown first: the header rule relies on line breaks still being
        # present.
        for pattern, replacement in self._MARKDOWN_RULES:
            text = pattern.sub(replacement, text)

        text = self._EMOJI.sub('', text)
        text = text.translate(self._PUNCTUATION_MAP)
        text = self._INVISIBLE.sub('', text)

        # Line breaks and tabs survive to this point and become spaces here.
        text = self._WHITESPACE.sub(' ', text).strip()

        for pattern, replacement in self._REPEATED_PUNCTUATION:
            text = pattern.sub(replacement, text)

        return self._SENTENCE_BREAK.sub(r'\1 \2', text)

    @staticmethod
    def _encoding_params(format: str) -> dict:
        """Return the extra query parameters that request ``format`` audio.

        "mp3" adds nothing, so the request is the same as when no format is
        specified (MP3 is the API default per Deepgram's reference). "wav"
        is requested as linear16 samples in a WAV container. Any other value
        is passed through as the ``encoding`` parameter.
        """
        if format == "mp3":
            return {}
        if format == "wav":
            return {"encoding": "linear16", "container": "wav"}
        return {"encoding": format}

    async def text_to_speech(
        self,
        text: str,
        model: Optional[str] = None,
        format: str = "mp3"
    ) -> Optional[dict]:
        """
        Convert text to speech using Deepgram API

        The text is cleaned with ``clean_text_for_speech`` and truncated to
        ``MAX_TEXT_LENGTH`` characters before being sent. The blocking HTTP
        call runs in the event loop's default executor so other coroutines
        keep running while the request is in flight.

        Args:
            text (str): The text to convert to speech
            model (str): The TTS model to use (default: aura-2-thalia-en)
            format (str): Audio format (default: mp3)

        Returns:
            dict: Contains the base64-encoded audio under "audio_data" plus
                "mime_type", "format", "text" (the cleaned text that was
                sent), "model" and "size_bytes"; or None if the text is
                empty, the request fails, or the API returns a non-200
                status. Failures are logged, never raised.
        """
        try:
            if not text or not text.strip():
                logger.warning("Empty text provided for TTS conversion")
                return None

            # Clean and prepare text
            cleaned_text = self.clean_text_for_speech(text)
            if not cleaned_text or not cleaned_text.strip():
                logger.warning("Text became empty after cleaning for TTS")
                return None

            if len(cleaned_text) > self.MAX_TEXT_LENGTH:
                cleaned_text = cleaned_text[:self.MAX_TEXT_LENGTH] + "..."
                logger.warning(
                    f"Text truncated to {self.MAX_TEXT_LENGTH} characters for TTS"
                )

            # Prepare request
            model_name = model or self.default_model
            querystring = {"model": model_name}
            # Ask the API for the requested format so the returned bytes
            # match the MIME type reported to the caller.
            querystring.update(self._encoding_params(format))
            headers = {
                "Authorization": f"Token {self.api_key}",
                "Content-Type": "application/json"
            }

            logger.info(f"Converting text to speech: '{cleaned_text[:100]}...' (original: '{text[:50]}...')")

            # requests is synchronous; hand it to a worker thread rather
            # than blocking the event loop for up to the full timeout.
            send_request = functools.partial(
                requests.post,
                self.base_url,
                json={"text": cleaned_text},
                headers=headers,
                params=querystring,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, send_request)

            if response.status_code != 200:
                logger.error(f"Deepgram TTS API error: {response.status_code} - {response.text}")
                return None

            # Encode audio data as base64
            audio_data = response.content
            audio_base64 = base64.b64encode(audio_data).decode('utf-8')

            result = {
                "audio_data": audio_base64,
                "mime_type": self._MIME_TYPES.get(format, f"audio/{format}"),
                "format": format,
                "text": cleaned_text,
                "model": model_name,
                "size_bytes": len(audio_data)
            }

            logger.info(f"TTS conversion successful: {len(audio_data)} bytes")
            return result

        except requests.exceptions.Timeout:
            logger.error("TTS request timed out")
            return None
        except requests.exceptions.RequestException as e:
            logger.error(f"TTS request failed: {str(e)}")
            return None
        except Exception as e:
            # Boundary handler: callers rely on None rather than an
            # exception for every failure mode.
            logger.error(f"Unexpected error in TTS conversion: {str(e)}")
            return None

    def is_available(self) -> bool:
        """Check if TTS service is available (an API key is set)."""
        return bool(self.api_key)
# Shared module-level TTS service. It is constructed at import time, so
# importing this module raises ValueError when the Deepgram API key is not
# present in the environment.
tts_service = TTSService()