Spaces:

lapnt3
/

my-gradio-app

Runtime error

File size: 6,619 Bytes

eeb0f9c

"""
Text-to-Speech Module
Handles text-to-speech conversion with Vietnamese language support
"""

import logging
import tempfile
import os
from pathlib import Path
from gtts import gTTS
import io
import base64

logger = logging.getLogger(__name__)

class VietnameseTTS:
    """Text-to-speech converter backed by gTTS, defaulting to Vietnamese.

    Every public conversion method reports failure by raising a plain
    ``Exception`` whose message is the Vietnamese user-facing prefix followed
    by the underlying error text. The original error is attached as
    ``__cause__`` so the real traceback is not lost.
    """

    # User-facing prefix shared by every conversion failure message.
    _FAILURE_PREFIX = "Không thể chuyển đổi văn bản thành giọng nói"

    def __init__(self, language='vi', slow=False):
        """
        Initialize the converter.

        Args:
            language (str): Language code passed to gTTS (default: 'vi').
            slow (bool): Whether gTTS should speak slowly (default: False).
        """
        self.language = language
        self.slow = slow

    def _build_gtts(self, text):
        """Validate ``text`` and return a gTTS object for its stripped form.

        Raises:
            ValueError: If ``text`` is empty or only whitespace.
        """
        if not text or not text.strip():
            raise ValueError("Text cannot be empty")
        return gTTS(text=text.strip(), lang=self.language, slow=self.slow)

    @staticmethod
    def _discard_file(path):
        """Best-effort removal of a file this class created itself."""
        try:
            os.unlink(path)
        except OSError as cleanup_error:
            logger.warning("Failed to remove temp file %s: %s", path, cleanup_error)

    def text_to_speech(self, text, output_path=None):
        """
        Convert text to speech and save it as an audio file.

        Args:
            text (str): Text to convert to speech.
            output_path (str, optional): Where to save the audio. If None, a
                temporary ``.mp3`` file is created; the caller owns it and is
                responsible for deleting it.

        Returns:
            str: Path to the generated audio file.

        Raises:
            Exception: If the text is empty or the conversion fails.
        """
        # Set only when this call created the file, so that a failure never
        # deletes a path supplied by the caller.
        temp_path = None
        try:
            tts = self._build_gtts(text)

            if output_path is None:
                with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                    temp_path = output_path = temp_file.name

            tts.save(output_path)

            logger.info("TTS audio saved to: %s", output_path)
            return output_path

        except Exception as e:
            # delete=False means nothing else will remove the temp file; drop
            # it here so failed conversions do not pile up on disk.
            if temp_path is not None:
                self._discard_file(temp_path)
            logger.error("TTS conversion failed: %s", e)
            raise Exception(f"{self._FAILURE_PREFIX}: {e}") from e

    def text_to_speech_bytes(self, text):
        """
        Convert text to speech and return the audio as bytes.

        Args:
            text (str): Text to convert to speech.

        Returns:
            bytes: Audio data written by gTTS.

        Raises:
            Exception: If the text is empty or the conversion fails.
        """
        try:
            tts = self._build_gtts(text)

            audio_buffer = io.BytesIO()
            tts.write_to_fp(audio_buffer)

            # getvalue() returns the whole buffer regardless of position.
            return audio_buffer.getvalue()

        except Exception as e:
            logger.error("TTS conversion to bytes failed: %s", e)
            raise Exception(f"{self._FAILURE_PREFIX}: {e}") from e

    def text_to_speech_base64(self, text):
        """
        Convert text to speech and return it as a base64 encoded string.

        Args:
            text (str): Text to convert to speech.

        Returns:
            str: Base64 encoded audio data.

        Raises:
            Exception: If the text is empty or the conversion fails.
        """
        # text_to_speech_bytes already logs and wraps its own failures; it is
        # called outside the try so the message prefix is not applied twice.
        audio_bytes = self.text_to_speech_bytes(text)

        try:
            return base64.b64encode(audio_bytes).decode('utf-8')
        except Exception as e:
            logger.error("TTS conversion to base64 failed: %s", e)
            raise Exception(f"{self._FAILURE_PREFIX}: {e}") from e


# Lazily created module-wide converter; stays None until first requested.
_tts_instance = None

def get_tts_instance():
    """Return the shared VietnameseTTS, creating it with defaults on first use."""
    global _tts_instance
    if _tts_instance is not None:
        return _tts_instance
    _tts_instance = VietnameseTTS()
    return _tts_instance

def text_to_speech(text, output_path=None):
    """
    Convert text to an audio file using the shared TTS instance.

    Delegates to the ``text_to_speech`` method of the object returned by
    ``get_tts_instance()``.

    Args:
        text (str): Text to convert to speech
        output_path (str, optional): Path to save audio file

    Returns:
        str: Path to the generated audio file
    """
    return get_tts_instance().text_to_speech(text, output_path)

def text_to_speech_bytes(text):
    """
    Convert text to audio bytes using the shared TTS instance.

    Delegates to the ``text_to_speech_bytes`` method of the object returned
    by ``get_tts_instance()``.

    Args:
        text (str): Text to convert to speech

    Returns:
        bytes: Audio data as bytes
    """
    return get_tts_instance().text_to_speech_bytes(text)

def text_to_speech_base64(text):
    """
    Convert text to base64 encoded audio using the shared TTS instance.

    Delegates to the ``text_to_speech_base64`` method of the object returned
    by ``get_tts_instance()``.

    Args:
        text (str): Text to convert to speech

    Returns:
        str: Base64 encoded audio data
    """
    return get_tts_instance().text_to_speech_base64(text)

def cleanup_temp_files(file_path):
    """
    Delete a temporary audio file, never raising on failure.

    A falsy or non-existent path is ignored. Any error raised while checking
    or deleting is logged as a warning instead of being propagated.

    Args:
        file_path (str): Path to the file to delete
    """
    try:
        nothing_to_remove = not file_path or not os.path.exists(file_path)
        if nothing_to_remove:
            return
        os.unlink(file_path)
        logger.info(f"Cleaned up temp file: {file_path}")
    except Exception as err:
        logger.warning(f"Failed to cleanup temp file {file_path}: {str(err)}")

# Lowercase letters carrying Vietnamese diacritics, plus "đ". Built once at
# import time; uppercase input is covered by lowercasing before the lookup.
_VIETNAMESE_CHARS = frozenset(
    'àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ'
)


def is_vietnamese_text(text):
    """
    Check if text contains Vietnamese characters

    The text is normalized to NFC first, so letters written as a base
    character followed by combining marks (decomposed / NFD form) are
    recognized as well as precomposed ones. Matching is case-insensitive.

    Args:
        text (str): Text to check. ``None`` or an empty string yields False.

    Returns:
        bool: True if text contains at least one Vietnamese accented letter
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import unicodedata

    if not text:
        return False

    # Compose "e" + combining circumflex + combining acute into the single
    # code point "ế" so that it can match the precomposed lookup set.
    composed = unicodedata.normalize('NFC', text)
    return not _VIETNAMESE_CHARS.isdisjoint(composed.lower())

def get_supported_languages():
    """
    Return the language choices offered for TTS.

    Returns:
        dict: Maps each language code to that language's name written in the
            language itself. A new dict is built on every call.
    """
    code_name_pairs = (
        ('vi', 'Tiếng Việt'),
        ('en', 'English'),
        ('zh', '中文'),
        ('ja', '日本語'),
        ('ko', '한국어'),
        ('th', 'ไทย'),
        ('fr', 'Français'),
        ('de', 'Deutsch'),
        ('es', 'Español'),
        ('it', 'Italiano'),
        ('pt', 'Português'),
        ('ru', 'Русский'),
    )
    return dict(code_name_pairs)