my-gradio-app / utils /text_to_speech.py
Nguyen Trong Lap
Recreate history without binary blobs
eeb0f9c
"""
Text-to-Speech Module
Handles text-to-speech conversion with Vietnamese language support
"""
import base64
import io
import logging
import os
import tempfile
import unicodedata
from pathlib import Path

from gtts import gTTS
logger = logging.getLogger(__name__)
class VietnameseTTS:
    """Text-to-speech converter built on gTTS, defaulting to Vietnamese.

    Every conversion method rejects blank text, strips surrounding
    whitespace before synthesis, logs failures, and re-raises them as a
    plain ``Exception`` whose message is a Vietnamese user-facing sentence
    followed by the underlying error text.
    """

    def __init__(self, language='vi', slow=False):
        """
        Initialize the converter.

        Args:
            language (str): Language code passed to gTTS (default: 'vi').
            slow (bool): Whether gTTS should speak slowly (default: False).
        """
        self.language = language
        self.slow = slow

    def _create_engine(self, text):
        """
        Validate *text* and build the gTTS object used to synthesize it.

        Args:
            text (str): Text to convert to speech.

        Returns:
            gTTS: Engine configured with this instance's language and speed.

        Raises:
            ValueError: If text is empty or contains only whitespace.
        """
        if not text or not text.strip():
            raise ValueError("Text cannot be empty")
        return gTTS(text=text.strip(), lang=self.language, slow=self.slow)

    def text_to_speech(self, text, output_path=None):
        """
        Convert text to speech and save it as an audio file.

        Args:
            text (str): Text to convert to speech.
            output_path (str, optional): Where to save the audio. If None, a
                temporary ``.mp3`` file is created; it is not deleted
                automatically, so the caller must remove it when done.

        Returns:
            str: Path to the generated audio file.

        Raises:
            Exception: If the text is blank or the conversion fails. The
                original error is attached as ``__cause__``.
        """
        owns_temp_file = False
        try:
            engine = self._create_engine(text)

            if output_path is None:
                # delete=False: the file has to outlive this call so the
                # caller can play it back.
                with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                    output_path = temp_file.name
                owns_temp_file = True

            engine.save(output_path)
            logger.info(f"TTS audio saved to: {output_path}")
            return output_path
        except Exception as e:
            if owns_temp_file:
                # The caller never learns this path, so an empty or partial
                # temp file would otherwise be leaked. A caller-supplied
                # path is left alone.
                try:
                    os.unlink(output_path)
                except OSError:
                    pass
            logger.error(f"TTS conversion failed: {e}")
            raise Exception(f"Không thể chuyển đổi văn bản thành giọng nói: {e}") from e

    def text_to_speech_bytes(self, text):
        """
        Convert text to speech and return the audio as bytes.

        Args:
            text (str): Text to convert to speech.

        Returns:
            bytes: Audio data written by gTTS.

        Raises:
            Exception: If the text is blank or the conversion fails. The
                original error is attached as ``__cause__``.
        """
        try:
            engine = self._create_engine(text)
            audio_buffer = io.BytesIO()
            engine.write_to_fp(audio_buffer)
            # getvalue() returns the whole buffer regardless of the current
            # position, so no rewind is needed.
            return audio_buffer.getvalue()
        except Exception as e:
            logger.error(f"TTS conversion to bytes failed: {e}")
            raise Exception(f"Không thể chuyển đổi văn bản thành giọng nói: {e}") from e

    def text_to_speech_base64(self, text):
        """
        Convert text to speech and return it as a base64 encoded string.

        Args:
            text (str): Text to convert to speech.

        Returns:
            str: Base64 encoded audio data.

        Raises:
            Exception: If the text is blank or the conversion fails.
        """
        # text_to_speech_bytes already logs and wraps failures in the
        # user-facing message; wrapping again here would repeat that
        # sentence in the error shown to the user.
        audio_bytes = self.text_to_speech_bytes(text)
        return base64.b64encode(audio_bytes).decode('utf-8')
# Module-wide converter, created on first use by get_tts_instance()
_tts_instance = None


def get_tts_instance():
    """
    Return the shared VietnameseTTS instance, creating it on first call.

    Returns:
        VietnameseTTS: The module-level instance (default settings).
    """
    global _tts_instance
    if _tts_instance is not None:
        return _tts_instance
    _tts_instance = VietnameseTTS()
    return _tts_instance
def text_to_speech(text, output_path=None):
    """
    Convert text to an audio file using the shared TTS instance.

    Args:
        text (str): Text to convert to speech
        output_path (str, optional): Destination path for the audio file

    Returns:
        str: Path to the generated audio file
    """
    return get_tts_instance().text_to_speech(text, output_path)
def text_to_speech_bytes(text):
    """
    Convert text to audio bytes using the shared TTS instance.

    Args:
        text (str): Text to convert to speech

    Returns:
        bytes: Audio data as bytes
    """
    return get_tts_instance().text_to_speech_bytes(text)
def text_to_speech_base64(text):
    """
    Convert text to base64 encoded audio using the shared TTS instance.

    Args:
        text (str): Text to convert to speech

    Returns:
        str: Base64 encoded audio data
    """
    return get_tts_instance().text_to_speech_base64(text)
def cleanup_temp_files(file_path):
    """
    Delete a temporary audio file, best-effort.

    Nothing happens when the path is falsy or does not exist. Any error
    raised while checking or deleting is logged as a warning and not
    propagated.

    Args:
        file_path (str): Path to the file to delete
    """
    try:
        if not file_path or not os.path.exists(file_path):
            return
        os.unlink(file_path)
        logger.info(f"Cleaned up temp file: {file_path}")
    except Exception as e:
        logger.warning(f"Failed to cleanup temp file {file_path}: {str(e)}")
# Lowercase Vietnamese letters that carry diacritics, in precomposed form.
# Only lowercase is needed because the text is lowercased before lookup.
# Built once at import time instead of on every call.
_VIETNAMESE_CHARS = frozenset(
    'àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ'
)


def is_vietnamese_text(text):
    """
    Check if text contains Vietnamese diacritic characters.

    The text is normalized to NFC first, so letters written as a base
    letter plus combining marks are recognized the same way as their
    precomposed equivalents. Matching is case-insensitive.

    Args:
        text (str): Text to check. None or an empty string yields False.

    Returns:
        bool: True if at least one Vietnamese diacritic letter is present
    """
    if not text:
        return False
    composed = unicodedata.normalize('NFC', text).lower()
    return any(char in _VIETNAMESE_CHARS for char in composed)
def get_supported_languages():
    """
    List the languages offered for TTS.

    Returns:
        dict: Maps each language code to its name written in that
            language. A new dict is built on every call.
    """
    language_table = (
        ('vi', 'Tiếng Việt'),
        ('en', 'English'),
        ('zh', '中文'),
        ('ja', '日本語'),
        ('ko', '한국어'),
        ('th', 'ไทย'),
        ('fr', 'Français'),
        ('de', 'Deutsch'),
        ('es', 'Español'),
        ('it', 'Italiano'),
        ('pt', 'Português'),
        ('ru', 'Русский'),
    )
    return dict(language_table)