Spaces:
Sleeping
Sleeping
| # translation_utils.py | |
| import logging | |
| import sqlite3 | |
| from concurrent.futures import ThreadPoolExecutor | |
| import functools | |
| from deep_translator import GoogleTranslator, exceptions | |
| # Constants | |
| TRANSLATION_DATABASE_FILE = 'translation_database.db' | |
| SUPPORTED_LANGUAGES = {"af", "sq", "am", "ar", "hy", "az", "eu", "be", "bn", "bs", "bg", "ca", "ceb", "ny", "zh-CN", "zh-TW", "co", "hr", "cs", "da", "nl", "en", "eo", "et", "tl", "fi", "fr", "fy", "gl", "ka", "de", "el", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id", "ga", "it", "ja", "jw", "kn", "kk", "km", "ko", "ku", "ky", "lo", "la", "lv", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn", "my", "ne", "no", "ps", "fa", "pl", "pt", "pa", "ro", "ru", "sm", "gd", "sr", "st", "sn", "sd", "si", "sk", "sl", "so", "es", "su", "sw", "sv", "tg", "ta", "te", "th", "tr", "uk", "ur", "uz", "vi", "cy", "xh", "yi", "yo", "zu"} | |
| # Initialize logger | |
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') | |
| logger = logging.getLogger(__name__) | |
| def create_translation_table(): | |
| """Creates the translation table if it doesn't exist.""" | |
| try: | |
| with sqlite3.connect(TRANSLATION_DATABASE_FILE) as conn: | |
| cursor = conn.cursor() | |
| cursor.execute(''' | |
| CREATE TABLE IF NOT EXISTS translations ( | |
| phrase TEXT PRIMARY KEY | |
| ) | |
| ''') | |
| # Dynamically add language columns | |
| cursor.execute("PRAGMA table_info(translations)") | |
| existing_columns = {col[1] for col in cursor.fetchall()} | |
| for lang_code in SUPPORTED_LANGUAGES: | |
| column_name = lang_code.replace('-', '_') | |
| if column_name == "is": # Handle reserved keywords in SQLite | |
| column_name = "is_" | |
| if column_name not in existing_columns: | |
| try: | |
| cursor.execute(f"ALTER TABLE translations ADD COLUMN `{column_name}` TEXT") | |
| logger.info(f"Added column '{column_name}' to translations table.") | |
| except sqlite3.OperationalError as e: | |
| if "duplicate column name" in str(e).lower(): | |
| logger.debug(f"Column '{column_name}' already exists. Skipping.") | |
| else: | |
| logger.error(f"Error adding column '{column_name}': {e}") # More specific error | |
| conn.commit() | |
| except Exception as e: # Broad exception handling to catch any other potential issues | |
| logger.error(f"An unexpected error occurred in create_translation_table: {e}") | |
| # Use the correct decorator name | |
| def translate_cached(text, target_language, source_language="auto"): # Renamed to avoid conflicts | |
| """Translates text using Google Translate with caching.""" | |
| if not text: | |
| return "" | |
| try: | |
| translator = GoogleTranslator(source=source_language, target=target_language) | |
| translated = translator.translate(text) | |
| return translated | |
| except exceptions.TranslationNotFound: | |
| logger.error(f"Translation not found for: {text}") | |
| except Exception as e: # Catch generic exceptions | |
| logger.exception(f"Translation error: {e}") # Log with traceback | |
| return None | |
| def get_translation(phrase, target_language, source_language="auto"): | |
| """Retrieves a translation from the database or translates and stores it.""" | |
| if target_language not in SUPPORTED_LANGUAGES: | |
| logger.error(f"Unsupported target language: {target_language}") | |
| return None, False # Return None and False for failure | |
| try: | |
| with sqlite3.connect(TRANSLATION_DATABASE_FILE) as conn: | |
| cursor = conn.cursor() | |
| column_name = target_language.replace('-', '_') | |
| if column_name == "is": | |
| column_name = "is_" | |
| cursor.execute(f"SELECT `{column_name}` FROM translations WHERE phrase=?", (phrase,)) | |
| result = cursor.fetchone() | |
| if result and result[0]: # Check that the result is not empty and has a translated value | |
| return result[0], True | |
| translated_text = translate_cached(phrase, target_language, source_language) | |
| if translated_text: | |
| cursor.execute(f""" | |
| INSERT INTO translations (phrase, `{column_name}`) VALUES (?, ?) | |
| ON CONFLICT(phrase) DO UPDATE SET `{column_name}`=excluded.`{column_name}` | |
| """, (phrase, translated_text)) | |
| conn.commit() | |
| return translated_text, True | |
| else: | |
| return None, False # Explicitly return False when translation fails | |
| except sqlite3.Error as e: | |
| logger.error(f"Database error: {e}") | |
| return None, False # Return explicit failure indicator | |
| except Exception as e: | |
| logger.exception(f"Unexpected error in get_translation: {e}") # Generic Exception Catch-All | |
| return None, False # Return explicit failure indicator | |
| def batch_translate(phrases, target_language, source_languages): # Takes a list of source languages | |
| """Translates multiple phrases concurrently, respecting source language.""" | |
| with ThreadPoolExecutor() as executor: | |
| # Use source language for each phrase | |
| futures = [executor.submit(get_translation, phrase, target_language, source_languages[i]) | |
| for i, phrase in enumerate(phrases) if phrase] # Only translate if phrase is not empty | |
| results = [future.result() for future in futures] # Only includes results from phrases that were translated | |
| translations = {} | |
| j=0 #index for results | |
| for i, phrase in enumerate(phrases): | |
| if phrase: #if phrase was translated | |
| translations[phrase] = results[j][0] #Store translation (results[j] is a tuple (translation, success_flag), take the first element) | |
| j+=1 | |
| else: # If phrase is empty | |
| translations[phrase] = None #Or translations[phrase] = "" # Alternatively assign empty string if not translating empty phrases | |
| return translations | |