# Source: Hugging Face Space dsfsi/UPTranslate (status: Running)
# File size: 111,559 bytes

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import time
from PIL import Image

# Only import APIs if available
try:
    from google import genai
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False

try:
    from openai import OpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False

# Directory containing this script; logo.png is looked up directly under it.
BASE_DIR = os.path.dirname(__file__)
# Folder with the translation and metric files (e.g. translations.tsv,
# bleu_scores.csv) that the "Existing Translations" tab reads.
DATA_DIR = os.path.join(BASE_DIR, "data")

# Page configuration: browser tab title/icon, full-width layout, and the
# sidebar collapsed by default.
st.set_page_config(
    page_title="Translation Comparison Tool",
    page_icon="🌐",
    layout="wide",
    initial_sidebar_state="collapsed"
)

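# Custom CSS: Inter typography, overrides for Streamlit tabs, buttons and text areas,
# metric/score/word-diff card styles, and rules that hide Streamlit's default
# header, deploy button, footer and main menu.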
st.markdown("""
<style>
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
    
    .main-header {
        font-family: 'Inter', sans-serif;
        font-size: 1.8rem;
        font-weight: 600;
        color: #1f2937;
        margin-bottom: 0.5rem;
        letter-spacing: -0.025em;
        text-align: center;
    }
    
    .sub-header {
        font-family: 'Inter', sans-serif;
        font-size: 1.1rem;
        font-weight: 400;
        color: #6b7280;
        margin-bottom: 2rem;
        line-height: 1.6;
        text-align: center;
    }
    
    .logo-container {
        display: flex;
        justify-content: center;
        margin-bottom: 2rem;
    }
            
    /* Bold and full-width tabs */
.stTabs [data-baseweb="tab-list"] {
    gap: 0px;
    width: 100%;
}

.stTabs [data-baseweb="tab"] {
    font-family: 'Inter', sans-serif !important;
    font-size: 1.1rem !important;
    font-weight: 600 !important;
    padding: 12px 24px !important;
    width: 50% !important;
    justify-content: center !important;
    border-radius: 0 !important;
    background-color: #f8f9fa !important;
    color: #374151 !important;
    border: 1px solid #e5e7eb !important;
    margin: 0 !important;
}

.stTabs [data-baseweb="tab"]:hover {
    background-color: #f1f3f4 !important;
    color: #1f2937 !important;
}

.stTabs [aria-selected="true"] {
    background-color: #3b82f6 !important;
    color: white !important;
    font-weight: 700 !important;
    border-color: #3b82f6 !important;
}

.stTabs [data-baseweb="tab-highlight"] {
    display: none !important;
}

.stTabs [data-baseweb="tab-border"] {
    display: none !important;
}
    
    .tab-header {
        font-family: 'Inter', sans-serif;
        font-size: 1.5rem;
        font-weight: 600;
        color: #374151;
        margin-bottom: 1rem;
    }
    
    .metric-card {
        background: #f9fafb;
        border: 1px solid #e5e7eb;
        border-radius: 0.75rem;
        padding: 1.5rem;
        margin: 0.5rem 0;
        box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
    }
    
    .metric-title {
        font-family: 'Inter', sans-serif;
        font-size: 0.875rem;
        font-weight: 500;
        color: #6b7280;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        margin-bottom: 0.25rem;
    }
    
    .metric-value {
        font-family: 'Inter', sans-serif;
        font-size: 2rem;
        font-weight: 700;
        color: #1f2937;
        line-height: 1;
    }
    
    .support-info {
        color: #5f6368;
        font-size: 12px;
        margin-top: 20px;
        text-align: center;
        font-family: 'Inter', sans-serif;
    }
    
    .translate-container {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        margin: 20px 0;
        overflow: hidden;
        box-shadow: 0 2px 5px rgba(0,0,0,0.1);
    }
    
    .translate-header {
        background: #f8f9fa;
        border-bottom: 1px solid #e0e0e0;
        padding: 12px 16px;
        font-family: 'Inter', sans-serif;
        font-weight: 500;
        font-size: 14px;
        color: #5f6368;
        display: flex;
        align-items: center;
        box-sizing: border-box;
    }
    
    .language-tabs-container {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        margin: 20px 0;
        overflow: hidden;
        box-shadow: 0 2px 5px rgba(0,0,0,0.1);
    }
    
    .language-tabs-header {
        background: #f8f9fa;
        border-bottom: 1px solid #e0e0e0;
        height: 45px;
        display: flex;
        align-items: stretch;
        box-sizing: border-box;
        padding: 0;
    }
    
    .language-tab {
        flex: 1;
        background: #f8f9fa;
        border: none;
        border-right: 1px solid #e0e0e0;
        padding: 0;
        font-family: 'Inter', sans-serif;
        font-size: 14px;
        font-weight: 500;
        cursor: pointer;
        transition: all 0.2s ease;
        color: #6b7280;
        text-align: center;
        height: 45px;
        display: flex;
        align-items: center;
        justify-content: center;
        box-sizing: border-box;
        text-decoration: none;
        outline: none;
    }
    
    .language-tab:last-child {
        border-right: none;
    }
    
    .language-tab.active {
        background: white;
        color: #3b82f6;
        border-bottom: 2px solid #3b82f6;
        font-weight: 600;
    }
    
    .language-tab:hover:not(.active) {
        background: #f1f3f4;
        color: #374151;
    }
    
    .stTextArea textarea {
        resize: none !important;
        min-height: 350px !important;
        max-height: 350px !important;
        height: 350px !important;
    }
            
    .stTextArea textarea[disabled] {
        color: #000000 !important;
        opacity: 1 !important;
        -webkit-text-fill-color: #000000 !important;
    }
    
    /* Make buttons rounded and complete */
.stButton > button {
    font-family: 'Inter', sans-serif !important;
    font-size: 0.75rem !important;
    font-weight: 500 !important;
    border-radius: 6px !important;  /* Changed from 0 to 6px for rounded corners */
    height: 35px !important;
    border: 1px solid #d1d5db !important;
    margin: 0 2px !important;  /* Added small margin between buttons */
    padding: 0 12px !important;  /* Increased padding for better look */
    cursor: pointer !important;
    transition: all 0.2s ease !important;
}

.stButton > button[data-testid="baseButton-secondary"] {
    background-color: #f3f4f6 !important;
    color: #374151 !important;
    border-color: #d1d5db !important;
    box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05) !important;
}

.stButton > button[data-testid="baseButton-secondary"]:hover {
    background-color: #e5e7eb !important;
    color: #1f2937 !important;
    border-color: #9ca3af !important;
    box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1) !important;
    transform: translateY(-1px) !important;
}

.stButton > button[data-testid="baseButton-primary"] {
    background-color: #3b82f6 !important;
    color: #ffffff !important;
    font-weight: 600 !important;
    border-color: #3b82f6 !important;
    box-shadow: 0 2px 4px 0 rgba(59, 130, 246, 0.3) !important;
}

.stButton > button[data-testid="baseButton-primary"]:hover {
    background-color: #2563eb !important;
    color: #ffffff !important;
    border-color: #2563eb !important;
    transform: translateY(-1px) !important;
}

/* Remove the border-right rule since we're using margins now */
    
    /* Hide the default Streamlit button styling for tab buttons */
    .language-tab-button {
        background: none !important;
        border: none !important;
        padding: 0 !important;
        margin: 0 !important;
        height: 100% !important;
        width: 100% !important;
        color: inherit !important;
        font-weight: inherit !important;
    }
    
    .language-tab-button:hover {
        background: none !important;
        border: none !important;
    }
    
    .language-tab-button:focus {
        background: none !important;
        border: none !important;
        box-shadow: none !important;
    }
            
    .score-card {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            border-radius: 12px;
            padding: 20px;
            text-align: center;
            color: white;
            margin: 10px 0;
        }
        .score-value {
            font-size: 2.5rem;
            font-weight: 700;
            margin: 10px 0;
        }
        .score-label {
            font-size: 0.9rem;
            opacity: 0.9;
            text-transform: uppercase;
            letter-spacing: 1px;
        }
        .comparison-container {
            background: #f8fafc;
            border: 1px solid #e2e8f0;
            border-radius: 12px;
            padding: 24px;
            margin: 20px 0;
        }
        .word-diff {
            display: inline-block;
            padding: 4px 8px;
            margin: 2px;
            border-radius: 6px;
            font-weight: 500;
        }
        .word-added {
            background: #dcfce7;
            color: #166534;
            border: 1px solid #bbf7d0;
        }
        .word-removed {
            background: #fef2f2;
            color: #dc2626;
            border: 1px solid #fecaca;
        }
        .word-common {
            background: #f1f5f9;
            color: #475569;
            border: 1px solid #e2e8f0;
        }

            .block-container {
        padding-top: 1rem;
        padding-bottom: 0rem;
    }
    
    .main > div {
        padding-top: 1rem;
    }
    
    /* Hide Streamlit header and footer */
    header[data-testid="stHeader"] {
        height: 0px;
        display: none;
    }
    
    .stDeployButton {
        display: none;
    }
    
    footer {
        display: none;
    }
    
    #MainMenu {
        display: none;
    }
</style>
""", unsafe_allow_html=True)

# Model configurations
# Maps each translation provider to the target languages it supports and the
# model identifiers it offers. main() reads the Gemini and GPT 'models' lists
# to build the model dropdown, and the NLLB 'languages' list to restrict the
# language buttons when NLLB is selected.
# NOTE(review): 'default_model' and the NLLB 'models' mapping are not read in
# the part of the file reviewed here — confirm they are used elsewhere.
MODEL_CONFIG = {
    'Gemini': {
        'languages': ['Afrikaans', 'Northern Sotho', 'isiZulu'],
        'models': ['gemini-2.0-flash-exp', 'gemini-1.5-flash', 'gemini-1.5-pro'],
        'default_model': 'gemini-2.0-flash-exp'
    },
    'GPT': {
        'languages': ['Afrikaans', 'Northern Sotho', 'isiZulu'],
        'models': ['gpt-4', 'gpt-4-turbo', 'gpt-3.5-turbo'],
        'default_model': 'gpt-4'
    },
    'NLLB': {
        'languages': ['Northern Sotho', 'isiZulu'],  # No Afrikaans model available
        # One dedicated model identifier per target language (unlike the
        # list-valued 'models' entries above).
        'models': {
            'Northern Sotho': 'dsfsi/dcs-eng-nso-nllb-1.3B',
            'isiZulu': 'dsfsi/dcs-eng-zul-nllb-1.3B'
        }
    }
}

# Language code mappings
# Display name -> short code. The codes match the column prefixes of the
# sample translation data (e.g. 'afr_rev', 'nso_mt_gpt', 'isizulu_mt_gemini').
# Note that isiZulu maps to 'isizulu' here, whereas translate_with_nllb sends
# 'zul' to the NLLB API — the two mappings are intentionally separate.
LANGUAGE_CODES = {
    'Afrikaans': 'afr',
    'Northern Sotho': 'nso', 
    'isiZulu': 'isizulu'
}

# Load logo
def load_logo():
    """Return the application logo as a PIL image, or ``None`` if unavailable.

    The logo is expected at ``logo.png`` directly under ``BASE_DIR``. A missing
    file yields ``None`` without any message; an exception raised while
    checking for or opening the file is surfaced through ``st.warning`` and
    also yields ``None``.
    """
    try:
        logo_path = f"{BASE_DIR}/logo.png"
        if not os.path.exists(logo_path):
            return None
        return Image.open(logo_path)
    except Exception as exc:
        st.warning(f"Could not load logo: {str(exc)}")
        return None

# Load and cache data
@st.cache_data
def load_translation_data():
    """Return the built-in sample translations as a DataFrame.

    Each column holds six parallel phrases: the English source, the human
    translation per language (``afr``, ``nso``, ``isizulu``), its revised
    version (``*_rev``) and machine translations (``*_mt_nllb``,
    ``*_mt_gpt``, ``*_mt_gemini``).

    If building the frame fails, the error is shown with ``st.error`` and a
    one-row placeholder frame with ``english`` and ``error`` columns is
    returned instead.
    """
    # Column name -> six sample phrases, all aligned by position.
    sample_columns = {
        "english": ["Hello world", "How are you?", "Good morning", "Thank you", "Welcome", "Goodbye"],
        "afr": ["Hallo wêreld", "Hoe gaan dit?", "Goeie môre", "Dankie", "Welkom", "Totsiens"],
        "afr_rev": ["Hallo wêreld", "Hoe gaan dit met jou?", "Goeie môre", "Baie dankie", "Welkom", "Totsiens"],
        "nso": ["Dumela lefase", "O kae?", "Thobela", "Ke a leboga", "O amogetšwe", "Šala gabotse"],
        "nso_rev": ["Dumela lefase", "O phela bjang?", "Thobela", "Ke a leboga kudu", "O amogetšwe", "Šala gabotse"],
        "isizulu": ["Sawubona mhlaba", "Unjani?", "Sawubona", "Ngiyabonga", "Wamukelekile", "Sala kahle"],
        "isizulu_rev": ["Sawubona mhlaba", "Unjani wena?", "Sawubona", "Ngiyabonga kakhulu", "Wamukelekile", "Sala kahle"],
        "nso_mt_nllb": ["Dumela lefase", "O kae?", "Thobela", "Ke a leboga", "O amogetšwe", "Šala gabotse"],
        "isizulu_mt_nllb": ["Sawubona mhlaba", "Unjani?", "Sawubona", "Ngiyabonga", "Wamukelekile", "Sala kahle"],
        "afr_mt_gpt": ["Hallo wêreld", "Hoe gaan dit?", "Goeie môre", "Dankie", "Welkom", "Totsiens"],
        "nso_mt_gpt": ["Dumela lefase", "O kae?", "Thobela", "Ke a leboga", "O amogetšwe", "Šala gabotse"],
        "isizulu_mt_gpt": ["Sawubona mhlaba", "Unjani?", "Sawubona", "Ngiyabonga", "Wamukelekile", "Sala kahle"],
        "afr_mt_gemini": ["Hallo wêreld", "Hoe is dit?", "Goeie môre", "Baie dankie", "Welkom", "Totsiens"],
        "nso_mt_gemini": ["Dumela lefase", "O phela bjang?", "Thobela", "Ke a leboga kudu", "O amogetšwe", "Šala gabotse"],
        "isizulu_mt_gemini": ["Sawubona mhlaba", "Unjani wena?", "Sawubona", "Ngiyabonga kakhulu", "Wamukelekile", "Sala kahle"],
    }

    try:
        frame = pd.DataFrame(sample_columns)
    except Exception as exc:
        st.error(f"Error loading data: {str(exc)}")
        frame = pd.DataFrame({"english": ["Sample text"], "error": ["Data loading failed"]})
    return frame

def translate_with_gemini(text, target_language, model_name="gemini-2.0-flash-exp", client=None):
    """Translate English ``text`` into ``target_language`` using a Gemini model.

    Args:
        text: English source text; it is embedded verbatim in the prompt.
        target_language: Display name of the target language (e.g.
            ``'isiZulu'``). ``'Northern Sotho'`` is expanded to
            ``'Northern Sotho (Sepedi)'`` in the prompt; unknown names are
            passed through unchanged.
        model_name: Model identifier forwarded to ``generate_content``.
        client: Object exposing ``models.generate_content(model=, contents=)``;
            ``None`` (or any falsy value) means the API is not configured.

    Returns:
        The translation with surrounding whitespace removed, or a message
        starting with ``"❌"`` describing why no translation is available.
        Exceptions raised during the call are reported in the return value
        rather than propagated.
    """
    try:
        if not GENAI_AVAILABLE:
            return "❌ Gemini library not installed"

        if not client:
            return "❌ Gemini API not configured. Please check your GEMINI_API_KEY."

        lang_map = {
            'Afrikaans': 'Afrikaans',
            'Northern Sotho': 'Northern Sotho (Sepedi)',
            'isiZulu': 'isiZulu'
        }

        prompt = f"Translate the following English text to {lang_map.get(target_language, target_language)}: '{text}'. Provide only the translation without any explanations."

        response = client.models.generate_content(
            model=model_name, contents=prompt
        )

        # The response may carry no text at all (for example when the output
        # was blocked), in which case ``text`` is None. Report that clearly
        # instead of failing on ``None.strip()``.
        translated = response.text
        if not translated or not translated.strip():
            return "❌ Error: Gemini returned an empty response (no translation text)"
        return translated.strip()
    except Exception as e:
        return f"❌ Error: {str(e)}"
    
def translate_with_openai(text, target_language, model_name="gpt-4o", client=None):
    """Translate ``text`` into ``target_language`` via the OpenAI Chat Completions API.

    Args:
        text: Source text to translate.
        target_language: Display name of the target language (e.g.
            ``'Afrikaans'``). ``'Northern Sotho'`` is expanded to
            ``'Northern Sotho (Sepedi)'`` in the request; unknown names are
            passed through unchanged.
        model_name: Chat model identifier forwarded to the API.
        client: Object exposing ``chat.completions.create(...)``; ``None`` (or
            any falsy value) means the API is not configured.

    Returns:
        The translation with surrounding whitespace removed, or a message
        starting with ``"❌"`` describing why no translation is available.
        Exceptions raised during the call are reported in the return value
        rather than propagated.
    """
    try:
        if not OPENAI_AVAILABLE:
            return "❌ OpenAI library not installed"

        if not client:
            return "❌ OpenAI API not configured. Please check your OPENAI_API_KEY."

        lang_map = {
            'Afrikaans': 'Afrikaans',
            'Northern Sotho': 'Northern Sotho (Sepedi)',
            'isiZulu': 'isiZulu'
        }

        response = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": "You are a professional translator. Provide only the translation without any explanations."},
                {"role": "user", "content": f"Translate the following text to {lang_map.get(target_language, target_language)}: {text}"}
            ],
            max_tokens=1000,
            temperature=0.3  # Lower temperature for more consistent translations
        )

        # A completion can come back without choices, or with a message whose
        # content is None (e.g. a refusal). Report both explicitly instead of
        # leaking an IndexError / AttributeError message to the user.
        choices = response.choices
        if not choices:
            return "❌ Error: OpenAI returned no completion choices"

        content = choices[0].message.content
        if not content or not content.strip():
            return "❌ Error: OpenAI returned an empty response (no translation text)"
        return content.strip()

    except Exception as e:
        return f"❌ Error: {str(e)}"

@st.cache_resource
def initialize_apis():
    """Create the Gemini and OpenAI clients from configured API keys.

    Keys are read from the ``GEMINI_API_KEY`` and ``OPENAI_API_KEY``
    environment variables first and from Streamlit secrets second. A provider
    whose library is not installed is skipped silently; a missing key produces
    an ``st.warning``; a failure while constructing a client produces an
    ``st.error``. The function is wrapped in ``st.cache_resource`` so the
    clients are intended to be built once and reused across reruns.

    Returns:
        A ``(genai_client, openai_client)`` tuple; either element is ``None``
        when that provider could not be initialised.
    """
    genai_client = None
    openai_client = None

    def get_secret(name):
        """Return the secret ``name`` from the environment or Streamlit secrets, else ``None``."""
        from_env = os.environ.get(name)
        if from_env:
            return from_env
        try:
            if hasattr(st, "secrets") and name in st.secrets:
                return st.secrets.get(name)
        except FileNotFoundError:
            # Touching st.secrets without any secrets.toml raises a
            # file-not-found style error. That only means "no secret here",
            # so fall through and let the caller show the missing-key warning
            # instead of a misleading API error.
            pass
        return None

    try:
        # Gemini API
        if GENAI_AVAILABLE:
            try:
                api_key = get_secret("GEMINI_API_KEY")
                if api_key:
                    genai_client = genai.Client(api_key=api_key)
                else:
                    st.warning("⚠️ Gemini API key not found")
            except Exception as e:
                st.error(f"❌ Gemini API error: {str(e)}")

        # OpenAI API
        if OPENAI_AVAILABLE:
            try:
                api_key = get_secret("OPENAI_API_KEY")
                if api_key:
                    try:
                        # Preferred path: explicit client object.
                        openai_client = OpenAI(api_key=api_key)
                    except TypeError:
                        # Fallback kept from the original code: configure the
                        # module-level API key and use the module as the client.
                        import openai
                        openai.api_key = api_key
                        openai_client = openai
                else:
                    st.warning("⚠️ OpenAI API key not found")
            except Exception as e:
                st.error(f"❌ OpenAI API error: {str(e)}")

    except Exception as e:
        # Safety net for anything the per-provider handlers above did not catch.
        st.error(f"❌ API initialization error: {str(e)}")

    return genai_client, openai_client

def translate_with_nllb(text, target_language, api_url=None):
    """Translate ``text`` through the unified NLLB HTTP API.

    Args:
        text: Source text to translate; sent as the ``text`` query parameter.
        target_language: Display name of the target language.
            ``'Northern Sotho'`` and ``'isiZulu'`` are mapped to the API codes
            ``'nso'`` and ``'zul'``; any other name is lower-cased and sent
            as-is.
        api_url: Optional base URL of the NLLB service. When omitted, the
            ``NLLB_API_URL`` environment variable is used if set, otherwise
            the built-in tunnel URL. This lets the endpoint be changed
            without editing the code.

    Returns:
        The translation found under the language code in the JSON response,
        ``'❌ Translation not found'`` if that key is absent,
        ``'❌ API Error: <status>'`` for a non-200 response, or
        ``'❌ Error: <message>'`` if the request or decoding raised.
    """
    try:
        import requests

        # Resolve the endpoint: explicit argument > environment > default.
        base_url = (
            api_url
            or os.environ.get("NLLB_API_URL")
            or "https://4c2faecc052a.ngrok-free.app"
        ).rstrip("/")

        # Map Streamlit language names to API format
        lang_mapping = {
            'Northern Sotho': 'nso',
            'isiZulu': 'zul'
        }

        api_lang = lang_mapping.get(target_language, target_language.lower())

        response = requests.post(
            f"{base_url}/translate_simple",
            params={
                "text": text,
                "target_language": api_lang
            },
            timeout=30
        )

        if response.status_code != 200:
            return f"❌ API Error: {response.status_code}"

        # The service is expected to answer with a JSON object keyed by the
        # language code; a malformed body is reported by the handler below.
        result = response.json()
        return result.get(api_lang, '❌ Translation not found')

    except Exception as e:
        return f"❌ Error: {str(e)}"

def create_language_tabs(available_languages, current_language, key_suffix=""):
    """Build the HTML (plus a stub script) for the language tab strip.

    Args:
        available_languages: Iterable of language names, one tab each, in order.
        current_language: Name of the tab that receives the ``active`` class.
        key_suffix: Value passed as the second argument of each tab's
            ``selectLanguage`` click handler.

    Returns:
        A single string: the tab container markup followed by a ``<script>``
        element defining ``selectLanguage`` as a no-op placeholder.
    """
    pieces = ['<div class="language-tabs-container"><div class="language-tabs-header">']

    for language in available_languages:
        state = "active" if language == current_language else ""
        # Whitespace inside the fragment is part of the emitted markup.
        pieces.append(
            "\n"
            f'        <div class="language-tab {state}" '
            f"""onclick="selectLanguage('{language}', '{key_suffix}')">\n"""
            f"            {language}\n"
            "        </div>\n"
            "        "
        )

    pieces.append('</div></div>')

    # Placeholder handler: the tabs are purely visual, selection itself is
    # handled elsewhere through Streamlit buttons.
    pieces.append(
        "\n"
        "    <script>\n"
        "    function selectLanguage(lang, suffix) {\n"
        "        // This would normally update the session state, but since we can't do that from JavaScript,\n"
        "        // we'll use the button approach below instead\n"
        "    }\n"
        "    </script>\n"
        "    "
    )

    return "".join(pieces)

def main():
    """Main application function"""
    # Load and display logo and title side by side
    logo = load_logo()
    
    # Initialize session state FIRST to avoid refreshes
    if 'target_language' not in st.session_state:
        st.session_state.target_language = 'Afrikaans'
    if 'translation_result' not in st.session_state:
        st.session_state.translation_result = ""
    if 'current_page' not in st.session_state:
        st.session_state.current_page = 1
    if 'initialized' not in st.session_state:
        st.session_state.initialized = True
    
    col1, col2, col3 = st.columns([1, 2, 1])
    with col2:
        if logo:
            # Convert logo to base64 for HTML embedding
            import base64
            from io import BytesIO
            buffered = BytesIO()
            logo.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()
            
            st.markdown(f'''
            <div style="display: flex; align-items: center; justify-content: center; gap: 0px; margin-bottom: 1rem;">
                <img src="data:image/png;base64,{img_str}" width="180">
                <h1 class="main-header" style="margin: 20px;">UP Translate</h1>
            </div>
            ''', unsafe_allow_html=True)
        else:
            st.markdown('<h1 class="main-header" style="margin-bottom: 1rem;">UP Translate</h1>', unsafe_allow_html=True)
    
    # Initialize APIs
    genai_client, openai_client = initialize_apis()
    
    # Initialize session state
    if 'target_language' not in st.session_state:
        st.session_state.target_language = 'Afrikaans'
    if 'translation_result' not in st.session_state:
        st.session_state.translation_result = ""
    
    # Create tabs
    tab1, tab2 = st.tabs(["🤖 Live Translations", "📊 Existing Translations"])
    
    with tab1:
        # st.markdown('<h2 class="tab-header">Live Translation</h2>', unsafe_allow_html=True)
        
        # Create simplified model options
        model_options = []
        model_mapping = {}
        
        # Add Gemini models
        for model in MODEL_CONFIG['Gemini']['models']:
            display_name = f"Gemini - {model}"
            model_options.append(display_name)
            model_mapping[display_name] = ('Gemini', None, model)
        
        # Add GPT models
        for model in MODEL_CONFIG['GPT']['models']:
            display_name = f"GPT - {model}"
            model_options.append(display_name)
            model_mapping[display_name] = ('GPT', None, model)
        
        # Add single NLLB option
        model_options.append("NLLB - Specialized Models")
        model_mapping["NLLB - Specialized Models"] = ('NLLB', None, None)
        
        # Model selection with inline label
        label_col, dropdown_col = st.columns([2, 10])
        with label_col:
            st.markdown('<div style="margin-top: 8px; font-weight: 500;">Select Model:</div>', unsafe_allow_html=True)
        with dropdown_col:
            selected_model_option = st.selectbox(
                "Select Model:",
                model_options,
                index=0,
                key="model_selection_dropdown",
                label_visibility="collapsed"
            )
        
        selected_provider, _, selected_model = model_mapping[selected_model_option]
        
        # Translation interface
        col_left, col_center, col_right = st.columns([5, 1, 5])
        
        # Left side - English Input
        with col_left:
            st.markdown('<div class="translate-container">', unsafe_allow_html=True)
            st.markdown('<div class="translate-header">English</div>', unsafe_allow_html=True)
            st.markdown('</div>', unsafe_allow_html=True)
            
            input_text = st.text_area(
                "Input",
                placeholder="Input text here",
                height=350,
                key="input_text_live",
                label_visibility="collapsed"
            )
        
        # Center - Translate Button
        with col_center:
            # Add spacing to align button with text areas
            st.markdown('<div style="height: 150px;"></div>', unsafe_allow_html=True)
            translate_clicked = st.button(
                "Translate",
                key="translate_btn_live",
                help="Translate text",
                type="primary",
                use_container_width=True
            )

        # Right side - Translation Output
        with col_right:
            # Determine available languages based on selected provider
            if selected_provider == 'NLLB':
                available_languages = MODEL_CONFIG['NLLB']['languages']
            else:
                available_languages = ['Afrikaans', 'Northern Sotho', 'isiZulu']
            
            # Set default language to first available if current selection not available
            if st.session_state.target_language not in available_languages:
                st.session_state.target_language = available_languages[0]
            
            # Create container with custom styling
            st.markdown('<div class="translate-container">', unsafe_allow_html=True)
            
            # Language selection buttons
            lang_cols = st.columns(len(available_languages))
            for i, lang in enumerate(available_languages):
                with lang_cols[i]:
                    button_type = "primary" if lang == st.session_state.target_language else "secondary"
                    if st.button(
                        lang,
                        key=f"lang_btn_{lang}_live",
                        type=button_type,
                        use_container_width=True
                    ):
                        if st.session_state.target_language != lang:  # Only update if different
                            st.session_state.target_language = lang
                            st.session_state.translation_result = ""  # Clear previous result
                            st.rerun()
            
            # Translation logic
            if translate_clicked and input_text:
                with st.spinner("Translating..."):
                    target_lang = st.session_state.target_language
                    
                    if selected_provider == 'Gemini':
                        result = translate_with_gemini(input_text, target_lang, selected_model, genai_client)
                    
                    elif selected_provider == 'GPT':
                        result = translate_with_openai(input_text, target_lang, selected_model, openai_client)
                    
                    elif selected_provider == 'NLLB':
                        result = translate_with_nllb(input_text, target_lang)
                    
                    st.session_state.translation_result = result

            # Translation output area with proper labeling
            st.text_area(
                f"Translation ({st.session_state.target_language})",  # Dynamic label
                value=st.session_state.translation_result,
                placeholder="Translation will appear here",
                height=350,
                key="translation_output_live_fixed",  # Changed key to avoid conflicts
                disabled=True,
                label_visibility="collapsed"
            )
            
        # Support information
        st.markdown("""
        <div class="support-info">
        <strong>Available Models:</strong><br>
        🔮 <strong>Gemini:</strong> All languages (gemini-2.0-flash-exp, gemini-1.5-flash, gemini-1.5-pro)<br>
        🧠 <strong>GPT:</strong> All languages (gpt-4, gpt-4-turbo, gpt-3.5-turbo)<br>
        🤗 <strong>NLLB:</strong> Northern Sotho, isiZulu only (specialized models)
        </div>
        """, unsafe_allow_html=True)
    
        with tab2:
            # Load data from base directory automatically
            @st.cache_data
            def load_analysis_data():
                """Load all analysis data from base directory"""
                df_translations = None
                df_bleu = None
                df_chrf = None
                df_comet = None
                
                try:
                    # Try to load translations data
                    if os.path.exists(f"{DATA_DIR}/translations.tsv"):
                        df_translations = pd.read_csv(f"{DATA_DIR}/translations.tsv", sep="\t")
                        
                        # Convert new CSV format to expected format for analysis
                        # New format: id,english,afr_human,afr_revised,nso_human,nso_revised,zul_human,zul_revised,afr_gemini,afr_gpt,nso_gemini,nso_gpt,nso_nllb,zul_gemini,zul_gpt,zul_nllb
                        # Expected format: english, afr_human, afr_revised, nso_human, nso_revised, isizulu_human, isizulu_revised, etc.
                        
                        # Rename zul columns to isizulu for backward compatibility with analysis code
                        column_mapping = {
                            'zul_human': 'isizulu_human',
                            'zul_revised': 'isizulu_revised', 
                            'zul_gemini': 'isizulu_mt_gemini',
                            'zul_gpt': 'isizulu_mt_gpt',
                            'zul_nllb': 'isizulu_mt_nllb',
                            'afr_gemini': 'afr_mt_gemini',
                            'afr_gpt': 'afr_mt_gpt',
                            'nso_gemini': 'nso_mt_gemini', 
                            'nso_gpt': 'nso_mt_gpt',
                            'nso_nllb': 'nso_mt_nllb'
                        }
                        
                        df_translations = df_translations.rename(columns=column_mapping)
                        
                    elif os.path.exists(f"{DATA_DIR}/translation_data.csv"):
                        df_translations = pd.read_csv(f"{DATA_DIR}/translation_data.csv")
                    else:
                        print("No translation data found, using sample data")
                        df_translations = load_translation_data()  # Fallback to sample data
                    
                    # Try to load BLEU scores
                    if os.path.exists(f"{DATA_DIR}/bleu_scores.csv"):
                        df_bleu = pd.read_csv(f"{DATA_DIR}/bleu_scores.csv")
                        
                        # Convert zul references to isizulu for compatibility
                        df_bleu['comparison_pair'] = df_bleu['comparison_pair'].str.replace('zul_', 'isizulu_')
                        df_bleu['language'] = df_bleu['language'].replace('isiZulu', 'isiZulu')  # Already correct
                        
                    else:
                        # Sample BLEU data (using isizulu for compatibility with existing analysis code)
                        df_bleu = pd.DataFrame({
                            'comparison_pair': ['afr_human_vs_afr_gemini', 'afr_human_vs_afr_gpt', 'afr_human_vs_afr_revised', 'nso_human_vs_nso_gemini', 'nso_human_vs_nso_gpt', 'nso_human_vs_nso_revised', 'nso_human_vs_nso_nllb', 'isizulu_human_vs_isizulu_gemini', 'isizulu_human_vs_isizulu_gpt', 'isizulu_human_vs_isizulu_revised', 'isizulu_human_vs_isizulu_nllb'],
                            'bleu_score': [0.78, 0.72, 0.89, 0.65, 0.68, 0.85, 0.71, 0.71, 0.69, 0.87, 0.73],
                            'language': ['Afrikaans', 'Afrikaans', 'Afrikaans', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'isiZulu', 'isiZulu', 'isiZulu', 'isiZulu']
                        })
                    
                    # Try to load COMET scores
                    if os.path.exists(f"{DATA_DIR}/comet_scores.csv"):
                        df_comet = pd.read_csv(f"{DATA_DIR}/comet_scores.csv")
                    else:
                        # Sample COMET data
                        df_comet = pd.DataFrame({
                            'comparison_pair': ['afr_human_vs_afr_gemini', 'afr_human_vs_afr_gpt', 'afr_human_vs_afr_revised', 'nso_human_vs_nso_gemini', 'nso_human_vs_nso_gpt', 'nso_human_vs_nso_revised', 'isizulu_human_vs_isizulu_gemini', 'isizulu_human_vs_isizulu_gpt', 'isizulu_human_vs_isizulu_revised'],
                            'comet_score': [0.82, 0.79, 0.92, 0.71, 0.74, 0.88, 0.76, 0.73, 0.90],
                            'language': ['Afrikaans', 'Afrikaans', 'Afrikaans', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'isiZulu', 'isiZulu', 'isiZulu']
                        })

                    # Try to load CHRF scores
                    if os.path.exists(f"{DATA_DIR}/chrf_scores.csv"):
                        df_chrf = pd.read_csv(f"{DATA_DIR}/chrf_scores.csv")
                    else:
                        # Sample CHRF data
                        df_chrf = pd.DataFrame({
                            'comparison_pair': ['afr_human_vs_afr_gemini', 'afr_human_vs_afr_gpt', 'afr_human_vs_afr_revised', 'nso_human_vs_nso_gemini', 'nso_human_vs_nso_gpt', 'nso_human_vs_nso_revised', 'isizulu_human_vs_isizulu_gemini', 'isizulu_human_vs_isizulu_gpt', 'isizulu_human_vs_isizulu_revised'],
                            'chrf_score': [0.75, 0.70, 0.88, 0.60, 0.65, 0.80, 0.68, 0.66, 0.85],
                            'language': ['Afrikaans', 'Afrikaans', 'Afrikaans', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'isiZulu', 'isiZulu', 'isiZulu']
                        })
                    
                    return df_translations, df_bleu, df_comet, df_chrf

                except Exception as e:
                    st.error(f"Error loading data: {str(e)}")
                    return None, None, None, None
            
            # Load every dataset used by the analysis tabs. load_analysis_data()
            # returns (None, None, None, None) after reporting the problem with
            # st.error when loading raised an exception, so callers must check
            # for None before using any of the four frames.
            df_translations, df_bleu, df_comet, df_chrf = load_analysis_data()

            if df_translations is not None:
                # Display name -> column/file prefix for every supported language.
                lang_codes = {'Afrikaans': 'afr', 'Northern Sotho': 'nso', 'isiZulu': 'isizulu'}
                languages = list(lang_codes)

                # Language picker: a text label in the narrow column and the
                # dropdown (its own label collapsed) in the wide one.
                lang_col1, lang_col2 = st.columns([2, 10])
                lang_col1.markdown(
                    '<div style="margin-top: 8px; font-weight: 500;">Select Language:</div>',
                    unsafe_allow_html=True,
                )
                selected_lang = lang_col2.selectbox(
                    "Select Language for Analysis:",
                    languages,
                    key="global_lang_select",
                    label_visibility="collapsed",
                )

                # Prefix shared by all data columns of the chosen language.
                code = lang_codes[selected_lang]

                # One tab per analysis view.
                analysis_tab_labels = [
                    "Sample Translations",
                    "📊 Quality Metrics",
                    "🔄 Revision Analysis",
                    "🔍 Word Comparison",
                ]
                analysis_tab1, analysis_tab2, analysis_tab3, analysis_tab4 = st.tabs(analysis_tab_labels)

                with analysis_tab1:
                    # Translation Samples tab: a paginated table showing the English
                    # source next to every translation column of the selected language.
                    st.markdown(f"""
                    <div style="margin: 20px 0;">
                        <h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
                            📝 Translation Samples for {selected_lang}
                        </h4>
                    </div>
                    """, unsafe_allow_html=True)

                    # Use the global language selection
                    samples_code = code

                    # Translation columns are recognised by the language-code prefix.
                    translation_cols = [col for col in df_translations.columns if col.startswith(samples_code)]
                    # The English source column is optional: a translations file
                    # without it must not crash the tab with a KeyError.
                    source_cols = ['english'] if 'english' in df_translations.columns else []
                    display_cols = source_cols + translation_cols

                    if translation_cols:
                        # Control panel: page-size picker on the left, page navigation on the right.
                        control_col1, control_col2, control_col3, control_col4 = st.columns([1, 7, 1, 2])

                        control_col1.markdown(
                            '<div style="margin-top: 8px; font-weight: 500;">Samples per page:</div>',
                            unsafe_allow_html=True,
                        )
                        page_size = control_col2.selectbox(
                            "Samples per page:",
                            [10, 25, 50, 100],
                            index=0,
                            key="page_size_select",
                            label_visibility="collapsed",
                        )

                        # Initialize session state for pagination
                        if 'current_page' not in st.session_state:
                            st.session_state.current_page = 1

                        # Keep only rows that have at least one translation for this language.
                        available_data = df_translations[display_cols].dropna(subset=translation_cols, how='all')
                        total_samples = len(available_data)
                        total_pages = max(1, (total_samples + page_size - 1) // page_size)  # Ceiling division

                        # A page remembered from another language or page size may no
                        # longer exist; fall back to the first page in that case.
                        if not 1 <= st.session_state.current_page <= total_pages:
                            st.session_state.current_page = 1

                        # Slice out the rows of the current page.
                        start_idx = (st.session_state.current_page - 1) * page_size
                        end_idx = min(start_idx + page_size, total_samples)
                        current_page_data = available_data.iloc[start_idx:end_idx]

                        control_col3.markdown(
                            '<div style="margin-top: 8px; font-weight: 500;">Page:</div>',
                            unsafe_allow_html=True,
                        )
                        with control_col4:
                            # Page navigation: first / previous / "n / N" / next / last.
                            nav_col1, nav_col2, nav_col3, nav_col4, nav_col5 = st.columns([1, 1, 2, 1, 1])

                            page_now = st.session_state.current_page
                            at_first = page_now == 1
                            at_last = page_now == total_pages
                            nav_buttons = [
                                # (column, icon, widget key, tooltip, disabled, destination page)
                                (nav_col1, "⏮️", "first_page", "First page", at_first, 1),
                                (nav_col2, "◀️", "prev_page", "Previous page", at_first, page_now - 1),
                                (nav_col4, "▶️", "next_page", "Next page", at_last, page_now + 1),
                                (nav_col5, "⏭️", "last_page", "Last page", at_last, total_pages),
                            ]
                            for nav_col, icon, widget_key, tooltip, is_disabled, destination in nav_buttons:
                                if nav_col.button(icon, key=widget_key, help=tooltip, disabled=is_disabled):
                                    st.session_state.current_page = destination
                                    st.rerun()

                            nav_col3.markdown(
                                f'<div style="text-align: center; margin-top: 8px; font-weight: 500;">{st.session_state.current_page} / {total_pages}</div>',
                                unsafe_allow_html=True,
                            )

                        # Statistics cards
                        metric_cards = [
                            ("Showing", len(current_page_data)),
                            ("Translation Systems", len(translation_cols)),
                            ("Total Available", total_samples),
                            ("Current Page", f"{st.session_state.current_page}/{total_pages}"),
                        ]
                        for stats_col, (card_title, card_value) in zip(st.columns(4), metric_cards):
                            stats_col.markdown(f"""
                            <div class="metric-card">
                                <div class="metric-title">{card_title}</div>
                                <div class="metric-value">{card_value}</div>
                            </div>
                            """, unsafe_allow_html=True)

                        # Display the samples table
                        st.markdown("### Translation Examples")

                        if len(current_page_data) > 0:
                            display_df = current_page_data.copy()

                            # Substring -> system label, checked in this order; the first
                            # match wins. '_gemini' also matches '_mt_gemini' (same for
                            # GPT and NLLB), so the '_mt_' variants need no entry.
                            system_labels = (
                                ('_human', 'Human'),
                                ('_revised', 'Revised'),
                                ('_gemini', 'Gemini'),
                                ('_gpt', 'GPT'),
                                ('_nllb', 'NLLB'),
                            )

                            # Rename columns for better display
                            column_rename = {'english': 'English (Source)'}
                            used_labels = set(column_rename.values())
                            for col in translation_cols:
                                # Generic fallback for columns of an unknown system.
                                generic_name = col.replace(f'{samples_code}_', '').replace('_', ' ').title()
                                system_name = next(
                                    (label for marker, label in system_labels if marker in col),
                                    generic_name,
                                )
                                pretty_name = f'{selected_lang} ({system_name})'
                                if pretty_name in used_labels:
                                    # Two source columns resolved to the same label (e.g. both
                                    # '<code>_gemini' and '<code>_mt_gemini' exist). Duplicate
                                    # column names would be ambiguous in the table, so keep
                                    # the raw column name for the later one.
                                    pretty_name = f'{selected_lang} ({col})'
                                used_labels.add(pretty_name)
                                column_rename[col] = pretty_name

                            display_df = display_df.rename(columns=column_rename)

                            # Number rows by their 1-based position among the samples
                            # available for this language (i.e. after empty rows were dropped).
                            display_df.index = range(start_idx + 1, end_idx + 1)
                            display_df.index.name = 'Sample #'

                            st.dataframe(
                                display_df,
                                use_container_width=True,
                                height=min(600, 50 + len(display_df) * 35),  # Dynamic height based on content
                                column_config={
                                    col: st.column_config.TextColumn(col, width="medium")
                                    for col in display_df.columns
                                }
                            )

                            # Page info summary
                            st.markdown(f"""
                            <div style="margin-top: 16px; padding: 12px; background: #f8fafc; border-radius: 6px; text-align: center; color: #6b7280; font-size: 0.9rem;">
                                📄 Showing samples {start_idx + 1} to {end_idx} of {total_samples} total samples • Page {st.session_state.current_page} of {total_pages}
                            </div>
                            """, unsafe_allow_html=True)

                            # Quick jump to page
                            if total_pages > 5:  # Only show quick jump for datasets with many pages
                                st.markdown("### Quick Navigation")
                                jump_col1, jump_col2, jump_col3 = st.columns([1, 2, 1])

                                with jump_col2:
                                    target_page = st.number_input(
                                        f"Jump to page (1-{total_pages}):",
                                        min_value=1,
                                        max_value=total_pages,
                                        value=st.session_state.current_page,
                                        key="page_jump"
                                    )

                                    if st.button("🔗 Go to Page", use_container_width=True):
                                        if target_page != st.session_state.current_page:
                                            st.session_state.current_page = target_page
                                            st.rerun()

                        else:
                            st.warning("⚠️ No translation samples found for the current page.")

                    else:
                        st.warning(f"⚠️ No translation data available for {selected_lang}. Expected columns starting with '{samples_code}_'")

                        # Debug information. No column carries this language's prefix
                        # (otherwise the branch above would have run), so the only useful
                        # hint is which language columns do exist in the file.
                        all_lang_columns = [
                            col for col in df_translations.columns
                            if col.startswith(('afr_', 'nso_', 'isizulu_'))
                        ]
                        if all_lang_columns:
                            st.info(f"💡 Available language columns: {', '.join(all_lang_columns[:10])}{'...' if len(all_lang_columns) > 10 else ''}")
                
                with analysis_tab2:
                    # Quality Metrics tab: chrF / BLEU / COMET bar charts for the selected
                    # language and, when per-domain scores exist, one spider chart per metric.
                    st.markdown(f"""
                    <div style="margin: 20px 0;">
                        <h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
                            📈 Quality Metrics for {selected_lang}
                        </h4>
                    </div>
                    """, unsafe_allow_html=True)

                    # Score visualizations
                    if df_bleu is not None and df_chrf is not None and df_comet is not None:
                        # Keep only the selected language's rows; a frame without a
                        # 'language' column is used unfiltered.
                        lang_bleu = df_bleu[df_bleu['language'] == selected_lang] if 'language' in df_bleu.columns else df_bleu
                        lang_chrf = df_chrf[df_chrf['language'] == selected_lang] if 'language' in df_chrf.columns else df_chrf
                        lang_comet = df_comet[df_comet['language'] == selected_lang] if 'language' in df_comet.columns else df_comet

                        # Domain-level data means: all three frames have a 'domain' column
                        # and BLEU has at least one row that is not the 'Overall' aggregate.
                        has_domain_data = ('domain' in lang_bleu.columns and 'domain' in lang_chrf.columns and
                                           'domain' in lang_comet.columns and
                                           len(lang_bleu[lang_bleu['domain'] != 'Overall']) > 0)

                        if has_domain_data:
                            # Missing domains are dropped first: sorting NaN together with
                            # strings would raise a TypeError.
                            available_domains = sorted(lang_bleu['domain'].dropna().unique())
                            # 'Overall' goes first, but only when such rows exist; offering
                            # it unconditionally made the default selection render nothing.
                            domain_options = (
                                [d for d in available_domains if d == 'Overall']
                                + [d for d in available_domains if d != 'Overall']
                            )

                            selected_domain = st.selectbox(
                                "📍 Select Domain for Analysis:",
                                domain_options,
                                key=f"domain_selector_{selected_lang}"
                            )

                            # 'Overall' is stored as an ordinary domain value, so a single
                            # equality filter covers every choice.
                            display_bleu = lang_bleu[lang_bleu['domain'] == selected_domain]
                            display_chrf = lang_chrf[lang_chrf['domain'] == selected_domain]
                            display_comet = lang_comet[lang_comet['domain'] == selected_domain]
                            chart_title_suffix = f" - {selected_domain}"
                        else:
                            # Use all data if no domain column
                            display_bleu = lang_bleu
                            display_chrf = lang_chrf
                            display_comet = lang_comet
                            chart_title_suffix = ""

                        # Left-to-right chart order: chrF, BLEU, COMET.
                        bar_specs = [
                            # (metric label, score column, rows to plot, colour scale)
                            ('chrF', 'chrf_score', display_chrf, 'oranges'),
                            ('BLEU', 'bleu_score', display_bleu, 'blues'),
                            ('COMET', 'comet_score', display_comet, 'greens'),
                        ]

                        # Charts are only drawn when every metric has rows to show.
                        if all(len(frame) > 0 for _, _, frame, _ in bar_specs):
                            for chart_col, (metric, score_col, frame, color_scale) in zip(st.columns(3), bar_specs):
                                with chart_col:
                                    fig_bar = px.bar(
                                        frame,
                                        x='comparison_pair',
                                        y=score_col,
                                        title=f'{metric} Scores - {selected_lang}{chart_title_suffix}',
                                        color=score_col,
                                        color_continuous_scale=color_scale
                                    )
                                    fig_bar.update_layout(
                                        xaxis_title="Translation Pairs",
                                        yaxis_title=f"{metric} Score",
                                        xaxis_tickangle=-45,
                                        height=400,
                                        font=dict(family="Inter", size=12)
                                    )
                                    st.plotly_chart(fig_bar, use_container_width=True)

                            # Spider charts compare the comparison pairs across domains, so
                            # they are only drawn when domain-level data exists.
                            if has_domain_data:
                                st.markdown(f"""
                                <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 20px 0 16px 0;">
                                    🕸️ Domain Performance Spider Charts - {selected_lang}
                                </h4>
                                """, unsafe_allow_html=True)

                                # One (comparison_pair x domain) table per metric, built from
                                # the language-level frames so every domain appears regardless
                                # of the domain selected above.
                                pivots = {}
                                for metric, score_col, lang_frame in (
                                    ('chrF', 'chrf_score', lang_chrf),
                                    ('BLEU', 'bleu_score', lang_bleu),
                                    ('COMET', 'comet_score', lang_comet),
                                ):
                                    # Drop the 'Overall' aggregate and rows without a domain.
                                    per_domain = lang_frame[lang_frame['domain'] != 'Overall'].dropna(subset=['domain'])
                                    # DataFrame.pivot raises ValueError on repeated
                                    # (comparison_pair, domain) rows; keep the last one.
                                    per_domain = per_domain.drop_duplicates(
                                        subset=['comparison_pair', 'domain'], keep='last'
                                    )
                                    pivots[metric] = per_domain.pivot(
                                        index='comparison_pair',
                                        columns='domain',
                                        values=score_col
                                    ).fillna(0)

                                # Ensure domains are in the same order for all metrics
                                domains = sorted(set().union(*(table.columns for table in pivots.values())))
                                pivots = {
                                    metric: table.reindex(columns=domains, fill_value=0)
                                    for metric, table in pivots.items()
                                }

                                # Translucent fill colours; the trace outline reuses the same
                                # RGB value at full opacity.
                                distinct_colors = [
                                    'rgba(255, 99, 132, 0.4)',   # Red
                                    'rgba(54, 162, 235, 0.4)',   # Blue
                                    'rgba(99, 255, 132, 0.4)',   # Green
                                    'rgba(75, 192, 192, 0.4)',   # Teal
                                    'rgba(255, 205, 86, 0.4)',   # Yellow
                                    'rgba(153, 102, 255, 0.4)',  # Purple
                                    'rgba(255, 159, 64, 0.4)',   # Orange
                                    'rgba(199, 199, 199, 0.4)',  # Grey
                                    'rgba(83, 102, 255, 0.4)',   # Indigo
                                    'rgba(255, 99, 255, 0.4)',   # Magenta
                                ]
                                border_colors = [color.replace('0.4)', '1.0)') for color in distinct_colors]

                                if domains:
                                    # Three side-by-side spider charts: chrF, BLEU, COMET.
                                    for spider_col, (metric, table) in zip(st.columns(3), pivots.items()):
                                        # The traces are the same in every chart, so only the
                                        # middle (BLEU) chart carries the legend.
                                        show_legend = metric == 'BLEU'
                                        with spider_col:
                                            fig_spider = go.Figure()
                                            for i, (model_name, row) in enumerate(table.iterrows()):
                                                color_idx = i % len(distinct_colors)
                                                scores = row.tolist()
                                                fig_spider.add_trace(go.Scatterpolar(
                                                    r=scores + scores[:1],  # close loop
                                                    theta=domains + domains[:1],
                                                    fill='toself',
                                                    name=model_name.split('_')[-1].upper(),
                                                    fillcolor=distinct_colors[color_idx],
                                                    line=dict(color=border_colors[color_idx], width=2),
                                                    opacity=0.7,
                                                    showlegend=show_legend
                                                ))

                                            # NOTE(review): the radial axis is fixed to [0, 1], which
                                            # assumes every metric is on a 0-1 scale - confirm for BLEU.
                                            layout_options = dict(
                                                polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
                                                showlegend=show_legend,
                                                title=dict(text=f"Domain Performance ({metric}) - {selected_lang}"),
                                                height=450
                                            )
                                            if show_legend:
                                                # Horizontal legend centred below the chart.
                                                layout_options['legend'] = dict(
                                                    orientation="h",
                                                    yanchor="bottom",
                                                    y=-0.3,
                                                    xanchor="center",
                                                    x=0.5
                                                )
                                            fig_spider.update_layout(**layout_options)
                                            st.plotly_chart(fig_spider, use_container_width=True)
                                else:
                                    st.info(f"ℹ️ No domain-level scores available for {selected_lang}.")
                        else:
                            # Tell the user why the tab is empty instead of rendering nothing.
                            st.info(f"ℹ️ No quality metric scores available for {selected_lang}{chart_title_suffix}.")
                            
                            # # Overall Performance Summary
                            # st.markdown(""" 
                            # <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
                            #     📋 Overall Performance Summary
                            # </h4>
                            # """, unsafe_allow_html=True)
                            
                            # # Create overall summary table
                            # if len(display_bleu) > 0 and len(display_chrf) > 0 and len(display_comet) > 0:
                            #     # Merge all three metrics
                            #     merged_scores = pd.merge(display_bleu, display_chrf, on='comparison_pair', suffixes=('_bleu', '_chrf'))
                            #     merged_scores = pd.merge(merged_scores, display_comet, on='comparison_pair')
                            #     merged_scores['model'] = merged_scores['comparison_pair'].apply(lambda x: x.split('_')[-1].upper())
                                
                            #     summary_data = []
                            #     for _, row in merged_scores.iterrows():
                            #         summary_data.append({
                            #             'Model': row['model'],
                            #             'BLEU Score': f"{row['bleu_score']:.3f}",
                            #             'chrF Score': f"{row['chrf_score']:.3f}",
                            #             'COMET Score': f"{row['comet_score']:.3f}",
                            #             'Average': f"{(row['bleu_score'] + row['chrf_score'] + row['comet_score']) / 3:.3f}"
                            #         })

                            #     summary_df = pd.DataFrame(summary_data)

                            #     # Only sort if dataframe has data and 'Average' column exists
                            #     if len(summary_df) > 0 and 'Average' in summary_df.columns:
                            #         summary_df = summary_df.sort_values('Average', ascending=False)
                                
                            #     # Style the dataframe
                            #     st.dataframe(
                            #         summary_df,
                            #         use_container_width=True,
                            #         hide_index=True,
                            #         column_config={
                            #             "Model": st.column_config.TextColumn("Model", width="medium"),
                            #             "BLEU Score": st.column_config.NumberColumn("BLEU Score", format="%.3f"),
                            #             "chrF Score": st.column_config.NumberColumn("chrF Score", format="%.3f"),
                            #             "COMET Score": st.column_config.NumberColumn("COMET Score", format="%.3f"),
                            #             "Average": st.column_config.NumberColumn("Average", format="%.3f")
                            #         }
                            #     )
                
                with analysis_tab3:
                    # Revision Analysis Tab
                    st.markdown("""
                    <div style="margin: 20px 0;">
                        <h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
                            ✏️ Human Translation Revision Analysis for {selected_lang}
                        </h4>
                    </div>
                    """.format(selected_lang=selected_lang), unsafe_allow_html=True)
                    
                    # Use the global language selection
                    rev_code = code
                    
                    # Check for revision columns
                    human_col = f"{rev_code}_human"
                    revised_col = f"{rev_code}_revised"
                    
                    if human_col in df_translations.columns and revised_col in df_translations.columns:
                        # Get all rows with human translations for this language
                        df_lang_data = df_translations[[human_col, revised_col]].copy()
                        
                        # Remove rows where human translation is missing (can't analyze revisions without original)
                        df_lang_data = df_lang_data[df_lang_data[human_col].notna()].copy()
                        
                        total_human_translations = len(df_lang_data)
                        
                        if total_human_translations == 0:
                            st.warning(f"⚠️ No human translations found for {selected_lang}")
                        else:
                            # Calculate revision statistics.
                            # Count the rows that really carry a revision BEFORE the gaps are filled,
                            # and only among rows that have a human translation, so this figure can
                            # never exceed the "Human Translations" total displayed next to it.
                            revisions_available = int(df_lang_data[revised_col].notna().sum())

                            # For missing revised translations, we assume no revision was made (same as original)
                            df_lang_data[revised_col] = df_lang_data[revised_col].fillna(df_lang_data[human_col])

                            # Count actual changes. The comparison is done on stripped text, mirroring
                            # categorize_revision(), so a whitespace-only difference is not reported as
                            # a change here while being charted as "No Change" further down.
                            human_text = df_lang_data[human_col].astype(str).str.strip()
                            revised_text = df_lang_data[revised_col].astype(str).str.strip()
                            revisions_made = int((human_text != revised_text).sum())
                            revision_rate = (revisions_made / total_human_translations) * 100
                            
                            # Calculate revision types
                            def categorize_revision(original, revised):
                                """Label how a revised translation differs from its original.

                                Returns "Missing Data" when either value is NA, "No Change" when the
                                two texts are equal after stripping surrounding whitespace, and
                                otherwise "Expansion", "Reduction" or "Modification" depending on
                                whether the revision has more, fewer or the same number of
                                whitespace-separated words.
                                """
                                if pd.isna(original) or pd.isna(revised):
                                    return "Missing Data"

                                original_text = str(original)
                                revised_text = str(revised)
                                if original_text.strip() == revised_text.strip():
                                    return "No Change"

                                # Positive delta: the revision is longer; negative: it is shorter.
                                word_delta = len(revised_text.lower().split()) - len(original_text.lower().split())
                                if word_delta > 0:
                                    return "Expansion"
                                if word_delta < 0:
                                    return "Reduction"
                                return "Modification"
                            
                            df_lang_data['revision_type'] = df_lang_data.apply(
                                lambda row: categorize_revision(row[human_col], row[revised_col]), axis=1
                            )
                            
                            # Revision statistics cards
                            rev_col1, rev_col2, rev_col3, rev_col4 = st.columns(4)
                            
                            with rev_col1:
                                st.markdown(f"""
                                <div class="metric-card">
                                    <div class="metric-title">Human Translations</div>
                                    <div class="metric-value">{total_human_translations}</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            with rev_col2:
                                st.markdown(f"""
                                <div class="metric-card">
                                    <div class="metric-title">Revisions Available</div>
                                    <div class="metric-value">{revisions_available}</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            with rev_col3:
                                st.markdown(f"""
                                <div class="metric-card">
                                    <div class="metric-title">Changes Made</div>
                                    <div class="metric-value">{revisions_made}</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            with rev_col4:
                                st.markdown(f"""
                                <div class="metric-card">
                                    <div class="metric-title">Revision Rate</div>
                                    <div class="metric-value">{revision_rate:.1f}%</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            # Revision type analysis
                            st.markdown("""
                            <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
                                📈 Revision Pattern Analysis
                            </h4>
                            """, unsafe_allow_html=True)
                            
                            revision_counts = df_lang_data['revision_type'].value_counts()
                            
                            if len(revision_counts) > 0:
                                # Create revision type charts
                                rev_chart_col1, rev_chart_col2 = st.columns(2)
                                
                                with rev_chart_col1:
                                    # Pie chart of revision types
                                    fig_pie = px.pie(
                                        values=revision_counts.values,
                                        names=revision_counts.index,
                                        title=f"Revision Types Distribution",
                                        color_discrete_sequence=px.colors.qualitative.Set3
                                    )
                                    fig_pie.update_layout(height=400, font=dict(family="Inter", size=12))
                                    st.plotly_chart(fig_pie, use_container_width=True)
                                
                                with rev_chart_col2:
                                    # Bar chart of revision types
                                    fig_bar = px.bar(
                                        x=revision_counts.values,
                                        y=revision_counts.index,
                                        orientation='h',
                                        title=f"Revision Frequency",
                                        color=revision_counts.values,
                                        color_continuous_scale='viridis'
                                    )
                                    fig_bar.update_layout(
                                        height=400,
                                        xaxis_title="Count",
                                        yaxis_title="Revision Type",
                                        font=dict(family="Inter", size=12)
                                    )
                                    st.plotly_chart(fig_bar, use_container_width=True)
                                
                                # Word-level revision analysis
                                st.markdown("""
                                <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
                                    🔤 Word-Level Changes Analysis
                                </h4>
                                """, unsafe_allow_html=True)
                                
                                from collections import Counter

                                # Word changes are measured only on rows whose text was actually revised
                                is_changed = df_lang_data['revision_type'] != 'No Change'
                                changed_revisions = df_lang_data[is_changed]

                                # Pool, row by row, the lower-cased words found on only one side of the revision
                                words_added = []
                                words_removed = []
                                for _, row in changed_revisions.iterrows():
                                    before = row[human_col]
                                    after = row[revised_col]
                                    if pd.isna(before) or pd.isna(after):
                                        continue

                                    before_vocab = set(str(before).lower().split())
                                    after_vocab = set(str(after).lower().split())
                                    words_added.extend(after_vocab - before_vocab)
                                    words_removed.extend(before_vocab - after_vocab)

                                # Number of revised rows in which each word was added / removed
                                added_counts = Counter(words_added)
                                removed_counts = Counter(words_removed)
                                
                                word_analysis_col1, word_analysis_col2 = st.columns(2)
                                
                                with word_analysis_col1:
                                    st.markdown("**🟢 Most Added Words**")
                                    if added_counts:
                                        top_added = dict(added_counts.most_common(15))
                                        
                                        # Create horizontal bar chart for added words
                                        fig_added = px.bar(
                                            x=list(top_added.values()),
                                            y=list(top_added.keys()),
                                            orientation='h',
                                            title="Most Frequently Added Words",
                                            color=list(top_added.values()),
                                            color_continuous_scale='Greens'
                                        )
                                        fig_added.update_layout(
                                            height=400,
                                            xaxis_title="Frequency",
                                            yaxis_title="Words",
                                            font=dict(family="Inter", size=10)
                                        )
                                        st.plotly_chart(fig_added, use_container_width=True)
                                    else:
                                        st.markdown("*No words added in revisions*")
                                
                                with word_analysis_col2:
                                    st.markdown("**🔴 Most Removed Words**")
                                    if removed_counts:
                                        top_removed = dict(removed_counts.most_common(15))
                                        
                                        # Create horizontal bar chart for removed words
                                        fig_removed = px.bar(
                                            x=list(top_removed.values()),
                                            y=list(top_removed.keys()),
                                            orientation='h',
                                            title="Most Frequently Removed Words",
                                            color=list(top_removed.values()),
                                            color_continuous_scale='Reds'
                                        )
                                        fig_removed.update_layout(
                                            height=400,
                                            xaxis_title="Frequency",
                                            yaxis_title="Words",
                                            font=dict(family="Inter", size=10)
                                        )
                                        st.plotly_chart(fig_removed, use_container_width=True)
                                    else:
                                        st.markdown("*No words removed in revisions*")
                                
                                # Revision examples
                                st.markdown("""
                                <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
                                    📝 Revision Examples
                                </h4>
                                """, unsafe_allow_html=True)
                                
                                # Show examples of different types of revisions
                                from html import escape as escape_html

                                def render_revision_example(example_row):
                                    """Render one original/revised pair as a styled HTML card.

                                    The cell values are HTML-escaped before interpolation because the
                                    card is rendered with unsafe_allow_html=True: unescaped text that
                                    contains characters such as < or & would be interpreted as markup
                                    instead of being displayed literally.
                                    """
                                    original_html = escape_html(str(example_row[human_col]))
                                    revised_html = escape_html(str(example_row[revised_col]))
                                    type_html = escape_html(str(example_row['revision_type']))
                                    st.markdown(f"""
                                    <div style="background: #f8fafc; border-left: 4px solid #3b82f6; padding: 16px; margin: 10px 0; border-radius: 0 8px 8px 0;">
                                        <div style="font-weight: 600; color: #1e40af; margin-bottom: 8px;">Original:</div>
                                        <div style="margin-bottom: 12px; font-family: monospace; background: #fff; padding: 8px; border-radius: 4px;">{original_html}</div>
                                        <div style="font-weight: 600; color: #059669; margin-bottom: 8px;">Revised:</div>
                                        <div style="margin-bottom: 8px; font-family: monospace; background: #fff; padding: 8px; border-radius: 4px;">{revised_html}</div>
                                        <div style="font-size: 0.875rem; color: #6b7280;">Type: <strong>{type_html}</strong></div>
                                    </div>
                                    """, unsafe_allow_html=True)

                                # Only the first 10 changed rows are used as examples
                                revision_examples = changed_revisions.head(10)
                                if len(revision_examples) > 0:
                                    available_types = revision_examples['revision_type'].unique()
                                    if len(available_types) > 1:
                                        # One tab per revision type, labelled with its count within the sample
                                        type_counts = revision_examples['revision_type'].value_counts()
                                        type_tabs = st.tabs([f"{rtype} ({type_counts[rtype]})" for rtype in available_types])

                                        for type_tab, rtype in zip(type_tabs, available_types):
                                            with type_tab:
                                                type_examples = revision_examples[revision_examples['revision_type'] == rtype].head(5)
                                                for _, row in type_examples.iterrows():
                                                    render_revision_example(row)
                                    else:
                                        # Single type, show directly
                                        for _, row in revision_examples.iterrows():
                                            render_revision_example(row)
                                else:
                                    st.info(f"No revisions found for {selected_lang}.")
                            else:
                                st.info(f"No revision data available for analysis.")
                    
                    else:
                        st.warning(f"⚠️ Revision columns not found for {selected_lang}. Expected columns: `{human_col}` and `{revised_col}`")
                
                with analysis_tab4:
                    # Translation comparison section
                    st.markdown("""
                    <div style="margin: 20px 0;">
                        <h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
                            🔍 Translation Comparison & Word Analysis for {selected_lang}
                        </h4>
                    </div>
                    """.format(selected_lang=selected_lang), unsafe_allow_html=True)
                    
                    # Use the global language selection
                    comp_code = code
                    
                    # Columns whose name starts with the selected language code
                    # (the 'english' source column is always left out)
                    available_cols = [
                        column_name
                        for column_name in df_translations.columns
                        if column_name.startswith(comp_code) and column_name != 'english'
                    ]
                    
                    if len(available_cols) >= 2:
                        comp_col1, comp_col2, comp_col3 = st.columns([1, 1, 1])
                        
                        with comp_col1:
                            col1_selection = st.selectbox(
                                "First Translation:",
                                available_cols,
                                key="col1_select"
                            )
                        
                        with comp_col2:
                            col2_selection = st.selectbox(
                                "Second Translation:",
                                [col for col in available_cols if col != col1_selection],
                                key="col2_select"
                            )
                        
                        with comp_col3:
                            # Add spacing to align button with selectboxes
                            st.markdown('<div style="margin-top: 25px;"></div>', unsafe_allow_html=True)
                            analyze_clicked = st.button(
                                "🔍 Analyze",
                                type="primary", 
                                use_container_width=True,
                                key="analyze_word_diff_btn"
                            )
                        
                        if analyze_clicked:
                            # Perform word analysis with ALL available data
                            def get_word_differences(text1, text2):
                                """Compare the lower-cased word sets of two texts.

                                A missing (NA) text contributes an empty word set. Returns a tuple
                                (words only in text1, words only in text2, words in both).
                                """
                                def vocabulary(text):
                                    # A missing cell has no words
                                    if pd.isna(text):
                                        return set()
                                    return set(str(text).lower().split())

                                first_vocab = vocabulary(text1)
                                second_vocab = vocabulary(text2)
                                return (
                                    first_vocab - second_vocab,
                                    second_vocab - first_vocab,
                                    first_vocab & second_vocab,
                                )
                            
                            from collections import Counter

                            # Per-row set differences, pooled over the rows where BOTH columns have text
                            unique_words_1 = []
                            unique_words_2 = []
                            common_words = []
                            # Every word occurrence of each column, pooled over the rows where that column has text
                            all_words_1 = []
                            all_words_2 = []

                            for _, row in df_translations.iterrows():
                                first_cell = row[col1_selection]
                                second_cell = row[col2_selection]
                                has_first = pd.notna(first_cell)
                                has_second = pd.notna(second_cell)

                                # A missing cell is simply left out; nothing is substituted for it
                                if has_first:
                                    all_words_1.extend(str(first_cell).lower().split())
                                if has_second:
                                    all_words_2.extend(str(second_cell).lower().split())

                                # The set comparison only makes sense when both sides are present
                                if has_first and has_second:
                                    only_1, only_2, common = get_word_differences(first_cell, second_cell)
                                    unique_words_1.extend(only_1)
                                    unique_words_2.extend(only_2)
                                    common_words.extend(common)

                            # Occurrence counts of every word per column
                            all_freq_1 = Counter(all_words_1)
                            all_freq_2 = Counter(all_words_2)
                            # Number of rows in which a word was exclusive to one column / shared by both
                            unique_freq_1 = Counter(unique_words_1)
                            unique_freq_2 = Counter(unique_words_2)
                            common_freq = Counter(common_words)
                            
                            # Display statistics
                            st.markdown('<div class="comparison-container">', unsafe_allow_html=True)
                            
                            col_result1, col_result2, col_result3, col_result4 = st.columns(4)
                            
                            with col_result1:
                                st.markdown(f"""
                                <div style="text-align: center; padding: 15px;">
                                    <h5 style="color: #dc2626; margin-bottom: 10px;">Unique to {col1_selection.replace('_', ' ').title()}</h5>
                                    <div style="font-size: 1.3rem; font-weight: bold; color: #dc2626;">{len(unique_freq_1)}</div>
                                    <div style="color: #6b7280; font-size: 0.8rem;">unique words</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            with col_result2:
                                st.markdown(f"""
                                <div style="text-align: center; padding: 15px;">
                                    <h5 style="color: #166534; margin-bottom: 10px;">Unique to {col2_selection.replace('_', ' ').title()}</h5>
                                    <div style="font-size: 1.3rem; font-weight: bold; color: #166534;">{len(unique_freq_2)}</div>
                                    <div style="color: #6b7280; font-size: 0.8rem;">unique words</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            with col_result3:
                                st.markdown(f"""
                                <div style="text-align: center; padding: 15px;">
                                    <h5 style="color: #475569; margin-bottom: 10px;">Common Words</h5>
                                    <div style="font-size: 1.3rem; font-weight: bold; color: #475569;">{len(common_freq)}</div>
                                    <div style="color: #6b7280; font-size: 0.8rem;">shared words</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            with col_result4:
                                st.markdown(f"""
                                <div style="text-align: center; padding: 15px;">
                                    <h5 style="color: #7c3aed; margin-bottom: 10px;">Total Vocabulary</h5>
                                    <div style="font-size: 1.3rem; font-weight: bold; color: #7c3aed;">{len(set(all_words_1 + all_words_2))}</div>
                                    <div style="color: #6b7280; font-size: 0.8rem;">total unique words</div>
                                </div>
                                """, unsafe_allow_html=True)
                            
                            st.markdown('</div>', unsafe_allow_html=True)
                            
                            # Word Clouds Section
                            st.markdown("""
                            <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
                                ☁️ Word Clouds Visualization
                            </h4>
                            """, unsafe_allow_html=True)
                            
                            # Generate word clouds using matplotlib and wordcloud
                            try:
                                # Show loading spinner while generating word clouds
                                with st.spinner("🎨 Generating word clouds... This may take a moment."):
                                    import matplotlib.pyplot as plt
                                    from wordcloud import WordCloud
                                    import io
                                    import base64
                                    
                                    # Function to create word cloud image (optimized)
                                    def create_wordcloud_image(word_freq, title, color_scheme='viridis'):
                                        """Render a word cloud and return it as a base64-encoded PNG string.

                                        Args:
                                            word_freq: Mapping of word -> frequency used to size the words.
                                            title: Title drawn above the word cloud.
                                            color_scheme: Name of the colormap passed to WordCloud.

                                        Returns:
                                            The PNG image encoded as a base64 string, or None when
                                            word_freq is empty or rendering fails (in which case a
                                            warning is shown with st.warning).
                                        """
                                        if not word_freq:
                                            return None

                                        fig = None
                                        try:
                                            # The full frequency table is supplied; max_words caps the cloud at 25 words
                                            wordcloud = WordCloud(
                                                width=300,
                                                height=200,
                                                background_color='white',
                                                colormap=color_scheme,
                                                max_words=25,
                                                relative_scaling=0.6,
                                                random_state=42,
                                                min_font_size=8,
                                                max_font_size=60,
                                                prefer_horizontal=0.9,
                                                collocations=False
                                            ).generate_from_frequencies(word_freq)

                                            fig, ax = plt.subplots(figsize=(5, 3))
                                            ax.imshow(wordcloud, interpolation='bilinear')
                                            ax.axis('off')
                                            ax.set_title(title, fontsize=10, fontweight='bold', pad=10)

                                            # Save through the figure object: plt.savefig() writes whichever
                                            # figure is "current" in pyplot's global state, which is not
                                            # guaranteed to be this one.
                                            buffer = io.BytesIO()
                                            fig.savefig(buffer, format='png', bbox_inches='tight', dpi=100, facecolor='white')
                                            return base64.b64encode(buffer.getvalue()).decode()
                                        except Exception as e:
                                            # Best effort: a failed cloud must not break the rest of the page
                                            st.warning(f"Error creating word cloud for {title}: {str(e)}")
                                            return None
                                        finally:
                                            # Always release the figure, including when rendering or saving raised
                                            if fig is not None:
                                                plt.close(fig)
                                    
                                    # Create all word clouds in one row
                                    cloud_col1, cloud_col2, cloud_col3 = st.columns(3)
                                    
                                    with cloud_col1:
                                        if unique_freq_1 and len(unique_freq_1) > 0:
                                            # Use ALL unique words but display top 25 in cloud
                                            img1 = create_wordcloud_image(
                                                dict(unique_freq_1),  # Use ALL unique words for frequency
                                                f"Unique: {col1_selection.replace('_', ' ').title()}", 
                                                'Reds'
                                            )
                                            if img1:
                                                st.markdown(f'''
                                                <div style="text-align: center; margin: 10px 0;">
                                                    <img src="data:image/png;base64,{img1}" style="max-width: 100%; height: auto; border-radius: 6px; box-shadow: 0 1px 4px rgba(0,0,0,0.1);">
                                                </div>
                                                <div style="text-align: center; font-size: 0.8rem; color: #6b7280;">
                                                    Showing top 25 of {len(unique_freq_1)} unique words
                                                </div>
                                                ''', unsafe_allow_html=True)
                                            else:
                                                st.markdown("""
                                                <div style="text-align: center; padding: 40px; background: #fef2f2; border-radius: 6px; color: #dc2626;">
                                                    <div style="font-size: 2rem;">📝</div>
                                                    <div style="font-size: 0.9rem; margin-top: 8px;">No unique words</div>
                                                </div>
                                                """, unsafe_allow_html=True)
                                        else:
                                            st.markdown("""
                                            <div style="text-align: center; padding: 40px; background: #f9fafb; border-radius: 6px; color: #6b7280;">
                                                <div style="font-size: 2rem;">📝</div>
                                                <div style="font-size: 0.9rem; margin-top: 8px;">No unique words found</div>
                                            </div>
                                            """, unsafe_allow_html=True)
                                    
                                    with cloud_col2:
                                        if unique_freq_2 and len(unique_freq_2) > 0:
                                            # Use ALL unique words but display top 25 in cloud
                                            img2 = create_wordcloud_image(
                                                dict(unique_freq_2),  # Use ALL unique words for frequency
                                                f"Unique: {col2_selection.replace('_', ' ').title()}", 
                                                'Greens'
                                            )
                                            if img2:
                                                st.markdown(f'''
                                                <div style="text-align: center; margin: 10px 0;">
                                                    <img src="data:image/png;base64,{img2}" style="max-width: 100%; height: auto; border-radius: 6px; box-shadow: 0 1px 4px rgba(0,0,0,0.1);">
                                                </div>
                                                <div style="text-align: center; font-size: 0.8rem; color: #6b7280;">
                                                    Showing top 25 of {len(unique_freq_2)} unique words
                                                </div>
                                                ''', unsafe_allow_html=True)
                                            else:
                                                st.markdown("""
                                                <div style="text-align: center; padding: 40px; background: #f0fdf4; border-radius: 6px; color: #166534;">
                                                    <div style="font-size: 2rem;">📝</div>
                                                    <div style="font-size: 0.9rem; margin-top: 8px;">No unique words</div>
                                                </div>
                                                """, unsafe_allow_html=True)
                                        else:
                                            st.markdown("""
                                            <div style="text-align: center; padding: 40px; background: #f9fafb; border-radius: 6px; color: #6b7280;">
                                                <div style="font-size: 2rem;">📝</div>
                                                <div style="font-size: 0.9rem; margin-top: 8px;">No unique words found</div>
                                            </div>
                                            """, unsafe_allow_html=True)
                                    
                                    with cloud_col3:
                                        if common_freq and len(common_freq) > 0:
                                            # Use ALL common words but display top 25 in cloud
                                            img3 = create_wordcloud_image(
                                                dict(common_freq),  # Use ALL common words for frequency
                                                "Common Words", 
                                                'Blues'
                                            )
                                            if img3:
                                                st.markdown(f'''
                                                <div style="text-align: center; margin: 10px 0;">
                                                    <img src="data:image/png;base64,{img3}" style="max-width: 100%; height: auto; border-radius: 6px; box-shadow: 0 1px 4px rgba(0,0,0,0.1);">
                                                </div>
                                                <div style="text-align: center; font-size: 0.8rem; color: #6b7280;">
                                                    Showing top 25 of {len(common_freq)} common words
                                                </div>
                                                ''', unsafe_allow_html=True)
                                            else:
                                                st.markdown("""
                                                <div style="text-align: center; padding: 40px; background: #eff6ff; border-radius: 6px; color: #1d4ed8;">
                                                    <div style="font-size: 2rem;">📝</div>
                                                    <div style="font-size: 0.9rem; margin-top: 8px;">No common words</div>
                                                </div>
                                                """, unsafe_allow_html=True)
                                        else:
                                            st.markdown("""
                                            <div style="text-align: center; padding: 40px; background: #f9fafb; border-radius: 6px; color: #6b7280;">
                                                <div style="font-size: 2rem;">🤝</div>
                                                <div style="font-size: 0.9rem; margin-top: 8px;">No common words found</div>
                                            </div>
                                            """, unsafe_allow_html=True)
                                
                            except ImportError:
                                st.warning("📦 WordCloud library not available. Install with: `pip install wordcloud`")
                                
                                # Fallback to top words lists
                                st.markdown("**📋 Top Unique Words (Fallback)**")
                                
                                fallback_col1, fallback_col2, fallback_col3 = st.columns(3)
                                
                                with fallback_col1:
                                    st.markdown(f"**🔴 Unique to {col1_selection.replace('_', ' ').title()}**")
                                    if unique_freq_1:
                                        for word, count in unique_freq_1.most_common(10):
                                            st.markdown(f"• {word} ({count})")
                                    else:
                                        st.markdown("*No unique words*")
                                
                                with fallback_col2:
                                    st.markdown(f"**🟢 Unique to {col2_selection.replace('_', ' ').title()}**")
                                    if unique_freq_2:
                                        for word, count in unique_freq_2.most_common(10):
                                            st.markdown(f"• {word} ({count})")
                                    else:
                                        st.markdown("*No unique words*")
                                
                                with fallback_col3:
                                    st.markdown("**🔵 Common Words**")
                                    if common_freq:
                                        for word, count in common_freq.most_common(10):
                                            st.markdown(f"• {word} ({count})")
                                    else:
                                        st.markdown("*No common words*")
                            
                            # Word frequency bar charts as additional analysis
                            st.markdown("""
                            <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
                                📊 Top Words Frequency Comparison
                            </h4>
                            """, unsafe_allow_html=True)
                            
                            freq_col1, freq_col2 = st.columns(2)
                            
                            with freq_col1:
                                if unique_freq_1:
                                    top_words_1 = dict(unique_freq_1.most_common(10))
                                    fig_freq1 = px.bar(
                                        x=list(top_words_1.values()),
                                        y=list(top_words_1.keys()),
                                        orientation='h',
                                        title=f"Top Unique Words: {col1_selection.replace('_', ' ').title()}",
                                        color=list(top_words_1.values()),
                                        color_continuous_scale='Reds'
                                    )
                                    fig_freq1.update_layout(
                                        height=400, 
                                        xaxis_title="Frequency",
                                        yaxis_title="Words",
                                        font=dict(family="Inter", size=10)
                                    )
                                    st.plotly_chart(fig_freq1, use_container_width=True)
                            
                            with freq_col2:
                                if unique_freq_2:
                                    top_words_2 = dict(unique_freq_2.most_common(10))
                                    fig_freq2 = px.bar(
                                        x=list(top_words_2.values()),
                                        y=list(top_words_2.keys()),
                                        orientation='h',
                                        title=f"Top Unique Words: {col2_selection.replace('_', ' ').title()}",
                                        color=list(top_words_2.values()),
                                        color_continuous_scale='Greens'
                                    )
                                    fig_freq2.update_layout(
                                        height=400,
                                        xaxis_title="Frequency", 
                                        yaxis_title="Words",
                                        font=dict(family="Inter", size=10)
                                    )
                                    st.plotly_chart(fig_freq2, use_container_width=True)
                    else:
                        st.warning("⚠️ Need at least 2 translation columns for comparison analysis.")
                
            else:
                st.markdown("""
                <div style="background: #fef2f2; border: 1px solid #fecaca; border-radius: 8px; padding: 24px; margin: 16px 0; text-align: center;">
                    <h3 style="font-family: 'Inter', sans-serif; color: #dc2626; margin: 0 0 12px 0;">❌ No Data Available</h3>
                    <p style="font-family: 'Inter', sans-serif; color: #7f1d1d; margin: 0;">
                        Please ensure translation data files are available in the data directory.
                    </p>
                </div>
                """, unsafe_allow_html=True)
    
    # Footer
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #6b7280; font-family: 'Inter', sans-serif; font-size: 0.875rem;">
        Built for DSFSI using Streamlit • Translation APIs: Gemini, GPT, NLLB (hosted locally) • Data Science for Social Impact
    </div>
    """, unsafe_allow_html=True)

# Script entry point: build and run the app only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()