# UPTranslate — src/streamlit_app.py
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import time
from PIL import Image
# Only import APIs if available.
# Each provider SDK is optional; a module-level flag records whether the
# import succeeded so the rest of the app can degrade gracefully instead
# of crashing at startup.
try:
    from google import genai
    GENAI_AVAILABLE = True
except ImportError:
    GENAI_AVAILABLE = False

try:
    from openai import OpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False
# Resolve paths relative to this file so the app works regardless of the
# current working directory (local run vs. HF Spaces container).
BASE_DIR = os.path.dirname(__file__)
DATA_DIR = os.path.join(BASE_DIR, "data")

# Page configuration
st.set_page_config(
    page_title="Translation Comparison Tool",
    page_icon="🌐",
    layout="wide",
    initial_sidebar_state="collapsed"
)
# Custom CSS for Material Design with Tailwind-inspired styling.
# Injected once at startup; targets Streamlit's internal data-testid /
# data-baseweb hooks (tabs, buttons, text areas), so it may need updating
# when the Streamlit version changes. The string below is rendered
# verbatim by the browser — do not edit it casually.
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
.main-header {
font-family: 'Inter', sans-serif;
font-size: 1.8rem;
font-weight: 600;
color: #1f2937;
margin-bottom: 0.5rem;
letter-spacing: -0.025em;
text-align: center;
}
.sub-header {
font-family: 'Inter', sans-serif;
font-size: 1.1rem;
font-weight: 400;
color: #6b7280;
margin-bottom: 2rem;
line-height: 1.6;
text-align: center;
}
.logo-container {
display: flex;
justify-content: center;
margin-bottom: 2rem;
}
/* Bold and full-width tabs */
.stTabs [data-baseweb="tab-list"] {
gap: 0px;
width: 100%;
}
.stTabs [data-baseweb="tab"] {
font-family: 'Inter', sans-serif !important;
font-size: 1.1rem !important;
font-weight: 600 !important;
padding: 12px 24px !important;
width: 50% !important;
justify-content: center !important;
border-radius: 0 !important;
background-color: #f8f9fa !important;
color: #374151 !important;
border: 1px solid #e5e7eb !important;
margin: 0 !important;
}
.stTabs [data-baseweb="tab"]:hover {
background-color: #f1f3f4 !important;
color: #1f2937 !important;
}
.stTabs [aria-selected="true"] {
background-color: #3b82f6 !important;
color: white !important;
font-weight: 700 !important;
border-color: #3b82f6 !important;
}
.stTabs [data-baseweb="tab-highlight"] {
display: none !important;
}
.stTabs [data-baseweb="tab-border"] {
display: none !important;
}
.tab-header {
font-family: 'Inter', sans-serif;
font-size: 1.5rem;
font-weight: 600;
color: #374151;
margin-bottom: 1rem;
}
.metric-card {
background: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 0.75rem;
padding: 1.5rem;
margin: 0.5rem 0;
box-shadow: 0 1px 3px 0 rgba(0, 0, 0, 0.1);
}
.metric-title {
font-family: 'Inter', sans-serif;
font-size: 0.875rem;
font-weight: 500;
color: #6b7280;
text-transform: uppercase;
letter-spacing: 0.05em;
margin-bottom: 0.25rem;
}
.metric-value {
font-family: 'Inter', sans-serif;
font-size: 2rem;
font-weight: 700;
color: #1f2937;
line-height: 1;
}
.support-info {
color: #5f6368;
font-size: 12px;
margin-top: 20px;
text-align: center;
font-family: 'Inter', sans-serif;
}
.translate-container {
border: 1px solid #e0e0e0;
border-radius: 8px;
margin: 20px 0;
overflow: hidden;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.translate-header {
background: #f8f9fa;
border-bottom: 1px solid #e0e0e0;
padding: 12px 16px;
font-family: 'Inter', sans-serif;
font-weight: 500;
font-size: 14px;
color: #5f6368;
display: flex;
align-items: center;
box-sizing: border-box;
}
.language-tabs-container {
border: 1px solid #e0e0e0;
border-radius: 8px;
margin: 20px 0;
overflow: hidden;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.language-tabs-header {
background: #f8f9fa;
border-bottom: 1px solid #e0e0e0;
height: 45px;
display: flex;
align-items: stretch;
box-sizing: border-box;
padding: 0;
}
.language-tab {
flex: 1;
background: #f8f9fa;
border: none;
border-right: 1px solid #e0e0e0;
padding: 0;
font-family: 'Inter', sans-serif;
font-size: 14px;
font-weight: 500;
cursor: pointer;
transition: all 0.2s ease;
color: #6b7280;
text-align: center;
height: 45px;
display: flex;
align-items: center;
justify-content: center;
box-sizing: border-box;
text-decoration: none;
outline: none;
}
.language-tab:last-child {
border-right: none;
}
.language-tab.active {
background: white;
color: #3b82f6;
border-bottom: 2px solid #3b82f6;
font-weight: 600;
}
.language-tab:hover:not(.active) {
background: #f1f3f4;
color: #374151;
}
.stTextArea textarea {
resize: none !important;
min-height: 350px !important;
max-height: 350px !important;
height: 350px !important;
}
.stTextArea textarea[disabled] {
color: #000000 !important;
opacity: 1 !important;
-webkit-text-fill-color: #000000 !important;
}
/* Make buttons rounded and complete */
.stButton > button {
font-family: 'Inter', sans-serif !important;
font-size: 0.75rem !important;
font-weight: 500 !important;
border-radius: 6px !important; /* Changed from 0 to 6px for rounded corners */
height: 35px !important;
border: 1px solid #d1d5db !important;
margin: 0 2px !important; /* Added small margin between buttons */
padding: 0 12px !important; /* Increased padding for better look */
cursor: pointer !important;
transition: all 0.2s ease !important;
}
.stButton > button[data-testid="baseButton-secondary"] {
background-color: #f3f4f6 !important;
color: #374151 !important;
border-color: #d1d5db !important;
box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05) !important;
}
.stButton > button[data-testid="baseButton-secondary"]:hover {
background-color: #e5e7eb !important;
color: #1f2937 !important;
border-color: #9ca3af !important;
box-shadow: 0 2px 4px 0 rgba(0, 0, 0, 0.1) !important;
transform: translateY(-1px) !important;
}
.stButton > button[data-testid="baseButton-primary"] {
background-color: #3b82f6 !important;
color: #ffffff !important;
font-weight: 600 !important;
border-color: #3b82f6 !important;
box-shadow: 0 2px 4px 0 rgba(59, 130, 246, 0.3) !important;
}
.stButton > button[data-testid="baseButton-primary"]:hover {
background-color: #2563eb !important;
color: #ffffff !important;
border-color: #2563eb !important;
transform: translateY(-1px) !important;
}
/* Remove the border-right rule since we're using margins now */
/* Hide the default Streamlit button styling for tab buttons */
.language-tab-button {
background: none !important;
border: none !important;
padding: 0 !important;
margin: 0 !important;
height: 100% !important;
width: 100% !important;
color: inherit !important;
font-weight: inherit !important;
}
.language-tab-button:hover {
background: none !important;
border: none !important;
}
.language-tab-button:focus {
background: none !important;
border: none !important;
box-shadow: none !important;
}
.score-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 12px;
padding: 20px;
text-align: center;
color: white;
margin: 10px 0;
}
.score-value {
font-size: 2.5rem;
font-weight: 700;
margin: 10px 0;
}
.score-label {
font-size: 0.9rem;
opacity: 0.9;
text-transform: uppercase;
letter-spacing: 1px;
}
.comparison-container {
background: #f8fafc;
border: 1px solid #e2e8f0;
border-radius: 12px;
padding: 24px;
margin: 20px 0;
}
.word-diff {
display: inline-block;
padding: 4px 8px;
margin: 2px;
border-radius: 6px;
font-weight: 500;
}
.word-added {
background: #dcfce7;
color: #166534;
border: 1px solid #bbf7d0;
}
.word-removed {
background: #fef2f2;
color: #dc2626;
border: 1px solid #fecaca;
}
.word-common {
background: #f1f5f9;
color: #475569;
border: 1px solid #e2e8f0;
}
.block-container {
padding-top: 1rem;
padding-bottom: 0rem;
}
.main > div {
padding-top: 1rem;
}
/* Hide Streamlit header and footer */
header[data-testid="stHeader"] {
height: 0px;
display: none;
}
.stDeployButton {
display: none;
}
footer {
display: none;
}
#MainMenu {
display: none;
}
</style>
""", unsafe_allow_html=True)
# Model configurations.
# NOTE: 'models' has a different shape per provider — Gemini/GPT expose a
# list of user-selectable model names (with a 'default_model'), while NLLB
# maps each supported language to its dedicated fine-tuned checkpoint.
MODEL_CONFIG = {
    'Gemini': {
        'languages': ['Afrikaans', 'Northern Sotho', 'isiZulu'],
        'models': ['gemini-2.0-flash-exp', 'gemini-1.5-flash', 'gemini-1.5-pro'],
        'default_model': 'gemini-2.0-flash-exp'
    },
    'GPT': {
        'languages': ['Afrikaans', 'Northern Sotho', 'isiZulu'],
        'models': ['gpt-4', 'gpt-4-turbo', 'gpt-3.5-turbo'],
        'default_model': 'gpt-4'
    },
    'NLLB': {
        'languages': ['Northern Sotho', 'isiZulu'],  # No Afrikaans model available
        'models': {
            'Northern Sotho': 'dsfsi/dcs-eng-nso-nllb-1.3B',
            'isiZulu': 'dsfsi/dcs-eng-zul-nllb-1.3B'
        }
    }
}
# Language code mappings.
# NOTE(review): 'isizulu' is not an ISO 639 code ('zul' would be); it is
# kept as-is because the sample/analysis dataframe columns are prefixed
# 'isizulu_' — verify both sides before "correcting" it.
LANGUAGE_CODES = {
    'Afrikaans': 'afr',
    'Northern Sotho': 'nso',
    'isiZulu': 'isizulu'
}
# Load logo
def load_logo():
    """Load the app logo from BASE_DIR.

    Returns:
        PIL.Image.Image if ``logo.png`` exists and can be opened,
        otherwise None (a Streamlit warning is shown on read errors).
    """
    # Build the path once with os.path.join instead of repeating an
    # f-string concatenation for the existence check and the open.
    logo_path = os.path.join(BASE_DIR, "logo.png")
    try:
        if os.path.exists(logo_path):
            return Image.open(logo_path)
    except Exception as e:
        # Best-effort: a broken logo must not take down the app.
        st.warning(f"Could not load logo: {str(e)}")
    return None
# Load and cache data
@st.cache_data
def load_translation_data():
    """Build the built-in sample translation DataFrame.

    Columns: 'english' plus, per language prefix (afr/nso/isizulu),
    the human, revised ('_rev') and machine-translated ('_mt_*') variants.
    Falls back to a tiny error frame if construction fails.
    """
    try:
        # Column order matters for display; keep 'english' first.
        return pd.DataFrame({
            'english': ['Hello world', 'How are you?', 'Good morning', 'Thank you', 'Welcome', 'Goodbye'],
            # Human and revised references per language
            'afr': ['Hallo wêreld', 'Hoe gaan dit?', 'Goeie môre', 'Dankie', 'Welkom', 'Totsiens'],
            'afr_rev': ['Hallo wêreld', 'Hoe gaan dit met jou?', 'Goeie môre', 'Baie dankie', 'Welkom', 'Totsiens'],
            'nso': ['Dumela lefase', 'O kae?', 'Thobela', 'Ke a leboga', 'O amogetšwe', 'Šala gabotse'],
            'nso_rev': ['Dumela lefase', 'O phela bjang?', 'Thobela', 'Ke a leboga kudu', 'O amogetšwe', 'Šala gabotse'],
            'isizulu': ['Sawubona mhlaba', 'Unjani?', 'Sawubona', 'Ngiyabonga', 'Wamukelekile', 'Sala kahle'],
            'isizulu_rev': ['Sawubona mhlaba', 'Unjani wena?', 'Sawubona', 'Ngiyabonga kakhulu', 'Wamukelekile', 'Sala kahle'],
            # Machine-translation outputs per system
            'nso_mt_nllb': ['Dumela lefase', 'O kae?', 'Thobela', 'Ke a leboga', 'O amogetšwe', 'Šala gabotse'],
            'isizulu_mt_nllb': ['Sawubona mhlaba', 'Unjani?', 'Sawubona', 'Ngiyabonga', 'Wamukelekile', 'Sala kahle'],
            'afr_mt_gpt': ['Hallo wêreld', 'Hoe gaan dit?', 'Goeie môre', 'Dankie', 'Welkom', 'Totsiens'],
            'nso_mt_gpt': ['Dumela lefase', 'O kae?', 'Thobela', 'Ke a leboga', 'O amogetšwe', 'Šala gabotse'],
            'isizulu_mt_gpt': ['Sawubona mhlaba', 'Unjani?', 'Sawubona', 'Ngiyabonga', 'Wamukelekile', 'Sala kahle'],
            'afr_mt_gemini': ['Hallo wêreld', 'Hoe is dit?', 'Goeie môre', 'Baie dankie', 'Welkom', 'Totsiens'],
            'nso_mt_gemini': ['Dumela lefase', 'O phela bjang?', 'Thobela', 'Ke a leboga kudu', 'O amogetšwe', 'Šala gabotse'],
            'isizulu_mt_gemini': ['Sawubona mhlaba', 'Unjani wena?', 'Sawubona', 'Ngiyabonga kakhulu', 'Wamukelekile', 'Sala kahle'],
        })
    except Exception as exc:
        st.error(f"Error loading data: {str(exc)}")
        return pd.DataFrame({'english': ['Sample text'], 'error': ['Data loading failed']})
def translate_with_gemini(text, target_language, model_name="gemini-2.0-flash-exp", client=None):
    """Translate English *text* into *target_language* via the Gemini API.

    Returns the translated string, or a human-readable "❌ ..." message
    when the SDK is missing, the client is unconfigured, or the call fails.
    """
    # Guard clauses: fail fast with user-facing messages.
    if not GENAI_AVAILABLE:
        return "❌ Gemini library not installed"
    if not client:
        return "❌ Gemini API not configured. Please check your GEMINI_API_KEY."
    language_labels = {
        'Afrikaans': 'Afrikaans',
        'Northern Sotho': 'Northern Sotho (Sepedi)',
        'isiZulu': 'isiZulu'
    }
    target_label = language_labels.get(target_language, target_language)
    prompt = (
        f"Translate the following English text to {target_label}: '{text}'. "
        "Provide only the translation without any explanations."
    )
    try:
        response = client.models.generate_content(model=model_name, contents=prompt)
        return response.text.strip()
    except Exception as exc:
        return f"❌ Error: {str(exc)}"
def translate_with_openai(text, target_language, model_name="gpt-4o", client=None):
    """Translate *text* into *target_language* with OpenAI Chat Completions.

    Returns the translated string, or a "❌ ..." message when the SDK is
    missing, the client is unconfigured, or the request fails.
    """
    # Guard clauses: fail fast with user-facing messages.
    if not OPENAI_AVAILABLE:
        return "❌ OpenAI library not installed"
    if not client:
        return "❌ OpenAI API not configured. Please check your OPENAI_API_KEY."
    target_label = {
        'Afrikaans': 'Afrikaans',
        'Northern Sotho': 'Northern Sotho (Sepedi)',
        'isiZulu': 'isiZulu'
    }.get(target_language, target_language)
    try:
        # Chat Completions API (long-term supported endpoint).
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system",
                 "content": "You are a professional translator. Provide only the translation without any explanations."},
                {"role": "user",
                 "content": f"Translate the following text to {target_label}: {text}"},
            ],
            max_tokens=1000,
            temperature=0.3,  # low temperature for more consistent translations
        )
        return completion.choices[0].message.content.strip()
    except Exception as exc:
        return f"❌ Error: {str(exc)}"
@st.cache_resource
def initialize_apis():
    """Initialize API clients with proper error handling, supporting both local and HF Spaces.

    Returns:
        (gemini_client, openai_client) — either element may be None.
        Problems are surfaced as Streamlit warnings/errors, never raised.
    """

    def _read_secret(key):
        """Fetch secret: environment first (Docker Spaces), then st.secrets."""
        value = os.environ.get(key)
        if value:
            return value
        if hasattr(st, "secrets") and key in st.secrets:
            return st.secrets.get(key)
        return None

    def _make_gemini():
        # Returns a genai.Client or None (warning/error already displayed).
        try:
            key = _read_secret("GEMINI_API_KEY")
            if not key:
                st.warning("⚠️ Gemini API key not found")
                return None
            return genai.Client(api_key=key)
        except Exception as e:
            st.error(f"❌ Gemini API error: {str(e)}")
            return None

    def _make_openai():
        # Returns an OpenAI client, the legacy module, or None.
        try:
            key = _read_secret("OPENAI_API_KEY")
            if not key:
                st.warning("⚠️ OpenAI API key not found")
                return None
            try:
                # Preferred: new-style client object.
                return OpenAI(api_key=key)
            except TypeError:
                # Fallback for old SDKs: configure and use the module itself.
                import openai
                openai.api_key = key
                return openai
        except Exception as e:
            st.error(f"❌ OpenAI API error: {str(e)}")
            return None

    gemini_client = None
    openai_client = None
    try:
        if GENAI_AVAILABLE:
            gemini_client = _make_gemini()
        if OPENAI_AVAILABLE:
            openai_client = _make_openai()
    except Exception as e:
        st.error(f"❌ API initialization error: {str(e)}")
    return gemini_client, openai_client
def translate_with_nllb(text, target_language):
    """Translate *text* via the unified remote NLLB service.

    NOTE(review): the endpoint is an ephemeral ngrok tunnel and will go
    stale — consider moving it to configuration/secrets.

    Returns the translation, or a "❌ ..." message on any failure.
    """
    try:
        import requests

        # Single ngrok URL for the unified API.
        API_URL = "https://4c2faecc052a.ngrok-free.app"
        # Map UI language names to the API's short codes.
        api_lang = {
            'Northern Sotho': 'nso',
            'isiZulu': 'zul'
        }.get(target_language, target_language.lower())
        resp = requests.post(
            f"{API_URL}/translate_simple",
            params={"text": text, "target_language": api_lang},
            timeout=30
        )
        if resp.status_code != 200:
            return f"❌ API Error: {resp.status_code}"
        # Response body keys the translation by the same language code.
        return resp.json().get(api_lang, '❌ Translation not found')
    except Exception as exc:
        return f"❌ Error: {str(exc)}"
def create_language_tabs(available_languages, current_language, key_suffix=""):
    """Render the language-selector tab strip as raw HTML.

    Args:
        available_languages: languages to show, one tab each.
        current_language: the tab to mark with the "active" CSS class.
        key_suffix: forwarded to the (inert) JS click handler.

    Returns:
        HTML for the tab strip plus a stub <script>. The JS handler is a
        deliberate no-op: Streamlit session state cannot be mutated from
        browser-side JS, so real selection is handled by st.button widgets.
    """
    parts = ['<div class="language-tabs-container"><div class="language-tabs-header">']
    for lang in available_languages:
        active_class = "active" if lang == current_language else ""
        parts.append(
            f'<div class="language-tab {active_class}" '
            f"onclick=\"selectLanguage('{lang}', '{key_suffix}')\">{lang}</div>"
        )
    parts.append('</div></div>')
    # Plain string, not an f-string: nothing is interpolated here, and the
    # doubled {{ }} braces the f-string previously required are gone.
    script = '''
<script>
function selectLanguage(lang, suffix) {
    // No-op: Streamlit session state cannot be updated from JavaScript;
    // actual language selection uses the st.button fallback.
}
</script>
'''
    return "".join(parts) + script
def main():
"""Main application function"""
# Load and display logo and title side by side
logo = load_logo()
# Initialize session state FIRST to avoid refreshes
if 'target_language' not in st.session_state:
st.session_state.target_language = 'Afrikaans'
if 'translation_result' not in st.session_state:
st.session_state.translation_result = ""
if 'current_page' not in st.session_state:
st.session_state.current_page = 1
if 'initialized' not in st.session_state:
st.session_state.initialized = True
col1, col2, col3 = st.columns([1, 2, 1])
with col2:
if logo:
# Convert logo to base64 for HTML embedding
import base64
from io import BytesIO
buffered = BytesIO()
logo.save(buffered, format="PNG")
img_str = base64.b64encode(buffered.getvalue()).decode()
st.markdown(f'''
<div style="display: flex; align-items: center; justify-content: center; gap: 0px; margin-bottom: 1rem;">
<img src="data:image/png;base64,{img_str}" width="180">
<h1 class="main-header" style="margin: 20px;">UP Translate</h1>
</div>
''', unsafe_allow_html=True)
else:
st.markdown('<h1 class="main-header" style="margin-bottom: 1rem;">UP Translate</h1>', unsafe_allow_html=True)
# Initialize APIs
genai_client, openai_client = initialize_apis()
# Initialize session state
if 'target_language' not in st.session_state:
st.session_state.target_language = 'Afrikaans'
if 'translation_result' not in st.session_state:
st.session_state.translation_result = ""
# Create tabs
tab1, tab2 = st.tabs(["🤖 Live Translations", "📊 Existing Translations"])
with tab1:
# st.markdown('<h2 class="tab-header">Live Translation</h2>', unsafe_allow_html=True)
# Create simplified model options
model_options = []
model_mapping = {}
# Add Gemini models
for model in MODEL_CONFIG['Gemini']['models']:
display_name = f"Gemini - {model}"
model_options.append(display_name)
model_mapping[display_name] = ('Gemini', None, model)
# Add GPT models
for model in MODEL_CONFIG['GPT']['models']:
display_name = f"GPT - {model}"
model_options.append(display_name)
model_mapping[display_name] = ('GPT', None, model)
# Add single NLLB option
model_options.append("NLLB - Specialized Models")
model_mapping["NLLB - Specialized Models"] = ('NLLB', None, None)
# Model selection with inline label
label_col, dropdown_col = st.columns([2, 10])
with label_col:
st.markdown('<div style="margin-top: 8px; font-weight: 500;">Select Model:</div>', unsafe_allow_html=True)
with dropdown_col:
selected_model_option = st.selectbox(
"Select Model:",
model_options,
index=0,
key="model_selection_dropdown",
label_visibility="collapsed"
)
selected_provider, _, selected_model = model_mapping[selected_model_option]
# Translation interface
col_left, col_center, col_right = st.columns([5, 1, 5])
# Left side - English Input
with col_left:
st.markdown('<div class="translate-container">', unsafe_allow_html=True)
st.markdown('<div class="translate-header">English</div>', unsafe_allow_html=True)
st.markdown('</div>', unsafe_allow_html=True)
input_text = st.text_area(
"Input",
placeholder="Input text here",
height=350,
key="input_text_live",
label_visibility="collapsed"
)
# Center - Translate Button
with col_center:
# Add spacing to align button with text areas
st.markdown('<div style="height: 150px;"></div>', unsafe_allow_html=True)
translate_clicked = st.button(
"Translate",
key="translate_btn_live",
help="Translate text",
type="primary",
use_container_width=True
)
# Right side - Translation Output
with col_right:
# Determine available languages based on selected provider
if selected_provider == 'NLLB':
available_languages = MODEL_CONFIG['NLLB']['languages']
else:
available_languages = ['Afrikaans', 'Northern Sotho', 'isiZulu']
# Set default language to first available if current selection not available
if st.session_state.target_language not in available_languages:
st.session_state.target_language = available_languages[0]
# Create container with custom styling
st.markdown('<div class="translate-container">', unsafe_allow_html=True)
# Language selection buttons
lang_cols = st.columns(len(available_languages))
for i, lang in enumerate(available_languages):
with lang_cols[i]:
button_type = "primary" if lang == st.session_state.target_language else "secondary"
if st.button(
lang,
key=f"lang_btn_{lang}_live",
type=button_type,
use_container_width=True
):
if st.session_state.target_language != lang: # Only update if different
st.session_state.target_language = lang
st.session_state.translation_result = "" # Clear previous result
st.rerun()
# Translation logic
if translate_clicked and input_text:
with st.spinner("Translating..."):
target_lang = st.session_state.target_language
if selected_provider == 'Gemini':
result = translate_with_gemini(input_text, target_lang, selected_model, genai_client)
elif selected_provider == 'GPT':
result = translate_with_openai(input_text, target_lang, selected_model, openai_client)
elif selected_provider == 'NLLB':
result = translate_with_nllb(input_text, target_lang)
st.session_state.translation_result = result
# Translation output area with proper labeling
st.text_area(
f"Translation ({st.session_state.target_language})", # Dynamic label
value=st.session_state.translation_result,
placeholder="Translation will appear here",
height=350,
key="translation_output_live_fixed", # Changed key to avoid conflicts
disabled=True,
label_visibility="collapsed"
)
# Support information
st.markdown("""
<div class="support-info">
<strong>Available Models:</strong><br>
🔮 <strong>Gemini:</strong> All languages (gemini-2.0-flash-exp, gemini-1.5-flash, gemini-1.5-pro)<br>
🧠 <strong>GPT:</strong> All languages (gpt-4, gpt-4-turbo, gpt-3.5-turbo)<br>
🤗 <strong>NLLB:</strong> Northern Sotho, isiZulu only (specialized models)
</div>
""", unsafe_allow_html=True)
with tab2:
# Load data from base directory automatically
@st.cache_data
def load_analysis_data():
"""Load all analysis data from base directory"""
df_translations = None
df_bleu = None
df_chrf = None
df_comet = None
try:
# Try to load translations data
if os.path.exists(f"{DATA_DIR}/translations.tsv"):
df_translations = pd.read_csv(f"{DATA_DIR}/translations.tsv", sep="\t")
# Convert new CSV format to expected format for analysis
# New format: id,english,afr_human,afr_revised,nso_human,nso_revised,zul_human,zul_revised,afr_gemini,afr_gpt,nso_gemini,nso_gpt,nso_nllb,zul_gemini,zul_gpt,zul_nllb
# Expected format: english, afr_human, afr_revised, nso_human, nso_revised, isizulu_human, isizulu_revised, etc.
# Rename zul columns to isizulu for backward compatibility with analysis code
column_mapping = {
'zul_human': 'isizulu_human',
'zul_revised': 'isizulu_revised',
'zul_gemini': 'isizulu_mt_gemini',
'zul_gpt': 'isizulu_mt_gpt',
'zul_nllb': 'isizulu_mt_nllb',
'afr_gemini': 'afr_mt_gemini',
'afr_gpt': 'afr_mt_gpt',
'nso_gemini': 'nso_mt_gemini',
'nso_gpt': 'nso_mt_gpt',
'nso_nllb': 'nso_mt_nllb'
}
df_translations = df_translations.rename(columns=column_mapping)
elif os.path.exists(f"{DATA_DIR}/translation_data.csv"):
df_translations = pd.read_csv(f"{DATA_DIR}/translation_data.csv")
else:
print("No translation data found, using sample data")
df_translations = load_translation_data() # Fallback to sample data
# Try to load BLEU scores
if os.path.exists(f"{DATA_DIR}/bleu_scores.csv"):
df_bleu = pd.read_csv(f"{DATA_DIR}/bleu_scores.csv")
# Convert zul references to isizulu for compatibility
df_bleu['comparison_pair'] = df_bleu['comparison_pair'].str.replace('zul_', 'isizulu_')
df_bleu['language'] = df_bleu['language'].replace('isiZulu', 'isiZulu') # Already correct
else:
# Sample BLEU data (using isizulu for compatibility with existing analysis code)
df_bleu = pd.DataFrame({
'comparison_pair': ['afr_human_vs_afr_gemini', 'afr_human_vs_afr_gpt', 'afr_human_vs_afr_revised', 'nso_human_vs_nso_gemini', 'nso_human_vs_nso_gpt', 'nso_human_vs_nso_revised', 'nso_human_vs_nso_nllb', 'isizulu_human_vs_isizulu_gemini', 'isizulu_human_vs_isizulu_gpt', 'isizulu_human_vs_isizulu_revised', 'isizulu_human_vs_isizulu_nllb'],
'bleu_score': [0.78, 0.72, 0.89, 0.65, 0.68, 0.85, 0.71, 0.71, 0.69, 0.87, 0.73],
'language': ['Afrikaans', 'Afrikaans', 'Afrikaans', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'isiZulu', 'isiZulu', 'isiZulu', 'isiZulu']
})
# Try to load COMET scores
if os.path.exists(f"{DATA_DIR}/comet_scores.csv"):
df_comet = pd.read_csv(f"{DATA_DIR}/comet_scores.csv")
else:
# Sample COMET data
df_comet = pd.DataFrame({
'comparison_pair': ['afr_human_vs_afr_gemini', 'afr_human_vs_afr_gpt', 'afr_human_vs_afr_revised', 'nso_human_vs_nso_gemini', 'nso_human_vs_nso_gpt', 'nso_human_vs_nso_revised', 'isizulu_human_vs_isizulu_gemini', 'isizulu_human_vs_isizulu_gpt', 'isizulu_human_vs_isizulu_revised'],
'comet_score': [0.82, 0.79, 0.92, 0.71, 0.74, 0.88, 0.76, 0.73, 0.90],
'language': ['Afrikaans', 'Afrikaans', 'Afrikaans', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'isiZulu', 'isiZulu', 'isiZulu']
})
# Try to load CHRF scores
if os.path.exists(f"{DATA_DIR}/chrf_scores.csv"):
df_chrf = pd.read_csv(f"{DATA_DIR}/chrf_scores.csv")
else:
# Sample CHRF data
df_chrf = pd.DataFrame({
'comparison_pair': ['afr_human_vs_afr_gemini', 'afr_human_vs_afr_gpt', 'afr_human_vs_afr_revised', 'nso_human_vs_nso_gemini', 'nso_human_vs_nso_gpt', 'nso_human_vs_nso_revised', 'isizulu_human_vs_isizulu_gemini', 'isizulu_human_vs_isizulu_gpt', 'isizulu_human_vs_isizulu_revised'],
'chrf_score': [0.75, 0.70, 0.88, 0.60, 0.65, 0.80, 0.68, 0.66, 0.85],
'language': ['Afrikaans', 'Afrikaans', 'Afrikaans', 'Northern Sotho', 'Northern Sotho', 'Northern Sotho', 'isiZulu', 'isiZulu', 'isiZulu']
})
return df_translations, df_bleu, df_comet, df_chrf
except Exception as e:
st.error(f"Error loading data: {str(e)}")
return None, None, None, None
# Load all data
df_translations, df_bleu, df_comet, df_chrf = load_analysis_data()
if df_translations is not None:
# Language selection in columns
lang_col1, lang_col2 = st.columns([2, 10])
with lang_col1:
st.markdown('<div style="margin-top: 8px; font-weight: 500;">Select Language:</div>', unsafe_allow_html=True)
with lang_col2:
languages = ['Afrikaans', 'Northern Sotho', 'isiZulu']
selected_lang = st.selectbox(
"Select Language for Analysis:",
languages,
key="global_lang_select",
label_visibility="collapsed"
)
# Get language code
lang_codes = {'Afrikaans': 'afr', 'Northern Sotho': 'nso', 'isiZulu': 'isizulu'}
code = lang_codes[selected_lang]
# Create analysis tabs
analysis_tab1, analysis_tab2, analysis_tab3, analysis_tab4 = st.tabs(["Sample Translations", "📊 Quality Metrics", "🔄 Revision Analysis", "🔍 Word Comparison"])
with analysis_tab1:
# Translation Samples Tab
st.markdown("""
<div style="margin: 20px 0;">
<h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
📝 Translation Samples for {selected_lang}
</h4>
</div>
""".format(selected_lang=selected_lang), unsafe_allow_html=True)
# Use the global language selection
samples_code = code
# Show sample translations for the selected language
display_cols = ['english'] + [col for col in df_translations.columns if col.startswith(samples_code)]
if display_cols and len(display_cols) > 1: # Need at least english + 1 translation column
# Control panel
control_col1, control_col2, control_col3, control_col4 = st.columns([1, 7, 1, 2])
with control_col1:
st.markdown('<div style="margin-top: 8px; font-weight: 500;">Samples per page:</div>', unsafe_allow_html=True)
with control_col2:
page_size = st.selectbox(
"Samples per page:",
[10, 25, 50, 100],
index=0,
key="page_size_select",
label_visibility="collapsed"
)
# Initialize session state for pagination
if 'current_page' not in st.session_state:
st.session_state.current_page = 1
# Filter data and calculate pagination
available_data = df_translations[display_cols].dropna(subset=[col for col in display_cols if col != 'english'], how='all')
total_samples = len(available_data)
total_pages = max(1, (total_samples + page_size - 1) // page_size) # Ceiling division
# Ensure current page is valid
if st.session_state.current_page > total_pages:
st.session_state.current_page = 1
# Calculate start and end indices
start_idx = (st.session_state.current_page - 1) * page_size
end_idx = min(start_idx + page_size, total_samples)
# Get current page data
current_page_data = available_data.iloc[start_idx:end_idx]
with control_col3:
st.markdown('<div style="margin-top: 8px; font-weight: 500;">Page:</div>', unsafe_allow_html=True)
with control_col4:
# Page navigation
nav_col1, nav_col2, nav_col3, nav_col4, nav_col5 = st.columns([1, 1, 2, 1, 1])
with nav_col1:
if st.button("⏮️", key="first_page", help="First page", disabled=(st.session_state.current_page == 1)):
st.session_state.current_page = 1
st.rerun()
with nav_col2:
if st.button("◀️", key="prev_page", help="Previous page", disabled=(st.session_state.current_page == 1)):
st.session_state.current_page -= 1
st.rerun()
with nav_col3:
st.markdown(f'<div style="text-align: center; margin-top: 8px; font-weight: 500;">{st.session_state.current_page} / {total_pages}</div>', unsafe_allow_html=True)
with nav_col4:
if st.button("▶️", key="next_page", help="Next page", disabled=(st.session_state.current_page == total_pages)):
st.session_state.current_page += 1
st.rerun()
with nav_col5:
if st.button("⏭️", key="last_page", help="Last page", disabled=(st.session_state.current_page == total_pages)):
st.session_state.current_page = total_pages
st.rerun()
# Statistics cards
stats_col1, stats_col2, stats_col3, stats_col4 = st.columns(4)
with stats_col1:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Showing</div>
<div class="metric-value">{len(current_page_data)}</div>
</div>
""", unsafe_allow_html=True)
with stats_col2:
available_systems = len([col for col in display_cols if col != 'english'])
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Translation Systems</div>
<div class="metric-value">{available_systems}</div>
</div>
""", unsafe_allow_html=True)
with stats_col3:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Total Available</div>
<div class="metric-value">{total_samples}</div>
</div>
""", unsafe_allow_html=True)
with stats_col4:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Current Page</div>
<div class="metric-value">{st.session_state.current_page}/{total_pages}</div>
</div>
""", unsafe_allow_html=True)
# Display the samples table
st.markdown("### Translation Examples")
if len(current_page_data) > 0:
# Create a styled dataframe with better column names
display_df = current_page_data.copy()
# Rename columns for better display
column_rename = {
'english': 'English (Source)',
}
# Add human-readable names for translation columns
for col in display_df.columns:
if col.startswith(samples_code):
if '_human' in col:
column_rename[col] = f'{selected_lang} (Human)'
elif '_revised' in col:
column_rename[col] = f'{selected_lang} (Revised)'
elif '_mt_gemini' in col or '_gemini' in col:
column_rename[col] = f'{selected_lang} (Gemini)'
elif '_mt_gpt' in col or '_gpt' in col:
column_rename[col] = f'{selected_lang} (GPT)'
elif '_mt_nllb' in col or '_nllb' in col:
column_rename[col] = f'{selected_lang} (NLLB)'
else:
# Generic fallback
clean_name = col.replace(f'{samples_code}_', '').replace('_', ' ').title()
column_rename[col] = f'{selected_lang} ({clean_name})'
display_df = display_df.rename(columns=column_rename)
# Add row numbers based on actual position in full dataset
display_df.index = range(start_idx + 1, end_idx + 1)
display_df.index.name = 'Sample #'
st.dataframe(
display_df,
use_container_width=True,
height=min(600, 50 + len(display_df) * 35), # Dynamic height based on content
column_config={
col: st.column_config.TextColumn(col, width="medium")
for col in display_df.columns
}
)
# Page info summary
st.markdown(f"""
<div style="margin-top: 16px; padding: 12px; background: #f8fafc; border-radius: 6px; text-align: center; color: #6b7280; font-size: 0.9rem;">
📄 Showing samples {start_idx + 1} to {end_idx} of {total_samples} total samples • Page {st.session_state.current_page} of {total_pages}
</div>
""", unsafe_allow_html=True)
# Quick jump to page
if total_pages > 5: # Only show quick jump for datasets with many pages
st.markdown("### Quick Navigation")
jump_col1, jump_col2, jump_col3 = st.columns([1, 2, 1])
with jump_col2:
target_page = st.number_input(
f"Jump to page (1-{total_pages}):",
min_value=1,
max_value=total_pages,
value=st.session_state.current_page,
key="page_jump"
)
if st.button("🔗 Go to Page", use_container_width=True):
if target_page != st.session_state.current_page:
st.session_state.current_page = target_page
st.rerun()
else:
st.warning("⚠️ No translation samples found for the current page.")
else:
st.warning(f"⚠️ No translation data available for {selected_lang}. Expected columns starting with '{samples_code}_'")
# Debug information
available_columns = [col for col in df_translations.columns if col.startswith(samples_code)]
if available_columns:
st.info(f"🔍 Found columns: {', '.join(available_columns)}")
else:
all_lang_columns = [col for col in df_translations.columns if any(col.startswith(prefix) for prefix in ['afr_', 'nso_', 'isizulu_'])]
if all_lang_columns:
st.info(f"💡 Available language columns: {', '.join(all_lang_columns[:10])}{'...' if len(all_lang_columns) > 10 else ''}")
with analysis_tab2:
st.markdown("""
<div style="margin: 20px 0;">
<h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
📈 Quality Metrics for {selected_lang}
</h4>
</div>
""".format(selected_lang=selected_lang), unsafe_allow_html=True)
# Get language code
lang_codes = {'Afrikaans': 'afr', 'Northern Sotho': 'nso', 'isiZulu': 'isizulu'}
code = lang_codes[selected_lang]
# Score visualizations
if df_bleu is not None and df_chrf is not None and df_comet is not None:
# Filter scores for selected language
lang_bleu = df_bleu[df_bleu['language'] == selected_lang] if 'language' in df_bleu.columns else df_bleu
lang_chrf = df_chrf[df_chrf['language'] == selected_lang] if 'language' in df_chrf.columns else df_chrf
lang_comet = df_comet[df_comet['language'] == selected_lang] if 'language' in df_comet.columns else df_comet
# Check if we have domain-level data
has_domain_data = ('domain' in lang_bleu.columns and 'domain' in lang_chrf.columns and
'domain' in lang_comet.columns and
len(lang_bleu[lang_bleu['domain'] != 'Overall']) > 0)
if has_domain_data:
# Add domain filter
available_domains = sorted(lang_bleu['domain'].unique())
domain_options = ['Overall'] + [d for d in available_domains if d != 'Overall']
selected_domain = st.selectbox(
"📍 Select Domain for Analysis:",
domain_options,
key=f"domain_selector_{selected_lang}"
)
# Filter data based on selected domain
if selected_domain == 'Overall':
display_bleu = lang_bleu[lang_bleu['domain'] == 'Overall']
display_chrf = lang_chrf[lang_chrf['domain'] == 'Overall']
display_comet = lang_comet[lang_comet['domain'] == 'Overall']
chart_title_suffix = " - Overall"
else:
display_bleu = lang_bleu[lang_bleu['domain'] == selected_domain]
display_chrf = lang_chrf[lang_chrf['domain'] == selected_domain]
display_comet = lang_comet[lang_comet['domain'] == selected_domain]
chart_title_suffix = f" - {selected_domain}"
else:
# Use all data if no domain column
display_bleu = lang_bleu
display_chrf = lang_chrf
display_comet = lang_comet
chart_title_suffix = ""
# Create score charts
if len(display_bleu) > 0 and len(display_chrf) > 0 and len(display_comet) > 0:
chart_col1, chart_col2, chart_col3 = st.columns(3)
with chart_col1:
# chrF Score Chart
fig_chrf = px.bar(
display_chrf,
x='comparison_pair',
y='chrf_score',
title=f'chrF Scores - {selected_lang}{chart_title_suffix}',
color='chrf_score',
color_continuous_scale='oranges'
)
fig_chrf.update_layout(
xaxis_title="Translation Pairs",
yaxis_title="chrF Score",
xaxis_tickangle=-45,
height=400,
font=dict(family="Inter", size=12)
)
st.plotly_chart(fig_chrf, use_container_width=True)
with chart_col2:
# BLEU Score Chart
fig_bleu = px.bar(
display_bleu,
x='comparison_pair',
y='bleu_score',
title=f'BLEU Scores - {selected_lang}{chart_title_suffix}',
color='bleu_score',
color_continuous_scale='blues'
)
fig_bleu.update_layout(
xaxis_title="Translation Pairs",
yaxis_title="BLEU Score",
xaxis_tickangle=-45,
height=400,
font=dict(family="Inter", size=12)
)
st.plotly_chart(fig_bleu, use_container_width=True)
with chart_col3:
# COMET Score Chart
fig_comet = px.bar(
display_comet,
x='comparison_pair',
y='comet_score',
title=f'COMET Scores - {selected_lang}{chart_title_suffix}',
color='comet_score',
color_continuous_scale='greens'
)
fig_comet.update_layout(
xaxis_title="Translation Pairs",
yaxis_title="COMET Score",
xaxis_tickangle=-45,
height=400,
font=dict(family="Inter", size=12)
)
st.plotly_chart(fig_comet, use_container_width=True)
# PRIMARY SPIDER CHART - Domain Performance when available, Model Performance otherwise
if has_domain_data:
st.markdown(f"""
<h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 20px 0 16px 0;">
🕸️ Domain Performance Spider Charts - {selected_lang}
</h4>
""", unsafe_allow_html=True)
# Filter out "Overall" so only domain-level values are shown
domain_bleu = lang_bleu[lang_bleu['domain'] != 'Overall']
domain_chrf = lang_chrf[lang_chrf['domain'] != 'Overall']
domain_comet = lang_comet[lang_comet['domain'] != 'Overall']
# Pivot all metrics
pivot_bleu = domain_bleu.pivot(
index='comparison_pair',
columns='domain',
values='bleu_score'
).fillna(0)
pivot_chrf = domain_chrf.pivot(
index='comparison_pair',
columns='domain',
values='chrf_score'
).fillna(0)
pivot_comet = domain_comet.pivot(
index='comparison_pair',
columns='domain',
values='comet_score'
).fillna(0)
# Ensure domains are in the same order for all metrics
domains = sorted(set(pivot_bleu.columns) | set(pivot_chrf.columns) | set(pivot_comet.columns))
pivot_bleu = pivot_bleu.reindex(columns=domains, fill_value=0)
pivot_chrf = pivot_chrf.reindex(columns=domains, fill_value=0)
pivot_comet = pivot_comet.reindex(columns=domains, fill_value=0)
# Define distinct colors with reduced opacity
distinct_colors = [
'rgba(255, 99, 132, 0.4)', # Red
'rgba(54, 162, 235, 0.4)', # Blue
'rgba(99, 255, 132, 0.4)', # Green
'rgba(75, 192, 192, 0.4)', # Teal
'rgba(255, 205, 86, 0.4)', # Yellow
'rgba(153, 102, 255, 0.4)', # Purple
'rgba(255, 159, 64, 0.4)', # Orange
'rgba(199, 199, 199, 0.4)', # Grey
'rgba(83, 102, 255, 0.4)', # Indigo
'rgba(255, 99, 255, 0.4)', # Magenta
]
# Border colors (same colors but full opacity for borders)
border_colors = [
'rgba(255, 99, 132, 1.0)', # Red
'rgba(54, 162, 235, 1.0)', # Blue
'rgba(99, 255, 132, 1.0)', # Green
'rgba(75, 192, 192, 1.0)', # Teal
'rgba(255, 205, 86, 1.0)', # Yellow
'rgba(153, 102, 255, 1.0)', # Purple
'rgba(255, 159, 64, 1.0)', # Orange
'rgba(199, 199, 199, 1.0)', # Grey
'rgba(83, 102, 255, 1.0)', # Indigo
'rgba(255, 99, 255, 1.0)', # Magenta
]
# Layout for three side-by-side spider charts
spider_col1, spider_col2, spider_col3 = st.columns(3)
# ---------------- CHRF SPIDER ----------------
with spider_col1:
fig_chrf_spider = go.Figure()
for i, (model_name, row) in enumerate(pivot_chrf.iterrows()):
color_idx = i % len(distinct_colors)
fig_chrf_spider.add_trace(go.Scatterpolar(
r=row.tolist() + [row.tolist()[0]], # close loop
theta=domains + [domains[0]],
fill='toself',
name=model_name.split('_')[-1].upper(),
fillcolor=distinct_colors[color_idx],
line=dict(color=border_colors[color_idx], width=2),
opacity=0.7,
showlegend=False # Hide legend on first chart
))
fig_chrf_spider.update_layout(
polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
showlegend=False,
title=dict(text=f"Domain Performance (chrF) - {selected_lang}"),
height=450
)
st.plotly_chart(fig_chrf_spider, use_container_width=True)
# ---------------- BLEU SPIDER ----------------
with spider_col2:
fig_bleu_spider = go.Figure()
for i, (model_name, row) in enumerate(pivot_bleu.iterrows()):
color_idx = i % len(distinct_colors)
fig_bleu_spider.add_trace(go.Scatterpolar(
r=row.tolist() + [row.tolist()[0]], # close loop
theta=domains + [domains[0]],
fill='toself',
name=model_name.split('_')[-1].upper(),
fillcolor=distinct_colors[color_idx],
line=dict(color=border_colors[color_idx], width=2),
opacity=0.7,
showlegend=True # Show legend on middle chart
))
fig_bleu_spider.update_layout(
polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
showlegend=True,
title=dict(text=f"Domain Performance (BLEU) - {selected_lang}"),
height=450,
legend=dict(
orientation="h",
yanchor="bottom",
y=-0.3,
xanchor="center",
x=0.5
)
)
st.plotly_chart(fig_bleu_spider, use_container_width=True)
# ---------------- COMET SPIDER ----------------
with spider_col3:
fig_comet_spider = go.Figure()
for i, (model_name, row) in enumerate(pivot_comet.iterrows()):
color_idx = i % len(distinct_colors)
fig_comet_spider.add_trace(go.Scatterpolar(
r=row.tolist() + [row.tolist()[0]], # close loop
theta=domains + [domains[0]],
fill='toself',
name=model_name.split('_')[-1].upper(),
fillcolor=distinct_colors[color_idx],
line=dict(color=border_colors[color_idx], width=2),
opacity=0.7,
showlegend=False # Hide legend on last chart
))
fig_comet_spider.update_layout(
polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
showlegend=False,
title=dict(text=f"Domain Performance (COMET) - {selected_lang}"),
height=450
)
st.plotly_chart(fig_comet_spider, use_container_width=True)
# # Overall Performance Summary
# st.markdown("""
# <h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
# 📋 Overall Performance Summary
# </h4>
# """, unsafe_allow_html=True)
# # Create overall summary table
# if len(display_bleu) > 0 and len(display_chrf) > 0 and len(display_comet) > 0:
# # Merge all three metrics
# merged_scores = pd.merge(display_bleu, display_chrf, on='comparison_pair', suffixes=('_bleu', '_chrf'))
# merged_scores = pd.merge(merged_scores, display_comet, on='comparison_pair')
# merged_scores['model'] = merged_scores['comparison_pair'].apply(lambda x: x.split('_')[-1].upper())
# summary_data = []
# for _, row in merged_scores.iterrows():
# summary_data.append({
# 'Model': row['model'],
# 'BLEU Score': f"{row['bleu_score']:.3f}",
# 'chrF Score': f"{row['chrf_score']:.3f}",
# 'COMET Score': f"{row['comet_score']:.3f}",
# 'Average': f"{(row['bleu_score'] + row['chrf_score'] + row['comet_score']) / 3:.3f}"
# })
# summary_df = pd.DataFrame(summary_data)
# # Only sort if dataframe has data and 'Average' column exists
# if len(summary_df) > 0 and 'Average' in summary_df.columns:
# summary_df = summary_df.sort_values('Average', ascending=False)
# # Style the dataframe
# st.dataframe(
# summary_df,
# use_container_width=True,
# hide_index=True,
# column_config={
# "Model": st.column_config.TextColumn("Model", width="medium"),
# "BLEU Score": st.column_config.NumberColumn("BLEU Score", format="%.3f"),
# "chrF Score": st.column_config.NumberColumn("chrF Score", format="%.3f"),
# "COMET Score": st.column_config.NumberColumn("COMET Score", format="%.3f"),
# "Average": st.column_config.NumberColumn("Average", format="%.3f")
# }
# )
with analysis_tab3:
# Revision Analysis Tab
st.markdown("""
<div style="margin: 20px 0;">
<h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
✏️ Human Translation Revision Analysis for {selected_lang}
</h4>
</div>
""".format(selected_lang=selected_lang), unsafe_allow_html=True)
# Use the global language selection
rev_code = code
# Check for revision columns
human_col = f"{rev_code}_human"
revised_col = f"{rev_code}_revised"
if human_col in df_translations.columns and revised_col in df_translations.columns:
# Get all rows with human translations for this language
df_lang_data = df_translations[[human_col, revised_col]].copy()
# Remove rows where human translation is missing (can't analyze revisions without original)
df_lang_data = df_lang_data[df_lang_data[human_col].notna()].copy()
total_human_translations = len(df_lang_data)
if total_human_translations == 0:
st.warning(f"⚠️ No human translations found for {selected_lang}")
else:
# Calculate revision statistics
# For missing revised translations, we assume no revision was made (same as original)
df_lang_data[revised_col] = df_lang_data[revised_col].fillna(df_lang_data[human_col])
# Count actual changes
revisions_made = sum(df_lang_data[human_col] != df_lang_data[revised_col])
revision_rate = (revisions_made / total_human_translations) * 100
# Count how many had revision data available
revisions_available = sum(df_translations[revised_col].notna())
# Calculate revision types
def categorize_revision(original, revised):
    """Classify the edit between a human translation and its revision.

    Returns one of: "Missing Data" (either side is NaN/None),
    "No Change" (whitespace-insensitive equality), "Expansion"
    (revision has more words), "Reduction" (fewer words), or
    "Modification" (same word count, different text).
    """
    # Either side missing -> nothing meaningful to compare.
    if pd.isna(original) or pd.isna(revised):
        return "Missing Data"
    original_text, revised_text = str(original), str(revised)
    # Leading/trailing whitespace differences do not count as a revision.
    if original_text.strip() == revised_text.strip():
        return "No Change"
    # Rough size comparison on case-folded, whitespace-split tokens.
    n_orig = len(original_text.lower().split())
    n_rev = len(revised_text.lower().split())
    if n_rev > n_orig:
        return "Expansion"
    if n_rev < n_orig:
        return "Reduction"
    return "Modification"
df_lang_data['revision_type'] = df_lang_data.apply(
lambda row: categorize_revision(row[human_col], row[revised_col]), axis=1
)
# Revision statistics cards
rev_col1, rev_col2, rev_col3, rev_col4 = st.columns(4)
with rev_col1:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Human Translations</div>
<div class="metric-value">{total_human_translations}</div>
</div>
""", unsafe_allow_html=True)
with rev_col2:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Revisions Available</div>
<div class="metric-value">{revisions_available}</div>
</div>
""", unsafe_allow_html=True)
with rev_col3:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Changes Made</div>
<div class="metric-value">{revisions_made}</div>
</div>
""", unsafe_allow_html=True)
with rev_col4:
st.markdown(f"""
<div class="metric-card">
<div class="metric-title">Revision Rate</div>
<div class="metric-value">{revision_rate:.1f}%</div>
</div>
""", unsafe_allow_html=True)
# Revision type analysis
st.markdown("""
<h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
📈 Revision Pattern Analysis
</h4>
""", unsafe_allow_html=True)
revision_counts = df_lang_data['revision_type'].value_counts()
if len(revision_counts) > 0:
# Create revision type charts
rev_chart_col1, rev_chart_col2 = st.columns(2)
with rev_chart_col1:
# Pie chart of revision types
fig_pie = px.pie(
values=revision_counts.values,
names=revision_counts.index,
title=f"Revision Types Distribution",
color_discrete_sequence=px.colors.qualitative.Set3
)
fig_pie.update_layout(height=400, font=dict(family="Inter", size=12))
st.plotly_chart(fig_pie, use_container_width=True)
with rev_chart_col2:
# Bar chart of revision types
fig_bar = px.bar(
x=revision_counts.values,
y=revision_counts.index,
orientation='h',
title=f"Revision Frequency",
color=revision_counts.values,
color_continuous_scale='viridis'
)
fig_bar.update_layout(
height=400,
xaxis_title="Count",
yaxis_title="Revision Type",
font=dict(family="Inter", size=12)
)
st.plotly_chart(fig_bar, use_container_width=True)
# Word-level revision analysis
st.markdown("""
<h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
🔤 Word-Level Changes Analysis
</h4>
""", unsafe_allow_html=True)
# Calculate word changes only for actual revisions
words_added = []
words_removed = []
changed_revisions = df_lang_data[df_lang_data['revision_type'] != 'No Change']
for _, row in changed_revisions.iterrows():
if pd.notna(row[human_col]) and pd.notna(row[revised_col]):
orig_words = set(str(row[human_col]).lower().split())
rev_words = set(str(row[revised_col]).lower().split())
added = rev_words - orig_words
removed = orig_words - rev_words
words_added.extend(list(added))
words_removed.extend(list(removed))
from collections import Counter
added_counts = Counter(words_added)
removed_counts = Counter(words_removed)
word_analysis_col1, word_analysis_col2 = st.columns(2)
with word_analysis_col1:
st.markdown("**🟢 Most Added Words**")
if added_counts:
top_added = dict(added_counts.most_common(15))
# Create horizontal bar chart for added words
fig_added = px.bar(
x=list(top_added.values()),
y=list(top_added.keys()),
orientation='h',
title="Most Frequently Added Words",
color=list(top_added.values()),
color_continuous_scale='Greens'
)
fig_added.update_layout(
height=400,
xaxis_title="Frequency",
yaxis_title="Words",
font=dict(family="Inter", size=10)
)
st.plotly_chart(fig_added, use_container_width=True)
else:
st.markdown("*No words added in revisions*")
with word_analysis_col2:
st.markdown("**🔴 Most Removed Words**")
if removed_counts:
top_removed = dict(removed_counts.most_common(15))
# Create horizontal bar chart for removed words
fig_removed = px.bar(
x=list(top_removed.values()),
y=list(top_removed.keys()),
orientation='h',
title="Most Frequently Removed Words",
color=list(top_removed.values()),
color_continuous_scale='Reds'
)
fig_removed.update_layout(
height=400,
xaxis_title="Frequency",
yaxis_title="Words",
font=dict(family="Inter", size=10)
)
st.plotly_chart(fig_removed, use_container_width=True)
else:
st.markdown("*No words removed in revisions*")
# Revision examples
st.markdown("""
<h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
📝 Revision Examples
</h4>
""", unsafe_allow_html=True)
# Show examples of different types of revisions
revision_examples = changed_revisions.head(10)
if len(revision_examples) > 0:
# Create tabs for different revision types
available_types = revision_examples['revision_type'].unique()
if len(available_types) > 1:
type_tabs = st.tabs([f"{rtype} ({len(revision_examples[revision_examples['revision_type'] == rtype])})"
for rtype in available_types])
for i, rtype in enumerate(available_types):
with type_tabs[i]:
type_examples = revision_examples[revision_examples['revision_type'] == rtype].head(5)
for idx, row in type_examples.iterrows():
st.markdown(f"""
<div style="background: #f8fafc; border-left: 4px solid #3b82f6; padding: 16px; margin: 10px 0; border-radius: 0 8px 8px 0;">
<div style="font-weight: 600; color: #1e40af; margin-bottom: 8px;">Original:</div>
<div style="margin-bottom: 12px; font-family: monospace; background: #fff; padding: 8px; border-radius: 4px;">{row[human_col]}</div>
<div style="font-weight: 600; color: #059669; margin-bottom: 8px;">Revised:</div>
<div style="margin-bottom: 8px; font-family: monospace; background: #fff; padding: 8px; border-radius: 4px;">{row[revised_col]}</div>
<div style="font-size: 0.875rem; color: #6b7280;">Type: <strong>{row['revision_type']}</strong></div>
</div>
""", unsafe_allow_html=True)
else:
# Single type, show directly
for idx, row in revision_examples.iterrows():
st.markdown(f"""
<div style="background: #f8fafc; border-left: 4px solid #3b82f6; padding: 16px; margin: 10px 0; border-radius: 0 8px 8px 0;">
<div style="font-weight: 600; color: #1e40af; margin-bottom: 8px;">Original:</div>
<div style="margin-bottom: 12px; font-family: monospace; background: #fff; padding: 8px; border-radius: 4px;">{row[human_col]}</div>
<div style="font-weight: 600; color: #059669; margin-bottom: 8px;">Revised:</div>
<div style="margin-bottom: 8px; font-family: monospace; background: #fff; padding: 8px; border-radius: 4px;">{row[revised_col]}</div>
<div style="font-size: 0.875rem; color: #6b7280;">Type: <strong>{row['revision_type']}</strong></div>
</div>
""", unsafe_allow_html=True)
else:
st.info(f"No revisions found for {selected_lang}.")
else:
st.info(f"No revision data available for analysis.")
else:
st.warning(f"⚠️ Revision columns not found for {selected_lang}. Expected columns: `{human_col}` and `{revised_col}`")
with analysis_tab4:
# Translation comparison section
st.markdown("""
<div style="margin: 20px 0;">
<h4 style="font-family: 'Inter', sans-serif; font-size: 1.2rem; font-weight: 600; color: #374151; margin: 0 0 16px 0;">
🔍 Translation Comparison & Word Analysis for {selected_lang}
</h4>
</div>
""".format(selected_lang=selected_lang), unsafe_allow_html=True)
# Use the global language selection
comp_code = code
# Get available translation columns for selected language
available_cols = []
for col in df_translations.columns:
if col.startswith(comp_code) and col != 'english':
available_cols.append(col)
if len(available_cols) >= 2:
comp_col1, comp_col2, comp_col3 = st.columns([1, 1, 1])
with comp_col1:
col1_selection = st.selectbox(
"First Translation:",
available_cols,
key="col1_select"
)
with comp_col2:
col2_selection = st.selectbox(
"Second Translation:",
[col for col in available_cols if col != col1_selection],
key="col2_select"
)
with comp_col3:
# Add spacing to align button with selectboxes
st.markdown('<div style="margin-top: 25px;"></div>', unsafe_allow_html=True)
analyze_clicked = st.button(
"🔍 Analyze",
type="primary",
use_container_width=True,
key="analyze_word_diff_btn"
)
if analyze_clicked:
# Perform word analysis with ALL available data
def get_word_differences(text1, text2):
    """Return (unique-to-text1, unique-to-text2, shared) word sets.

    Missing values (NaN/None) are treated as empty texts, so a
    comparison against a missing side simply reports every word of
    the present side as unique. Words are lower-cased and
    whitespace-tokenised.
    """
    def _tokens(value):
        # Missing value -> empty vocabulary.
        return set() if pd.isna(value) else set(str(value).lower().split())

    words1, words2 = _tokens(text1), _tokens(text2)
    return words1 - words2, words2 - words1, words1 & words2
# Analyze ALL rows with available data
unique_words_1 = []
unique_words_2 = []
common_words = []
all_words_1 = [] # For frequency counting
all_words_2 = [] # For frequency counting
# Process all rows, including those with missing revisions
for _, row in df_translations.iterrows():
# Get text from columns, using original if revision is missing
text1 = row[col1_selection] if pd.notna(row[col1_selection]) else None
text2 = row[col2_selection] if pd.notna(row[col2_selection]) else None
# Skip if both are missing
if text1 is None and text2 is None:
continue
# Collect ALL words from each column for frequency analysis
if text1 is not None:
words_from_1 = str(text1).lower().split()
all_words_1.extend(words_from_1)
if text2 is not None:
words_from_2 = str(text2).lower().split()
all_words_2.extend(words_from_2)
# Only do comparison if both texts exist
if text1 is not None and text2 is not None:
only_1, only_2, common = get_word_differences(text1, text2)
unique_words_1.extend(list(only_1))
unique_words_2.extend(list(only_2))
common_words.extend(list(common))
from collections import Counter
# Count frequencies from ALL words
all_freq_1 = Counter(all_words_1) # All words from column 1
all_freq_2 = Counter(all_words_2) # All words from column 2
unique_freq_1 = Counter(unique_words_1) # Only unique words
unique_freq_2 = Counter(unique_words_2) # Only unique words
common_freq = Counter(common_words) # Only common words
# Display statistics
st.markdown('<div class="comparison-container">', unsafe_allow_html=True)
col_result1, col_result2, col_result3, col_result4 = st.columns(4)
with col_result1:
st.markdown(f"""
<div style="text-align: center; padding: 15px;">
<h5 style="color: #dc2626; margin-bottom: 10px;">Unique to {col1_selection.replace('_', ' ').title()}</h5>
<div style="font-size: 1.3rem; font-weight: bold; color: #dc2626;">{len(unique_freq_1)}</div>
<div style="color: #6b7280; font-size: 0.8rem;">unique words</div>
</div>
""", unsafe_allow_html=True)
with col_result2:
st.markdown(f"""
<div style="text-align: center; padding: 15px;">
<h5 style="color: #166534; margin-bottom: 10px;">Unique to {col2_selection.replace('_', ' ').title()}</h5>
<div style="font-size: 1.3rem; font-weight: bold; color: #166534;">{len(unique_freq_2)}</div>
<div style="color: #6b7280; font-size: 0.8rem;">unique words</div>
</div>
""", unsafe_allow_html=True)
with col_result3:
st.markdown(f"""
<div style="text-align: center; padding: 15px;">
<h5 style="color: #475569; margin-bottom: 10px;">Common Words</h5>
<div style="font-size: 1.3rem; font-weight: bold; color: #475569;">{len(common_freq)}</div>
<div style="color: #6b7280; font-size: 0.8rem;">shared words</div>
</div>
""", unsafe_allow_html=True)
with col_result4:
st.markdown(f"""
<div style="text-align: center; padding: 15px;">
<h5 style="color: #7c3aed; margin-bottom: 10px;">Total Vocabulary</h5>
<div style="font-size: 1.3rem; font-weight: bold; color: #7c3aed;">{len(set(all_words_1 + all_words_2))}</div>
<div style="color: #6b7280; font-size: 0.8rem;">total unique words</div>
</div>
""", unsafe_allow_html=True)
st.markdown('</div>', unsafe_allow_html=True)
# Word Clouds Section
st.markdown("""
<h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
☁️ Word Clouds Visualization
</h4>
""", unsafe_allow_html=True)
# Generate word clouds using matplotlib and wordcloud
try:
# Show loading spinner while generating word clouds
with st.spinner("🎨 Generating word clouds... This may take a moment."):
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import io
import base64
# Function to create word cloud image (optimized)
def create_wordcloud_image(word_freq, title, color_scheme='viridis'):
    """Render *word_freq* as a word cloud and return a base64 PNG string.

    Returns None when the frequency mapping is empty or rendering
    fails (a Streamlit warning is shown on failure).
    """
    if not word_freq or len(word_freq) == 0:
        return None
    try:
        # Small, capped cloud keeps rendering fast; fixed seed makes
        # the layout deterministic across reruns.
        cloud = WordCloud(
            width=300,
            height=200,
            background_color='white',
            colormap=color_scheme,
            max_words=25,
            relative_scaling=0.6,
            random_state=42,
            min_font_size=8,
            max_font_size=60,
            prefer_horizontal=0.9,
            collocations=False
        ).generate_from_frequencies(word_freq)
        figure, axis = plt.subplots(figsize=(5, 3))
        axis.imshow(cloud, interpolation='bilinear')
        axis.axis('off')
        axis.set_title(title, fontsize=10, fontweight='bold', pad=10)
        # Serialise to PNG in memory and base64-encode for inline HTML use.
        png_buffer = io.BytesIO()
        plt.savefig(png_buffer, format='png', bbox_inches='tight', dpi=100, facecolor='white')
        png_buffer.seek(0)
        encoded = base64.b64encode(png_buffer.getvalue()).decode()
        plt.close(figure)  # free the figure to avoid leaking memory
        return encoded
    except Exception as e:
        st.warning(f"Error creating word cloud for {title}: {str(e)}")
        return None
# Create all word clouds in one row
cloud_col1, cloud_col2, cloud_col3 = st.columns(3)
with cloud_col1:
if unique_freq_1 and len(unique_freq_1) > 0:
# Use ALL unique words but display top 25 in cloud
img1 = create_wordcloud_image(
dict(unique_freq_1), # Use ALL unique words for frequency
f"Unique: {col1_selection.replace('_', ' ').title()}",
'Reds'
)
if img1:
st.markdown(f'''
<div style="text-align: center; margin: 10px 0;">
<img src="data:image/png;base64,{img1}" style="max-width: 100%; height: auto; border-radius: 6px; box-shadow: 0 1px 4px rgba(0,0,0,0.1);">
</div>
<div style="text-align: center; font-size: 0.8rem; color: #6b7280;">
Showing top 25 of {len(unique_freq_1)} unique words
</div>
''', unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align: center; padding: 40px; background: #fef2f2; border-radius: 6px; color: #dc2626;">
<div style="font-size: 2rem;">📝</div>
<div style="font-size: 0.9rem; margin-top: 8px;">No unique words</div>
</div>
""", unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align: center; padding: 40px; background: #f9fafb; border-radius: 6px; color: #6b7280;">
<div style="font-size: 2rem;">📝</div>
<div style="font-size: 0.9rem; margin-top: 8px;">No unique words found</div>
</div>
""", unsafe_allow_html=True)
with cloud_col2:
if unique_freq_2 and len(unique_freq_2) > 0:
# Use ALL unique words but display top 25 in cloud
img2 = create_wordcloud_image(
dict(unique_freq_2), # Use ALL unique words for frequency
f"Unique: {col2_selection.replace('_', ' ').title()}",
'Greens'
)
if img2:
st.markdown(f'''
<div style="text-align: center; margin: 10px 0;">
<img src="data:image/png;base64,{img2}" style="max-width: 100%; height: auto; border-radius: 6px; box-shadow: 0 1px 4px rgba(0,0,0,0.1);">
</div>
<div style="text-align: center; font-size: 0.8rem; color: #6b7280;">
Showing top 25 of {len(unique_freq_2)} unique words
</div>
''', unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align: center; padding: 40px; background: #f0fdf4; border-radius: 6px; color: #166534;">
<div style="font-size: 2rem;">📝</div>
<div style="font-size: 0.9rem; margin-top: 8px;">No unique words</div>
</div>
""", unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align: center; padding: 40px; background: #f9fafb; border-radius: 6px; color: #6b7280;">
<div style="font-size: 2rem;">📝</div>
<div style="font-size: 0.9rem; margin-top: 8px;">No unique words found</div>
</div>
""", unsafe_allow_html=True)
with cloud_col3:
if common_freq and len(common_freq) > 0:
# Use ALL common words but display top 25 in cloud
img3 = create_wordcloud_image(
dict(common_freq), # Use ALL common words for frequency
"Common Words",
'Blues'
)
if img3:
st.markdown(f'''
<div style="text-align: center; margin: 10px 0;">
<img src="data:image/png;base64,{img3}" style="max-width: 100%; height: auto; border-radius: 6px; box-shadow: 0 1px 4px rgba(0,0,0,0.1);">
</div>
<div style="text-align: center; font-size: 0.8rem; color: #6b7280;">
Showing top 25 of {len(common_freq)} common words
</div>
''', unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align: center; padding: 40px; background: #eff6ff; border-radius: 6px; color: #1d4ed8;">
<div style="font-size: 2rem;">📝</div>
<div style="font-size: 0.9rem; margin-top: 8px;">No common words</div>
</div>
""", unsafe_allow_html=True)
else:
st.markdown("""
<div style="text-align: center; padding: 40px; background: #f9fafb; border-radius: 6px; color: #6b7280;">
<div style="font-size: 2rem;">🤝</div>
<div style="font-size: 0.9rem; margin-top: 8px;">No common words found</div>
</div>
""", unsafe_allow_html=True)
except ImportError:
st.warning("📦 WordCloud library not available. Install with: `pip install wordcloud`")
# Fallback to top words lists
st.markdown("**📋 Top Unique Words (Fallback)**")
fallback_col1, fallback_col2, fallback_col3 = st.columns(3)
with fallback_col1:
st.markdown(f"**🔴 Unique to {col1_selection.replace('_', ' ').title()}**")
if unique_freq_1:
for word, count in unique_freq_1.most_common(10):
st.markdown(f"• {word} ({count})")
else:
st.markdown("*No unique words*")
with fallback_col2:
st.markdown(f"**🟢 Unique to {col2_selection.replace('_', ' ').title()}**")
if unique_freq_2:
for word, count in unique_freq_2.most_common(10):
st.markdown(f"• {word} ({count})")
else:
st.markdown("*No unique words*")
with fallback_col3:
st.markdown("**🔵 Common Words**")
if common_freq:
for word, count in common_freq.most_common(10):
st.markdown(f"• {word} ({count})")
else:
st.markdown("*No common words*")
# Word frequency bar charts as additional analysis
st.markdown("""
<h4 style="font-family: 'Inter', sans-serif; font-weight: 600; color: #374151; margin: 30px 0 16px 0;">
📊 Top Words Frequency Comparison
</h4>
""", unsafe_allow_html=True)
freq_col1, freq_col2 = st.columns(2)
with freq_col1:
if unique_freq_1:
top_words_1 = dict(unique_freq_1.most_common(10))
fig_freq1 = px.bar(
x=list(top_words_1.values()),
y=list(top_words_1.keys()),
orientation='h',
title=f"Top Unique Words: {col1_selection.replace('_', ' ').title()}",
color=list(top_words_1.values()),
color_continuous_scale='Reds'
)
fig_freq1.update_layout(
height=400,
xaxis_title="Frequency",
yaxis_title="Words",
font=dict(family="Inter", size=10)
)
st.plotly_chart(fig_freq1, use_container_width=True)
with freq_col2:
if unique_freq_2:
top_words_2 = dict(unique_freq_2.most_common(10))
fig_freq2 = px.bar(
x=list(top_words_2.values()),
y=list(top_words_2.keys()),
orientation='h',
title=f"Top Unique Words: {col2_selection.replace('_', ' ').title()}",
color=list(top_words_2.values()),
color_continuous_scale='Greens'
)
fig_freq2.update_layout(
height=400,
xaxis_title="Frequency",
yaxis_title="Words",
font=dict(family="Inter", size=10)
)
st.plotly_chart(fig_freq2, use_container_width=True)
else:
st.warning("⚠️ Need at least 2 translation columns for comparison analysis.")
else:
st.markdown("""
<div style="background: #fef2f2; border: 1px solid #fecaca; border-radius: 8px; padding: 24px; margin: 16px 0; text-align: center;">
<h3 style="font-family: 'Inter', sans-serif; color: #dc2626; margin: 0 0 12px 0;">❌ No Data Available</h3>
<p style="font-family: 'Inter', sans-serif; color: #7f1d1d; margin: 0;">
Please ensure translation data files are available in the data directory.
</p>
</div>
""", unsafe_allow_html=True)
# Footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: #6b7280; font-family: 'Inter', sans-serif; font-size: 0.875rem;">
Built for DSFSI using Streamlit • Translation APIs: Gemini, GPT, NLLB (hosted locally) • Data Science for Social Impact
</div>
""", unsafe_allow_html=True)
# Script entry point: launch the Streamlit dashboard when run directly
# (e.g. `streamlit run streamlit_app.py`); no side effects on import.
if __name__ == "__main__":
    main()