Spaces:

entropy25
/

multilingual-sentiment-analyzer

Sleeping

App Files Community

entropy25 committed on Jul 28

Commit

0c511f2

verified ·

1 Parent(s): 061ab6f

Update app.py

Browse files

Files changed (1) hide show

app.py +614 -1032

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import torch
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import plotly.graph_objects as go
 import plotly.express as px
 from plotly.subplots import make_subplots
@@ -18,16 +18,8 @@ from functools import lru_cache, wraps
 from dataclasses import dataclass
 from typing import List, Dict, Optional, Tuple, Any, Callable
 from contextlib import contextmanager
-import nltk
-from nltk.corpus import stopwords
-import langdetect
-import pandas as pd
 import gc
-# Advanced analysis imports
-import shap
-import lime
-from lime.lime_text import LimeTextExplainer
 # Configuration
 @dataclass
@@ -39,45 +31,52 @@ class Config:
     CACHE_SIZE: int = 128
     BATCH_PROCESSING_SIZE: int = 8
-    # Supported languages and models
-    SUPPORTED_LANGUAGES = {
-        'auto': 'Auto Detect',
-        'en': 'English',
-        'zh': 'Chinese',
-        'es': 'Spanish',
-        'fr': 'French',
-        'de': 'German',
-        'sv': 'Swedish'
     }
     MODELS = {
-        'en': "cardiffnlp/twitter-roberta-base-sentiment-latest",
-        'multilingual': "cardiffnlp/twitter-xlm-roberta-base-sentiment",
-        'zh': "uer/roberta-base-finetuned-dianping-chinese"
     }
-    # Color themes for Plotly
-    THEMES = {
-        'default': {'pos': '#4CAF50', 'neg': '#F44336', 'neu': '#FF9800'},
-        'ocean': {'pos': '#0077BE', 'neg': '#FF6B35', 'neu': '#00BCD4'},
-        'dark': {'pos': '#66BB6A', 'neg': '#EF5350', 'neu': '#FFA726'},
-        'rainbow': {'pos': '#9C27B0', 'neg': '#E91E63', 'neu': '#FF5722'}
     }
 config = Config()
-# Logging setup
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Initialize NLTK
-try:
-    nltk.download('stopwords', quiet=True)
-    nltk.download('punkt', quiet=True)
-    STOP_WORDS = set(stopwords.words('english'))
-except:
-    STOP_WORDS = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by'}
 # Decorators and Context Managers
 def handle_errors(default_return=None):
     """Centralized error handling decorator"""
@@ -92,166 +91,113 @@ def handle_errors(default_return=None):
         return wrapper
     return decorator
-@contextmanager
-def memory_cleanup():
-    """Context manager for memory cleanup"""
-    try:
-        yield
-    finally:
-        gc.collect()
 class ThemeContext:
     """Theme management context"""
     def __init__(self, theme: str = 'default'):
         self.theme = theme
         self.colors = config.THEMES.get(theme, config.THEMES['default'])
-# Enhanced Model Manager with Multi-language Support
 class ModelManager:
     """Multi-language model manager with lazy loading"""
     _instance = None
     def __new__(cls):
         if cls._instance is None:
             cls._instance = super().__new__(cls)
-            cls._instance._initialized = False
         return cls._instance
-    def __init__(self):
-        if not self._initialized:
-            self.models = {}
-            self.tokenizers = {}
-            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-            self._load_default_models()
-            self._initialized = True
-    def _load_default_models(self):
-        """Load default models"""
-        try:
-            # Load multilingual model as default
-            model_name = config.MODELS['multilingual']
-            self.tokenizers['default'] = AutoTokenizer.from_pretrained(model_name)
-            self.models['default'] = AutoModelForSequenceClassification.from_pretrained(model_name)
-            self.models['default'].to(self.device)
-            logger.info(f"Default model loaded: {model_name}")
-            # Load Chinese model
-            zh_model_name = config.MODELS['zh']
-            self.tokenizers['zh'] = AutoTokenizer.from_pretrained(zh_model_name)
-            self.models['zh'] = AutoModelForSequenceClassification.from_pretrained(zh_model_name)
-            self.models['zh'].to(self.device)
-            logger.info(f"Chinese model loaded: {zh_model_name}")
-        except Exception as e:
-            logger.error(f"Failed to load models: {e}")
-            raise
-    def get_model(self, language='en'):
-        """Get model for specific language"""
-        if language == 'zh':
-            return self.models['zh'], self.tokenizers['zh']
-        return self.models['default'], self.tokenizers['default']
     @staticmethod
     def detect_language(text: str) -> str:
-        """Detect text language"""
-        try:
-            detected = langdetect.detect(text)
-            language_mapping = {
-                'zh-cn': 'zh',
-                'zh-tw': 'zh'
-            }
-            detected = language_mapping.get(detected, detected)
-            return detected if detected in config.SUPPORTED_LANGUAGES else 'en'
-        except:
-            return 'en'
-# Simplified Text Processing
 class TextProcessor:
     """Optimized text processing with multi-language support"""
     @staticmethod
     @lru_cache(maxsize=config.CACHE_SIZE)
-    def clean_text(text: str, remove_punctuation: bool = True, remove_numbers: bool = False) -> str:
-        """Clean text with language awareness"""
-        text = text.strip()
-        # Don't clean Chinese text aggressively
-        if re.search(r'[\u4e00-\u9fff]', text):
-            return text
-        text = text.lower()
-        if remove_numbers:
-            text = re.sub(r'\d+', '', text)
-        if remove_punctuation:
-            text = re.sub(r'[^\w\s]', '', text)
-        words = text.split()
-        cleaned_words = [w for w in words if w not in STOP_WORDS and len(w) >= config.MIN_WORD_LENGTH]
-        return ' '.join(cleaned_words)
-    @staticmethod
-    def extract_keywords(text: str, top_k: int = 5) -> List[str]:
-        """Extract keywords with language support"""
-        if re.search(r'[\u4e00-\u9fff]', text):
-            # Chinese text processing
-            words = re.findall(r'[\u4e00-\u9fff]+', text)
-            all_chars = ''.join(words)
-            char_freq = Counter(all_chars)
-            return [char for char, _ in char_freq.most_common(top_k)]
-        else:
-            # Other languages
-            cleaned = TextProcessor.clean_text(text)
-            words = cleaned.split()
-            word_freq = Counter(words)
-            return [word for word, _ in word_freq.most_common(top_k)]
-    @staticmethod
-    def parse_batch_input(text: str) -> List[str]:
-        """Parse batch input from textarea"""
-        lines = text.strip().split('\n')
-        return [line.strip() for line in lines if line.strip()]
-# Enhanced History Manager
 class HistoryManager:
-    """Enhanced history management with filtering"""
     def __init__(self):
         self._history = []
     def add(self, entry: Dict):
-        """Add entry with timestamp"""
-        entry['timestamp'] = datetime.now().isoformat()
-        self._history.append(entry)
         if len(self._history) > config.MAX_HISTORY_SIZE:
             self._history = self._history[-config.MAX_HISTORY_SIZE:]
-    def add_batch(self, entries: List[Dict]):
-        """Add multiple entries"""
-        for entry in entries:
-            self.add(entry)
     def get_all(self) -> List[Dict]:
         return self._history.copy()
-    def get_recent(self, n: int = 10) -> List[Dict]:
-        return self._history[-n:] if self._history else []
-    def filter_by(self, sentiment: str = None, language: str = None,
-                  min_confidence: float = None) -> List[Dict]:
-        """Filter history by criteria"""
-        filtered = self._history
-        if sentiment:
-            filtered = [h for h in filtered if h['sentiment'] == sentiment]
-        if language:
-            filtered = [h for h in filtered if h.get('language', 'en') == language]
-        if min_confidence:
-            filtered = [h for h in filtered if h['confidence'] >= min_confidence]
-        return filtered
     def clear(self) -> int:
         count = len(self._history)
         self._history.clear()
@@ -259,404 +205,207 @@ class HistoryManager:
     def size(self) -> int:
         return len(self._history)
-    def get_stats(self) -> Dict:
-        """Get comprehensive statistics"""
-        if not self._history:
-            return {}
-        sentiments = [item['sentiment'] for item in self._history]
-        confidences = [item['confidence'] for item in self._history]
-        languages = [item.get('language', 'en') for item in self._history]
-        return {
-            'total_analyses': len(self._history),
-            'positive_count': sentiments.count('Positive'),
-            'negative_count': sentiments.count('Negative'),
-            'neutral_count': sentiments.count('Neutral'),
-            'avg_confidence': np.mean(confidences),
-            'max_confidence': np.max(confidences),
-            'min_confidence': np.min(confidences),
-            'languages_detected': len(set(languages)),
-            'most_common_language': Counter(languages).most_common(1)[0][0] if languages else 'en'
-        }
-# Core Sentiment Analysis Engine (Modified - removed attention analysis)
 class SentimentEngine:
-    """Multi-language sentiment analysis engine"""
     def __init__(self):
         self.model_manager = ModelManager()
-    @handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'keywords': []})
-    def analyze_single(self, text: str, language: str = 'auto', preprocessing_options: Dict = None) -> Dict:
-        """Analyze single text with basic features"""
-        if not text.strip():
-            raise ValueError("Empty text provided")
-        # Detect language
-        if language == 'auto':
-            detected_lang = self.model_manager.detect_language(text)
-        else:
-            detected_lang = language
-        # Get appropriate model
-        model, tokenizer = self.model_manager.get_model(detected_lang)
-        # Preprocessing
-        options = preprocessing_options or {}
-        processed_text = text
-        if options.get('clean_text', False) and not re.search(r'[\u4e00-\u9fff]', text):
-            processed_text = TextProcessor.clean_text(
-                text,
-                options.get('remove_punctuation', True),
-                options.get('remove_numbers', False)
-            )
-        # Tokenize and analyze
-        inputs = tokenizer(processed_text, return_tensors="pt", padding=True,
-                         truncation=True, max_length=config.MAX_TEXT_LENGTH).to(self.model_manager.device)
-        with torch.no_grad():
-            outputs = model(**inputs)
-            probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
-        # Handle different model outputs
-        if len(probs) == 3:  # negative, neutral, positive
-            sentiment_idx = np.argmax(probs)
-            sentiment_labels = ['Negative', 'Neutral', 'Positive']
-            sentiment = sentiment_labels[sentiment_idx]
-            confidence = float(probs[sentiment_idx])
-            result = {
-                'sentiment': sentiment,
-                'confidence': confidence,
-                'neg_prob': float(probs[0]),
-                'neu_prob': float(probs[1]),
-                'pos_prob': float(probs[2]),
-                'has_neutral': True
-            }
-        else:  # negative, positive
-            pred = np.argmax(probs)
-            sentiment = "Positive" if pred == 1 else "Negative"
-            confidence = float(probs[pred])
-            result = {
-                'sentiment': sentiment,
-                'confidence': confidence,
-                'neg_prob': float(probs[0]),
-                'pos_prob': float(probs[1]),
-                'neu_prob': 0.0,
-                'has_neutral': False
-            }
-        # Extract basic keywords
-        keywords = TextProcessor.extract_keywords(text, 10)
-        keyword_tuples = [(word, 0.1) for word in keywords]  # Simple keyword extraction
-        # Add metadata
-        result.update({
-            'language': detected_lang,
-            'keywords': keyword_tuples,
-            'word_count': len(text.split()),
-            'char_count': len(text)
-        })
-        return result
-    @handle_errors(default_return=[])
-    def analyze_batch(self, texts: List[str], language: str = 'auto',
-                     preprocessing_options: Dict = None, progress_callback=None) -> List[Dict]:
-        """Optimized batch processing"""
-        if len(texts) > config.BATCH_SIZE_LIMIT:
-            texts = texts[:config.BATCH_SIZE_LIMIT]
-        results = []
-        batch_size = config.BATCH_PROCESSING_SIZE
-        for i in range(0, len(texts), batch_size):
-            batch = texts[i:i+batch_size]
-            if progress_callback:
-                progress_callback((i + len(batch)) / len(texts))
-            for text in batch:
-                try:
-                    result = self.analyze_single(text, language, preprocessing_options)
-                    result['batch_index'] = len(results)
-                    result['text'] = text[:100] + '...' if len(text) > 100 else text
-                    result['full_text'] = text
-                    results.append(result)
-                except Exception as e:
-                    results.append({
-                        'sentiment': 'Error',
-                        'confidence': 0.0,
-                        'error': str(e),
-                        'batch_index': len(results),
-                        'text': text[:100] + '...' if len(text) > 100 else text,
-                        'full_text': text
-                    })
-        return results
-# Advanced Analysis Engine (NEW)
-class AdvancedAnalysisEngine:
-    """Advanced analysis using SHAP and LIME"""
-    def __init__(self):
-        self.model_manager = ModelManager()
-    def create_prediction_function(self, model, tokenizer, device):
-        """Create prediction function for LIME/SHAP"""
-        def predict_proba(texts):
-            results = []
-            for text in texts:
-                inputs = tokenizer(text, return_tensors="pt", padding=True,
-                                 truncation=True, max_length=config.MAX_TEXT_LENGTH).to(device)
-                with torch.no_grad():
-                    outputs = model(**inputs)
-                    probs = torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
-                results.append(probs)
-            return np.array(results)
-        return predict_proba
-    @handle_errors(default_return=("Analysis failed", None, None))
-    def analyze_with_shap(self, text: str, language: str = 'auto') -> Tuple[str, go.Figure, Dict]:
-        """Perform SHAP analysis"""
-        if not text.strip():
-            return "Please enter text for analysis", None, {}
-        # Detect language and get model
-        if language == 'auto':
-            detected_lang = self.model_manager.detect_language(text)
-        else:
-            detected_lang = language
-        model, tokenizer = self.model_manager.get_model(detected_lang)
-        # Create prediction function
-        predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
         try:
-            # Initialize SHAP explainer
-            explainer = shap.Explainer(predict_fn, tokenizer)
-            # Get SHAP values
-            shap_values = explainer([text])
-            # Extract token importance
-            tokens = shap_values.data[0]
-            values = shap_values.values[0]
-            # Create visualization data
-            if len(values.shape) > 1:
-                # Multi-class case
-                pos_values = values[:, -1] if values.shape[1] == 3 else values[:, 1]
-            else:
-                pos_values = values
-            # Create SHAP plot
-            fig = go.Figure()
-            colors = ['red' if v < 0 else 'green' for v in pos_values]
-            fig.add_trace(go.Bar(
-                x=list(range(len(tokens))),
-                y=pos_values,
-                text=tokens,
-                textposition='outside',
-                marker_color=colors,
-                name='SHAP Values'
-            ))
-            fig.update_layout(
-                title="SHAP Analysis - Token Importance",
-                xaxis_title="Token Index",
-                yaxis_title="SHAP Value",
-                height=500,
-                xaxis=dict(tickmode='array', tickvals=list(range(len(tokens))), ticktext=tokens)
-            )
-            # Create analysis summary
-            analysis_data = {
-                'method': 'SHAP',
-                'language': detected_lang,
-                'total_tokens': len(tokens),
-                'positive_influence': sum(1 for v in pos_values if v > 0),
-                'negative_influence': sum(1 for v in pos_values if v < 0),
-                'most_important_tokens': [(tokens[i], float(pos_values[i]))
-                                        for i in np.argsort(np.abs(pos_values))[-5:]]
             }
-            summary_text = f"""
-**SHAP Analysis Results:**
-- **Language:** {detected_lang.upper()}
-- **Total Tokens:** {analysis_data['total_tokens']}
-- **Positive Influence Tokens:** {analysis_data['positive_influence']}
-- **Negative Influence Tokens:** {analysis_data['negative_influence']}
-- **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
-            """
-            return summary_text, fig, analysis_data
         except Exception as e:
-            logger.error(f"SHAP analysis failed: {e}")
-            return f"SHAP analysis failed: {str(e)}", None, {}
-    @handle_errors(default_return=("Analysis failed", None, None))
-    def analyze_with_lime(self, text: str, language: str = 'auto') -> Tuple[str, go.Figure, Dict]:
-        """Perform LIME analysis"""
         if not text.strip():
-            return "Please enter text for analysis", None, {}
-        # Detect language and get model
-        if language == 'auto':
-            detected_lang = self.model_manager.detect_language(text)
         else:
-            detected_lang = language
-        model, tokenizer = self.model_manager.get_model(detected_lang)
-        # Create prediction function
-        predict_fn = self.create_prediction_function(model, tokenizer, self.model_manager.device)
-        try:
-            # Initialize LIME explainer
-            explainer = LimeTextExplainer(class_names=['Negative', 'Neutral', 'Positive'])
-            # Get LIME explanation
-            exp = explainer.explain_instance(text, predict_fn, num_features=20)
-            # Extract feature importance
-            lime_data = exp.as_list()
-            # Create visualization
-            words = [item[0] for item in lime_data]
-            scores = [item[1] for item in lime_data]
-            fig = go.Figure()
-            colors = ['red' if s < 0 else 'green' for s in scores]
-            fig.add_trace(go.Bar(
-                y=words,
-                x=scores,
-                orientation='h',
-                marker_color=colors,
-                text=[f'{s:.3f}' for s in scores],
-                textposition='auto',
-                name='LIME Importance'
-            ))
-            fig.update_layout(
-                title="LIME Analysis - Feature Importance",
-                xaxis_title="Importance Score",
-                yaxis_title="Words/Phrases",
-                height=500
-            )
-            # Create analysis summary
-            analysis_data = {
-                'method': 'LIME',
-                'language': detected_lang,
-                'features_analyzed': len(lime_data),
-                'positive_features': sum(1 for _, score in lime_data if score > 0),
-                'negative_features': sum(1 for _, score in lime_data if score < 0),
-                'feature_importance': lime_data
-            }
-            summary_text = f"""
-**LIME Analysis Results:**
-- **Language:** {detected_lang.upper()}
-- **Features Analyzed:** {analysis_data['features_analyzed']}
-- **Positive Features:** {analysis_data['positive_features']}
-- **Negative Features:** {analysis_data['negative_features']}
-- **Top Features:** {', '.join([f"{word}({score:.3f})" for word, score in lime_data[:5]])}
-            """
-            return summary_text, fig, analysis_data
-        except Exception as e:
-            logger.error(f"LIME analysis failed: {e}")
-            return f"LIME analysis failed: {str(e)}", None, {}
-# Advanced Plotly Visualization System (Updated - removed attention visualization)
-class PlotlyVisualizer:
-    """Enhanced Plotly visualizations"""
-    @staticmethod
-    @handle_errors(default_return=None)
-    def create_sentiment_gauge(result: Dict, theme: ThemeContext) -> go.Figure:
-        """Create animated sentiment gauge"""
-        colors = theme.colors
-        if result.get('has_neutral', False):
-            # Three-way gauge
-            fig = go.Figure(go.Indicator(
-                mode="gauge+number+delta",
-                value=result['pos_prob'] * 100,
-                domain={'x': [0, 1], 'y': [0, 1]},
-                title={'text': f"Sentiment: {result['sentiment']}"},
-                delta={'reference': 50},
-                gauge={
-                    'axis': {'range': [None, 100]},
-                    'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
-                    'steps': [
-                        {'range': [0, 33], 'color': colors['neg']},
-                        {'range': [33, 67], 'color': colors['neu']},
-                        {'range': [67, 100], 'color': colors['pos']}
-                    ],
-                    'threshold': {
-                        'line': {'color': "red", 'width': 4},
-                        'thickness': 0.75,
-                        'value': 90
-                    }
-                }
-            ))
-        else:
-            # Two-way gauge
-            fig = go.Figure(go.Indicator(
-                mode="gauge+number",
-                value=result['confidence'] * 100,
-                domain={'x': [0, 1], 'y': [0, 1]},
-                title={'text': f"Confidence: {result['sentiment']}"},
-                gauge={
-                    'axis': {'range': [None, 100]},
-                    'bar': {'color': colors['pos'] if result['sentiment'] == 'Positive' else colors['neg']},
-                    'steps': [
-                        {'range': [0, 50], 'color': "lightgray"},
-                        {'range': [50, 100], 'color': "gray"}
-                    ]
-                }
-            ))
-        fig.update_layout(height=400, font={'size': 16})
-        return fig
     @staticmethod
     @handle_errors(default_return=None)
-    def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure:
-        """Create probability bar chart"""
-        colors = theme.colors
-        if result.get('has_neutral', False):
-            labels = ['Negative', 'Neutral', 'Positive']
-            values = [result['neg_prob'], result['neu_prob'], result['pos_prob']]
-            bar_colors = [colors['neg'], colors['neu'], colors['pos']]
-        else:
-            labels = ['Negative', 'Positive']
-            values = [result['neg_prob'], result['pos_prob']]
-            bar_colors = [colors['neg'], colors['pos']]
         fig = go.Figure(data=[
-            go.Bar(x=labels, y=values, marker_color=bar_colors,
-                   text=[f'{v:.3f}' for v in values], textposition='outside')
         ])
         fig.update_layout(
             title="Sentiment Probabilities",
             yaxis_title="Probability",
-            height=400,
             showlegend=False
         )
@@ -664,160 +413,171 @@ class PlotlyVisualizer:
     @staticmethod
     @handle_errors(default_return=None)
-    def create_keyword_chart(keywords: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> go.Figure:
-        """Create basic keyword chart"""
-        if not keywords:
-            fig = go.Figure()
-            fig.add_annotation(text="No keywords extracted",
-                             xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
-            fig.update_layout(height=400, title="Keywords")
-            return fig
-        words = [word for word, score in keywords]
-        scores = [score for word, score in keywords]
-        color = theme.colors['pos'] if sentiment == 'Positive' else theme.colors['neg']
-        fig = go.Figure(data=[
-            go.Bar(
-                y=words,
-                x=scores,
-                orientation='h',
-                marker_color=color,
-                text=[f'{score:.3f}' for score in scores],
-                textposition='auto'
-            )
-        ])
         fig.update_layout(
-            title=f"Top Keywords ({sentiment})",
-            xaxis_title="Frequency Score",
-            yaxis_title="Keywords",
-            height=400,
-            showlegend=False
         )
         return fig
     @staticmethod
     @handle_errors(default_return=None)
-    def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure:
-        """Create batch analysis summary"""
-        colors = theme.colors
-        # Count sentiments
-        sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
-        sentiment_counts = Counter(sentiments)
-        # Create pie chart
-        fig = go.Figure(data=[go.Pie(
-            labels=list(sentiment_counts.keys()),
-            values=list(sentiment_counts.values()),
-            marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
-            textinfo='label+percent',
-            hole=0.3
-        )])
         fig.update_layout(
-            title=f"Batch Analysis Summary ({len(results)} texts)",
-            height=400
         )
         return fig
     @staticmethod
     @handle_errors(default_return=None)
-    def create_confidence_distribution(results: List[Dict]) -> go.Figure:
-        """Create confidence distribution plot"""
-        confidences = [r['confidence'] for r in results if 'confidence' in r and r['sentiment'] != 'Error']
-        if not confidences:
-            return go.Figure()
-        fig = go.Figure(data=[go.Histogram(
-            x=confidences,
-            nbinsx=20,
-            marker_color='skyblue',
-            opacity=0.7
-        )])
-        fig.update_layout(
-            title="Confidence Distribution",
-            xaxis_title="Confidence Score",
-            yaxis_title="Frequency",
-            height=400
-        )
-        return fig
     @staticmethod
     @handle_errors(default_return=None)
-    def create_history_dashboard(history: List[Dict], theme: ThemeContext) -> go.Figure:
-        """Create comprehensive history dashboard"""
-        if len(history) < 2:
-            return go.Figure()
-        # Create subplots
         fig = make_subplots(
             rows=2, cols=2,
-            subplot_titles=['Sentiment Timeline', 'Confidence Distribution',
-                           'Language Distribution', 'Sentiment Summary'],
-            specs=[[{"secondary_y": False}, {"secondary_y": False}],
-                   [{"type": "pie"}, {"type": "bar"}]]
         )
-        # Extract data
-        indices = list(range(len(history)))
-        pos_probs = [item.get('pos_prob', 0) for item in history]
-        confidences = [item['confidence'] for item in history]
-        sentiments = [item['sentiment'] for item in history]
-        languages = [item.get('language', 'en') for item in history]
-        # Sentiment timeline
-        colors_map = {'Positive': theme.colors['pos'], 'Negative': theme.colors['neg'], 'Neutral': theme.colors['neu']}
-        colors = [colors_map.get(s, '#999999') for s in sentiments]
         fig.add_trace(
-            go.Scatter(x=indices, y=pos_probs, mode='lines+markers',
-                      marker=dict(color=colors, size=8),
-                      name='Positive Probability'),
             row=1, col=1
         )
-        # Confidence distribution
         fig.add_trace(
-            go.Histogram(x=confidences, nbinsx=10, name='Confidence'),
             row=1, col=2
         )
-        # Language distribution
-        lang_counts = Counter(languages)
         fig.add_trace(
-            go.Pie(labels=list(lang_counts.keys()), values=list(lang_counts.values()),
-                   name="Languages"),
             row=2, col=1
         )
-        # Sentiment summary
-        sent_counts = Counter(sentiments)
-        sent_colors = [colors_map.get(k, '#999999') for k in sent_counts.keys()]
-        fig.add_trace(
-            go.Bar(x=list(sent_counts.keys()), y=list(sent_counts.values()),
-                   marker_color=sent_colors),
-            row=2, col=2
         )
-        fig.update_layout(height=800, showlegend=False)
         return fig
-# Universal Data Handler
 class DataHandler:
-    """Enhanced data operations"""
     @staticmethod
     @handle_errors(default_return=(None, "Export failed"))
     def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
-        """Export data with comprehensive information"""
         if not data:
             return None, "No data to export"
@@ -826,21 +586,18 @@ class DataHandler:
         if format_type == 'csv':
             writer = csv.writer(temp_file)
-            writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Language',
-                           'Pos_Prob', 'Neg_Prob', 'Neu_Prob', 'Keywords', 'Word_Count'])
             for entry in data:
-                keywords_str = "|".join([f"{word}:{score:.3f}" for word, score in entry.get('keywords', [])])
                 writer.writerow([
                     entry.get('timestamp', ''),
                     entry.get('text', ''),
                     entry.get('sentiment', ''),
                     f"{entry.get('confidence', 0):.4f}",
-                    entry.get('language', 'en'),
                     f"{entry.get('pos_prob', 0):.4f}",
                     f"{entry.get('neg_prob', 0):.4f}",
-                    f"{entry.get('neu_prob', 0):.4f}",
-                    keywords_str,
-                    entry.get('word_count', 0)
                 ])
         elif format_type == 'json':
             json.dump(data, temp_file, indent=2, ensure_ascii=False)
@@ -851,26 +608,27 @@ class DataHandler:
     @staticmethod
     @handle_errors(default_return="")
     def process_file(file) -> str:
-        """Process uploaded files"""
         if not file:
             return ""
         content = file.read().decode('utf-8')
         if file.name.endswith('.csv'):
             csv_file = io.StringIO(content)
             reader = csv.reader(csv_file)
             try:
-                next(reader)  # Skip header
                 texts = []
                 for row in reader:
                     if row and row[0].strip():
                         text = row[0].strip().strip('"')
-                        if text:
                             texts.append(text)
                 return '\n'.join(texts)
-            except:
-                lines = content.strip().split('\n')[1:]
                 texts = []
                 for line in lines:
                     if line.strip():
@@ -878,271 +636,171 @@ class DataHandler:
                         if text:
                             texts.append(text)
                 return '\n'.join(texts)
         return content
-# Main Application Class
 class SentimentApp:
-    """Main multilingual sentiment analysis application"""
     def __init__(self):
         self.engine = SentimentEngine()
-        self.advanced_engine = AdvancedAnalysisEngine()  # NEW
         self.history = HistoryManager()
         self.data_handler = DataHandler()
         # Multi-language examples
         self.examples = [
-            ["This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout."],
-            ["The film was disappointing with poor character development and a confusing storyline."],
-            ["这部电影真的很棒！演技精湛，情节引人入胜。"],  # Chinese
-            ["Esta película fue increíble, me encantó la cinematografía."],  # Spanish
-            ["Ce film était magnifique, j'ai adoré la réalisation."],  # French
         ]
-    @handle_errors(default_return=("Please enter text", None, None, None))
-    def analyze_single(self, text: str, language: str, theme: str, clean_text: bool,
-                       remove_punct: bool, remove_nums: bool):
-        """Single text analysis with basic visualizations (removed attention analysis)"""
         if not text.strip():
-            return "Please enter text", None, None, None
-        # Map display names to language codes
-        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
-        language_code = language_map.get(language, 'auto')
-        preprocessing_options = {
-            'clean_text': clean_text,
-            'remove_punctuation': remove_punct,
-            'remove_numbers': remove_nums
-        }
-        with memory_cleanup():
-            result = self.engine.analyze_single(text, language_code, preprocessing_options)
-            # Add to history
-            history_entry = {
-                'text': text[:100] + '...' if len(text) > 100 else text,
-                'full_text': text,
-                'sentiment': result['sentiment'],
-                'confidence': result['confidence'],
-                'pos_prob': result.get('pos_prob', 0),
-                'neg_prob': result.get('neg_prob', 0),
-                'neu_prob': result.get('neu_prob', 0),
-                'language': result['language'],
-                'keywords': result['keywords'],
-                'word_count': result['word_count'],
-                'analysis_type': 'single'
-            }
-            self.history.add(history_entry)
-            # Create visualizations
-            theme_ctx = ThemeContext(theme)
-            gauge_fig = PlotlyVisualizer.create_sentiment_gauge(result, theme_ctx)
-            bars_fig = PlotlyVisualizer.create_probability_bars(result, theme_ctx)
-            keyword_fig = PlotlyVisualizer.create_keyword_chart(result['keywords'], result['sentiment'], theme_ctx)
-            # Create comprehensive result text
-            keywords_str = ", ".join([f"{word}({score:.3f})" for word, score in result['keywords'][:5]])
-            info_text = f"""
-**Analysis Results:**
-- **Sentiment:** {result['sentiment']} ({result['confidence']:.3f} confidence)
-- **Language:** {result['language'].upper()}
-- **Keywords:** {keywords_str}
-- **Statistics:** {result['word_count']} words, {result['char_count']} characters
-            """
-            return info_text, gauge_fig, bars_fig, keyword_fig
-    @handle_errors(default_return=("Please enter texts", None, None, None))
-    def analyze_batch(self, batch_text: str, language: str, theme: str,
-                     clean_text: bool, remove_punct: bool, remove_nums: bool):
-        """Enhanced batch analysis"""
-        if not batch_text.strip():
-            return "Please enter texts (one per line)", None, None, None
-        # Parse batch input
-        texts = TextProcessor.parse_batch_input(batch_text)
-        if len(texts) > config.BATCH_SIZE_LIMIT:
-            return f"Too many texts. Maximum {config.BATCH_SIZE_LIMIT} allowed.", None, None, None
-        if not texts:
-            return "No valid texts found", None, None, None
-        # Map display names to language codes
-        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
-        language_code = language_map.get(language, 'auto')
-        preprocessing_options = {
-            'clean_text': clean_text,
-            'remove_punctuation': remove_punct,
-            'remove_numbers': remove_nums
-        }
-        with memory_cleanup():
-            results = self.engine.analyze_batch(texts, language_code, preprocessing_options)
-            # Add to history
-            batch_entries = []
-            for result in results:
-                if 'error' not in result:
-                    entry = {
-                        'text': result['text'],
-                        'full_text': result['full_text'],
-                        'sentiment': result['sentiment'],
-                        'confidence': result['confidence'],
-                        'pos_prob': result.get('pos_prob', 0),
-                        'neg_prob': result.get('neg_prob', 0),
-                        'neu_prob': result.get('neu_prob', 0),
-                        'language': result['language'],
-                        'keywords': result['keywords'],
-                        'word_count': result['word_count'],
-                        'analysis_type': 'batch',
-                        'batch_index': result['batch_index']
-                    }
-                    batch_entries.append(entry)
-            self.history.add_batch(batch_entries)
-            # Create visualizations
-            theme_ctx = ThemeContext(theme)
-            summary_fig = PlotlyVisualizer.create_batch_summary(results, theme_ctx)
-            confidence_fig = PlotlyVisualizer.create_confidence_distribution(results)
-            # Create results DataFrame
-            df_data = []
-            for result in results:
-                if 'error' in result:
-                    df_data.append({
-                        'Index': result['batch_index'] + 1,
-                        'Text': result['text'],
-                        'Sentiment': 'Error',
-                        'Confidence': 0.0,
-                        'Language': 'Unknown',
-                        'Error': result['error']
-                    })
-                else:
-                    keywords_str = ', '.join([word for word, _ in result['keywords'][:3]])
-                    df_data.append({
-                        'Index': result['batch_index'] + 1,
-                        'Text': result['text'],
-                        'Sentiment': result['sentiment'],
-                        'Confidence': f"{result['confidence']:.3f}",
-                        'Language': result['language'].upper(),
-                        'Keywords': keywords_str
-                    })
-            df = pd.DataFrame(df_data)
-            # Create summary text
-            successful_results = [r for r in results if 'error' not in r]
-            error_count = len(results) - len(successful_results)
-            if successful_results:
-                sentiment_counts = Counter([r['sentiment'] for r in successful_results])
-                avg_confidence = np.mean([r['confidence'] for r in successful_results])
-                languages = Counter([r['language'] for r in successful_results])
-                summary_text = f"""
-**Batch Analysis Summary:**
-- **Total Texts:** {len(texts)}
-- **Successful:** {len(successful_results)}
-- **Errors:** {error_count}
-- **Average Confidence:** {avg_confidence:.3f}
-- **Sentiments:** {dict(sentiment_counts)}
-- **Languages Detected:** {dict(languages)}
-                """
-            else:
-                summary_text = f"All {len(texts)} texts failed to analyze."
-            return summary_text, df, summary_fig, confidence_fig
-    # NEW: Advanced analysis methods
-    @handle_errors(default_return=("Please enter text", None))
-    def analyze_with_shap(self, text: str, language: str):
-        """Perform SHAP analysis"""
-        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
-        language_code = language_map.get(language, 'auto')
-        return self.advanced_engine.analyze_with_shap(text, language_code)
-    @handle_errors(default_return=("Please enter text", None))
-    def analyze_with_lime(self, text: str, language: str):
-        """Perform LIME analysis"""
-        language_map = {v: k for k, v in config.SUPPORTED_LANGUAGES.items()}
-        language_code = language_map.get(language, 'auto')
-        return self.advanced_engine.analyze_with_lime(text, language_code)
     @handle_errors(default_return=(None, "No history available"))
     def plot_history(self, theme: str = 'default'):
-        """Plot comprehensive history analysis"""
         history = self.history.get_all()
         if len(history) < 2:
             return None, f"Need at least 2 analyses for trends. Current: {len(history)}"
         theme_ctx = ThemeContext(theme)
-        with memory_cleanup():
-            fig = PlotlyVisualizer.create_history_dashboard(history, theme_ctx)
-            stats = self.history.get_stats()
-            stats_text = f"""
-**History Statistics:**
-- **Total Analyses:** {stats.get('total_analyses', 0)}
-- **Positive:** {stats.get('positive_count', 0)}
-- **Negative:** {stats.get('negative_count', 0)}
-- **Neutral:** {stats.get('neutral_count', 0)}
-- **Average Confidence:** {stats.get('avg_confidence', 0):.3f}
-- **Languages:** {stats.get('languages_detected', 0)}
-- **Most Common Language:** {stats.get('most_common_language', 'N/A').upper()}
-            """
-            return fig, stats_text
-    @handle_errors(default_return=("No data available",))
-    def get_history_status(self):
-        """Get current history status"""
-        stats = self.history.get_stats()
-        if not stats:
-            return "No analyses performed yet"
-        return f"""
-**Current Status:**
-- **Total Analyses:** {stats['total_analyses']}
-- **Recent Sentiment Distribution:**
-  * Positive: {stats['positive_count']}
-  * Negative: {stats['negative_count']}
-  * Neutral: {stats['neutral_count']}
-- **Average Confidence:** {stats['avg_confidence']:.3f}
-- **Languages Detected:** {stats['languages_detected']}
-        """
-# Gradio Interface (Updated with Advanced Analysis tab)
 def create_interface():
-    """Create comprehensive Gradio interface with Advanced Analysis tab"""
     app = SentimentApp()
-    with gr.Blocks(theme=gr.themes.Soft(), title="Multilingual Sentiment Analyzer") as demo:
-        gr.Markdown("# 🌍 Advanced Multilingual Sentiment Analyzer")
-        gr.Markdown("AI-powered sentiment analysis with support for multiple languages, advanced visualizations, and explainable AI features")
         with gr.Tab("Single Analysis"):
             with gr.Row():
                 with gr.Column():
                     text_input = gr.Textbox(
-                        label="Enter Text for Analysis",
-                        placeholder="Enter your text in any supported language...",
                         lines=5
                     )
                     with gr.Row():
-                        language_selector = gr.Dropdown(
-                            choices=list(config.SUPPORTED_LANGUAGES.values()),
-                            value="Auto Detect",
-                            label="Language"
                         )
                         theme_selector = gr.Dropdown(
                             choices=list(config.THEMES.keys()),
@@ -1150,218 +808,142 @@ def create_interface():
                             label="Theme"
                         )
-                    with gr.Row():
-                        clean_text_cb = gr.Checkbox(label="Clean Text", value=False)
-                        remove_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
-                        remove_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
-                    analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
                     gr.Examples(
                         examples=app.examples,
                         inputs=text_input,
-                        cache_examples=False
                     )
                 with gr.Column():
-                    result_output = gr.Textbox(label="Analysis Results", lines=8)
             with gr.Row():
-                gauge_plot = gr.Plot(label="Sentiment Gauge")
-                probability_plot = gr.Plot(label="Probability Distribution")
             with gr.Row():
-                keyword_plot = gr.Plot(label="Basic Keywords")
-        # NEW: Advanced Analysis Tab
-        with gr.Tab("Advanced Analysis"):
-            gr.Markdown("## 🔬 Explainable AI Analysis")
-            gr.Markdown("Use SHAP and LIME to understand which words and phrases most influence the sentiment prediction.")
-            with gr.Row():
-                with gr.Column():
-                    advanced_text_input = gr.Textbox(
-                        label="Enter Text for Advanced Analysis",
-                        placeholder="Enter text to analyze with SHAP and LIME...",
-                        lines=6
-                    )
-                    advanced_language = gr.Dropdown(
-                        choices=list(config.SUPPORTED_LANGUAGES.values()),
-                        value="Auto Detect",
-                        label="Language"
-                    )
-                    with gr.Row():
-                        shap_btn = gr.Button("SHAP Analysis", variant="primary")
-                        lime_btn = gr.Button("LIME Analysis", variant="secondary")
-                    gr.Markdown("""
-                    **Analysis Methods:**
-                    - **SHAP**: Shows token-level importance scores
-                    - **LIME**: Explains predictions by perturbing input features
-                    """)
-                with gr.Column():
-                    advanced_results = gr.Textbox(label="Analysis Summary", lines=10)
-            with gr.Row():
-                advanced_plot = gr.Plot(label="Feature Importance Visualization")
         with gr.Tab("Batch Analysis"):
             with gr.Row():
                 with gr.Column():
-                    file_upload = gr.File(
-                        label="Upload File (CSV/TXT)",
-                        file_types=[".csv", ".txt"]
-                    )
                     batch_input = gr.Textbox(
-                        label="Batch Input (one text per line)",
-                        placeholder="Enter multiple texts, one per line...",
-                        lines=10
                     )
-                    with gr.Row():
-                        batch_language = gr.Dropdown(
-                            choices=list(config.SUPPORTED_LANGUAGES.values()),
-                            value="Auto Detect",
-                            label="Language"
-                        )
-                        batch_theme = gr.Dropdown(
-                            choices=list(config.THEMES.keys()),
-                            value="default",
-                            label="Theme"
-                        )
-                    with gr.Row():
-                        batch_clean_cb = gr.Checkbox(label="Clean Text", value=False)
-                        batch_punct_cb = gr.Checkbox(label="Remove Punctuation", value=False)
-                        batch_nums_cb = gr.Checkbox(label="Remove Numbers", value=False)
-                    with gr.Row():
-                        load_file_btn = gr.Button("Load File")
-                        analyze_batch_btn = gr.Button("Analyze Batch", variant="primary")
                 with gr.Column():
-                    batch_summary = gr.Textbox(label="Batch Summary", lines=8)
-                    batch_results_df = gr.Dataframe(
-                        label="Detailed Results",
-                        headers=["Index", "Text", "Sentiment", "Confidence", "Language", "Keywords"],
-                        datatype=["number", "str", "str", "str", "str", "str"]
-                    )
-            with gr.Row():
-                batch_plot = gr.Plot(label="Batch Analysis Summary")
-                confidence_dist_plot = gr.Plot(label="Confidence Distribution")
-        with gr.Tab("History & Analytics"):
             with gr.Row():
-                with gr.Column():
-                    with gr.Row():
-                        refresh_history_btn = gr.Button("Refresh History")
-                        clear_history_btn = gr.Button("Clear History", variant="stop")
-                        status_btn = gr.Button("Get Status")
-                    history_theme = gr.Dropdown(
-                        choices=list(config.THEMES.keys()),
-                        value="default",
-                        label="Dashboard Theme"
-                    )
-                    with gr.Row():
-                        export_csv_btn = gr.Button("Export CSV")
-                        export_json_btn = gr.Button("Export JSON")
-                with gr.Column():
-                    history_status = gr.Textbox(label="History Status", lines=8)
-            history_dashboard = gr.Plot(label="History Analytics Dashboard")
             with gr.Row():
-                csv_download = gr.File(label="CSV Download", visible=True)
-                json_download = gr.File(label="JSON Download", visible=True)
-        # Event Handlers
-        # Single Analysis
         analyze_btn.click(
             app.analyze_single,
-            inputs=[text_input, language_selector, theme_selector,
-                   clean_text_cb, remove_punct_cb, remove_nums_cb],
-            outputs=[result_output, gauge_plot, probability_plot, keyword_plot]
-        )
-        # Advanced Analysis (NEW)
-        shap_btn.click(
-            app.analyze_with_shap,
-            inputs=[advanced_text_input, advanced_language],
-            outputs=[advanced_results, advanced_plot]
         )
-        lime_btn.click(
-            app.analyze_with_lime,
-            inputs=[advanced_text_input, advanced_language],
-            outputs=[advanced_results, advanced_plot]
-        )
-        # Batch Analysis
-        load_file_btn.click(
-            app.data_handler.process_file,
-            inputs=file_upload,
             outputs=batch_input
         )
-        analyze_batch_btn.click(
-            app.analyze_batch,
-            inputs=[batch_input, batch_language, batch_theme,
-                   batch_clean_cb, batch_punct_cb, batch_nums_cb],
-            outputs=[batch_summary, batch_results_df, batch_plot, confidence_dist_plot]
         )
-        # History & Analytics
-        refresh_history_btn.click(
-            app.plot_history,
-            inputs=history_theme,
-            outputs=[history_dashboard, history_status]
         )
-        clear_history_btn.click(
             lambda: f"Cleared {app.history.clear()} entries",
             outputs=history_status
         )
         status_btn.click(
-            app.get_history_status,
             outputs=history_status
         )
-        export_csv_btn.click(
             lambda: app.data_handler.export_data(app.history.get_all(), 'csv'),
-            outputs=[csv_download, history_status]
         )
-        export_json_btn.click(
             lambda: app.data_handler.export_data(app.history.get_all(), 'json'),
-            outputs=[json_download, history_status]
         )
     return demo
 # Application Entry Point
 if __name__ == "__main__":
-    logging.basicConfig(
-        level=logging.INFO,
-        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-    )
-    try:
-        demo = create_interface()
-        demo.launch(
-            share=True,
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_error=True
-        )
-    except Exception as e:
-        logger.error(f"Failed to launch application: {e}")
-        raise

 import torch
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import plotly.graph_objects as go
 import plotly.express as px
 from plotly.subplots import make_subplots
 from dataclasses import dataclass
 from typing import List, Dict, Optional, Tuple, Any, Callable
 from contextlib import contextmanager
 import gc
+import base64
 # Configuration
 @dataclass
     CACHE_SIZE: int = 128
     BATCH_PROCESSING_SIZE: int = 8
+    # Visualization settings
+    FIGURE_WIDTH: int = 800
+    FIGURE_HEIGHT: int = 500
+    WORDCLOUD_SIZE: Tuple[int, int] = (800, 400)
+    THEMES = {
+        'default': {'pos': '#4ecdc4', 'neg': '#ff6b6b'},
+        'ocean': {'pos': '#0077be', 'neg': '#ff6b35'},
+        'forest': {'pos': '#228b22', 'neg': '#dc143c'},
+        'sunset': {'pos': '#ff8c00', 'neg': '#8b0000'}
     }
+    # Multi-language models
     MODELS = {
+        'multilingual': {
+            'name': 'cardiffnlp/twitter-xlm-roberta-base-sentiment',
+            'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
+        },
+        'english': {
+            'name': 'cardiffnlp/twitter-roberta-base-sentiment-latest',
+            'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
+        },
+        'chinese': {
+            'name': 'uer/roberta-base-finetuned-chinanews-chinese',
+            'labels': ['NEGATIVE', 'POSITIVE']
+        },
+        'spanish': {
+            'name': 'finiteautomata/beto-sentiment-analysis',
+            'labels': ['NEGATIVE', 'NEUTRAL', 'POSITIVE']
+        },
+        'french': {
+            'name': 'tblard/tf-allocine',
+            'labels': ['NEGATIVE', 'POSITIVE']
+        }
     }
+    STOP_WORDS = {
+        'en': {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'will', 'would', 'could', 'should'},
+        'zh': {'的', '了', '在', '是', '我', '有', '和', '就', '不', '人', '都', '一', '一个', '上', '也', '很', '到', '说', '要', '去', '你', '会', '着', '没有', '看'},
+        'es': {'el', 'la', 'de', 'que', 'y', 'a', 'en', 'un', 'es', 'se', 'no', 'te', 'lo', 'le', 'da', 'su', 'por', 'son', 'con', 'para', 'al', 'del', 'los', 'las'},
+        'fr': {'le', 'la', 'les', 'de', 'un', 'une', 'du', 'des', 'et', 'à', 'ce', 'il', 'que', 'qui', 'ne', 'se', 'pas', 'tout', 'être', 'avoir', 'sur', 'avec', 'par'},
     }
 config = Config()
 logger = logging.getLogger(__name__)
 # Decorators and Context Managers
 def handle_errors(default_return=None):
     """Centralized error handling decorator"""
         return wrapper
     return decorator
 class ThemeContext:
     """Theme management context"""
     def __init__(self, theme: str = 'default'):
         self.theme = theme
         self.colors = config.THEMES.get(theme, config.THEMES['default'])
# Enhanced Model Manager for Multi-language Support
class ModelManager:
    """Multi-language model manager with lazy loading.

    A process-wide singleton: every ``ModelManager()`` call returns the same
    object, and the caches below live on the class so they are shared too.
    Models are only loaded the first time they are requested.
    """
    _instance = None
    _models = {}       # model_key -> sequence-classification model
    _tokenizers = {}   # model_key -> tokenizer matching the model above
    _pipelines = {}    # model_key -> sentiment-analysis pipeline
    _device = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @property
    def device(self):
        """Return the torch device (CUDA when available, otherwise CPU)."""
        if self._device is None:
            self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return self._device

    def get_pipeline(self, model_key: str = 'multilingual'):
        """Get or create the sentiment analysis pipeline for ``model_key``.

        Args:
            model_key: A key of ``config.MODELS``.

        Returns:
            The cached pipeline for ``model_key``. When that model cannot be
            loaded (or the key is unknown) the multilingual pipeline is
            returned instead and remembered under ``model_key``, so the
            failing load is not retried on every request.

        Raises:
            Exception: Whatever the loader raised, when the multilingual
                model itself cannot be loaded.
        """
        cached = self._pipelines.get(model_key)
        if cached is not None:
            return cached
        try:
            model_config = config.MODELS[model_key]
            classifier = pipeline(
                "sentiment-analysis",
                model=model_config['name'],
                tokenizer=model_config['name'],
                device=0 if torch.cuda.is_available() else -1,
                top_k=None
            )
        except Exception as e:
            logger.error(f"Failed to load model {model_key}: {e}")
            if model_key == 'multilingual':
                raise
            # Fallback to multilingual model
            classifier = self.get_pipeline('multilingual')
        else:
            logger.info(f"Model {model_key} loaded successfully")
        self._pipelines[model_key] = classifier
        return classifier

    def get_model_and_tokenizer(self, model_key: str = 'multilingual'):
        """Get the raw model and tokenizer used for attention extraction.

        Args:
            model_key: A key of ``config.MODELS``.

        Returns:
            A ``(model, tokenizer)`` tuple with the model moved to
            ``self.device``. Falls back to the multilingual pair, cached under
            ``model_key``, when the requested model cannot be loaded.

        Raises:
            Exception: Whatever the loader raised, when the multilingual
                model itself cannot be loaded.
        """
        if model_key in self._models and model_key in self._tokenizers:
            return self._models[model_key], self._tokenizers[model_key]
        try:
            model_config = config.MODELS[model_key]
            tokenizer = AutoTokenizer.from_pretrained(model_config['name'])
            model = AutoModelForSequenceClassification.from_pretrained(model_config['name'])
            model.to(self.device)
        except Exception as e:
            logger.error(f"Failed to load model/tokenizer {model_key}: {e}")
            if model_key == 'multilingual':
                raise
            model, tokenizer = self.get_model_and_tokenizer('multilingual')
        else:
            logger.info(f"Model and tokenizer {model_key} loaded for attention extraction")
        # Publish both halves together so the caches never hold a tokenizer
        # without its model (or the other way round).
        self._models[model_key] = model
        self._tokenizers[model_key] = tokenizer
        return model, tokenizer
# Language Detection
class LanguageDetector:
    """Heuristic language detection based on characters and function words.

    Only accented or CJK text is ever classified; plain ASCII text is always
    routed to the multilingual model.
    """

    _CJK = re.compile(r'[\u4e00-\u9fff]')
    # Characters that separate the two languages. 'é' and 'ü' occur in both
    # Spanish and French, so on their own they cannot decide anything.
    _SPANISH_ONLY = re.compile(r'[ñáíóú¿¡]')
    _FRENCH_ONLY = re.compile(r'[àâäçèêëïîôùûÿœæ]')
    _SHARED = re.compile(r'[éü]')
    _WORD = re.compile(r'[^\W\d_]+')
    # Small, mutually exclusive function-word lists used as a tie-breaker.
    _SPANISH_WORDS = frozenset({
        'el', 'los', 'las', 'una', 'muy', 'pero', 'con', 'del', 'esta',
        'este', 'fue', 'por', 'para', 'lo', 'al', 'como', 'qué', 'también',
    })
    _FRENCH_WORDS = frozenset({
        'des', 'une', 'et', 'ce', 'cette', 'je', 'du', 'pas', 'ne', 'avec',
        'dans', 'au', 'ai', 'est', 'il', 'elle', 'nous', 'vous', 'pour',
        'était', 'très',
    })

    @staticmethod
    def detect_language(text: str) -> str:
        """Detect language based on character patterns.

        Args:
            text: The text to classify.

        Returns:
            ``'chinese'``, ``'spanish'``, ``'french'`` or ``'multilingual'``.
            ``'multilingual'`` is returned whenever the evidence is absent or
            evenly split.
        """
        detector = LanguageDetector
        # Chinese characters
        if detector._CJK.search(text):
            return 'chinese'

        lowered = text.lower()
        spanish = len(detector._SPANISH_ONLY.findall(lowered))
        french = len(detector._FRENCH_ONLY.findall(lowered))

        if spanish == french:
            # No accented characters at all: default to English/Multilingual.
            if not spanish and not detector._SHARED.search(lowered):
                return 'multilingual'
            # Only shared characters (or an even split): let common function
            # words break the tie.
            words = set(detector._WORD.findall(lowered))
            spanish = len(words & detector._SPANISH_WORDS)
            french = len(words & detector._FRENCH_WORDS)

        if spanish > french:
            return 'spanish'
        if french > spanish:
            return 'french'
        return 'multilingual'
# Simplified Core Classes
class TextProcessor:
    """Text clean-up helpers that honour per-language stop-word lists."""

    @staticmethod
    @lru_cache(maxsize=config.CACHE_SIZE)
    def clean_text(text: str, language: str = 'en') -> Tuple[str, ...]:
        """Lower-case ``text`` and return its meaningful words.

        Args:
            text: Raw input text.
            language: Key into ``config.STOP_WORDS``; unknown keys fall back
                to the ``'en'`` list.

        Returns:
            A tuple of the words, in order of appearance, that are at least
            two characters long, not stop words, and not shorter than
            ``config.MIN_WORD_LENGTH``. Results are memoised per
            ``(text, language)`` pair.
        """
        candidates = re.findall(r'\b\w{2,}\b', text.lower())
        ignored = config.STOP_WORDS.get(language, config.STOP_WORDS['en'])
        kept = []
        for candidate in candidates:
            if candidate in ignored:
                continue
            if len(candidate) < config.MIN_WORD_LENGTH:
                continue
            kept.append(candidate)
        return tuple(kept)
 class HistoryManager:
+    """Simplified history management"""
     def __init__(self):
         self._history = []
     def add(self, entry: Dict):
+        self._history.append({**entry, 'timestamp': datetime.now().isoformat()})
         if len(self._history) > config.MAX_HISTORY_SIZE:
             self._history = self._history[-config.MAX_HISTORY_SIZE:]
     def get_all(self) -> List[Dict]:
         return self._history.copy()
     def clear(self) -> int:
         count = len(self._history)
         self._history.clear()
     def size(self) -> int:
         return len(self._history)
+# Core Analysis Engine with Multi-language Support
 class SentimentEngine:
+    """Multi-language sentiment analysis with attention-based keyword extraction"""
     def __init__(self):
         self.model_manager = ModelManager()
+        self.language_detector = LanguageDetector()
+    def extract_key_words(self, text: str, model_key: str = 'multilingual', top_k: int = 10) -> List[Tuple[str, float]]:
+        """Extract contributing words using attention weights"""
         try:
+            model, tokenizer = self.model_manager.get_model_and_tokenizer(model_key)
+            inputs = tokenizer(
+                text, return_tensors="pt", padding=True,
+                truncation=True, max_length=config.MAX_TEXT_LENGTH
+            ).to(self.model_manager.device)
+            # Get model outputs with attention weights
+            with torch.no_grad():
+                outputs = model(**inputs, output_attentions=True)
+                attention = outputs.attentions
+                # Use the last layer's attention, average over all heads
+                last_attention = attention[-1]
+                avg_attention = last_attention.mean(dim=1)
+                # Focus on attention to [CLS] token
+                cls_attention = avg_attention[0, 0, :]
+            # Get tokens and their attention scores
+            tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
+            attention_scores = cls_attention.cpu().numpy()
+            # Filter out special tokens and combine subword tokens
+            word_scores = {}
+            current_word = ""
+            current_score = 0.0
+            for i, (token, score) in enumerate(zip(tokens, attention_scores)):
+                if token in ['[CLS]', '[SEP]', '[PAD]', '<s>', '</s>', '<pad>']:
+                    continue
+                if token.startswith('##') or token.startswith('▁'):
+                    # Subword token
+                    current_word += token[2:] if token.startswith('##') else token[1:]
+                    current_score = max(current_score, score)
+                else:
+                    # New word, save previous if exists
+                    if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
+                        word_scores[current_word.lower()] = current_score
+                    current_word = token
+                    current_score = score
+            # Don't forget the last word
+            if current_word and len(current_word) >= config.MIN_WORD_LENGTH:
+                word_scores[current_word.lower()] = current_score
+            # Filter out stop words and sort by attention score
+            lang_code = 'zh' if model_key == 'chinese' else 'es' if model_key == 'spanish' else 'fr' if model_key == 'french' else 'en'
+            stop_words = config.STOP_WORDS.get(lang_code, config.STOP_WORDS['en'])
+            filtered_words = {
+                word: score for word, score in word_scores.items()
+                if word not in stop_words and len(word) >= config.MIN_WORD_LENGTH
             }
+            # Sort by attention score and return top_k
+            sorted_words = sorted(filtered_words.items(), key=lambda x: x[1], reverse=True)
+            return sorted_words[:top_k]
         except Exception as e:
+            logger.error(f"Key word extraction failed: {e}")
+            return []
@handle_errors(default_return={'sentiment': 'Unknown', 'confidence': 0.0, 'key_words': []})
def analyze_single(self, text: str, model_key: str = None) -> Dict:
    """Classify the sentiment of one text and extract its key words.

    Args:
        text: Text to analyze; must contain at least one non-whitespace
            character.
        model_key: Key handed to ``self.model_manager.get_pipeline``. When
            ``None``, the language is detected with
            ``self.language_detector`` and used as the key if it is present
            in ``config.MODELS``; otherwise ``'multilingual'`` is used.

    Returns:
        Dict with the keys ``sentiment`` ('Positive', 'Negative' or
        'Neutral'), ``confidence``, ``pos_prob``, ``neg_prob``,
        ``neutral_prob``, ``key_words`` and ``language`` (the model key
        that was actually used).

    Raises:
        ValueError: If ``text`` is empty or whitespace only.
            NOTE(review): ``handle_errors`` presumably converts this into
            ``default_return`` -- confirm against the decorator.
    """
    if not text or not text.strip():
        raise ValueError("Empty text")

    # Auto-detect the language when the caller did not choose a model
    if model_key is None:
        detected_lang = self.language_detector.detect_language(text)
        model_key = detected_lang if detected_lang in config.MODELS else 'multilingual'

    classifier = self.model_manager.get_pipeline(model_key)
    results = classifier(text)
    # Some pipeline configurations nest the per-label scores one level deeper
    if isinstance(results[0], list):
        results = results[0]

    # Match label names case-insensitively: models report them with
    # different casing and decorations, and a case-sensitive match would
    # silently leave every score at 0.0.
    scores = {'POSITIVE': 0.0, 'NEGATIVE': 0.0, 'NEUTRAL': 0.0}
    for result in results:
        label = str(result['label']).upper()
        for name in scores:
            if name in label:
                scores[name] = result['score']
                break

    pos_score = scores['POSITIVE']
    neg_score = scores['NEGATIVE']
    neutral_score = scores['NEUTRAL']

    # A class wins only when strictly greater than both others; anything
    # else (including ties) is reported as Neutral.
    if pos_score > neg_score and pos_score > neutral_score:
        sentiment = 'Positive'
        confidence = pos_score
    elif neg_score > pos_score and neg_score > neutral_score:
        sentiment = 'Negative'
        confidence = neg_score
    else:
        sentiment = 'Neutral'
        confidence = neutral_score

    # Extract key contributing words
    key_words = self.extract_key_words(text, model_key)

    return {
        'sentiment': sentiment,
        'confidence': float(confidence),
        'pos_prob': float(pos_score),
        'neg_prob': float(neg_score),
        'neutral_prob': float(neutral_score),
        'key_words': key_words,
        'language': model_key
    }
@handle_errors(default_return=[])
def analyze_batch(self, texts: List[str], model_key: str = None, progress_callback=None) -> List[Dict]:
    """Analyze several texts one after another.

    Args:
        texts: Texts to analyze; only the first ``config.BATCH_SIZE_LIMIT``
            entries are processed.
        model_key: Forwarded unchanged to ``analyze_single`` for every text.
        progress_callback: Optional callable invoked before each text is
            analyzed with the fraction ``(index + 1) / total``.

    Returns:
        One result dict per processed text, each extended with ``text``
        (preview of at most 50 characters plus ``'...'``) and ``full_text``.
    """
    texts = texts[:config.BATCH_SIZE_LIMIT]
    total = len(texts)

    results = []
    for position, text in enumerate(texts, start=1):
        if progress_callback:
            progress_callback(position / total)

        # Copy before mutating: on failure analyze_single may hand back the
        # decorator's shared default dict, and writing into it would alias
        # every failed row (and pollute later failures).
        result = dict(self.analyze_single(text, model_key))
        result['text'] = text[:50] + '...' if len(text) > 50 else text
        result['full_text'] = text
        results.append(result)

    return results
+# Plotly Visualization System
class PlotFactory:
    """Factory for creating Plotly visualizations.

    Every builder is a static method wrapped in ``handle_errors`` with
    ``default_return=None``.
    """

    # Colour used for the neutral class; it is fixed here rather than read
    # from the theme.
    NEUTRAL_COLOR = '#FFA500'

    @staticmethod
    def _sentiment_color(sentiment: str, theme: ThemeContext) -> str:
        """Return the display colour for a sentiment label."""
        if sentiment == 'Positive':
            return theme.colors['pos']
        if sentiment == 'Negative':
            return theme.colors['neg']
        return PlotFactory.NEUTRAL_COLOR

    @staticmethod
    @handle_errors(default_return=None)
    def create_sentiment_bars(result: Dict, theme: ThemeContext) -> go.Figure:
        """Create a bar chart of the class probabilities.

        Args:
            result: Analysis result; the keys ``neg_prob``, ``neutral_prob``
                and ``pos_prob`` are read when present.
            theme: Supplies the positive/negative colours via ``colors``.

        Returns:
            Bar figure with one bar per class whose probability is > 0, in
            the order Negative, Neutral, Positive.
        """
        labels = []
        values = []
        colors = []
        for label, key in (("Negative", 'neg_prob'),
                           ("Neutral", 'neutral_prob'),
                           ("Positive", 'pos_prob')):
            prob = result.get(key, 0)
            if prob > 0:
                labels.append(label)
                values.append(prob)
                colors.append(PlotFactory._sentiment_color(label, theme))

        fig = go.Figure(data=[
            go.Bar(
                x=labels,
                y=values,
                marker_color=colors,
                text=[f'{v:.3f}' for v in values],
                textposition='auto',
            )
        ])
        fig.update_layout(
            title="Sentiment Probabilities",
            xaxis_title="Sentiment",
            yaxis_title="Probability",
            yaxis=dict(range=[0, 1]),
            width=config.FIGURE_WIDTH,
            height=config.FIGURE_HEIGHT,
            showlegend=False
        )
        # The figure was previously built but never returned
        return fig

    @staticmethod
    @handle_errors(default_return=None)
    def create_confidence_gauge(confidence: float, sentiment: str, theme: ThemeContext) -> go.Figure:
        """Create a gauge showing the confidence of the predicted sentiment.

        Args:
            confidence: Value displayed by the gauge (axis upper bound is 1).
            sentiment: Predicted label; selects the bar colour and the title.
            theme: Supplies the positive/negative colours via ``colors``.

        Returns:
            Indicator figure with a delta against 0.5 and a threshold
            marker at 0.9.
        """
        color = PlotFactory._sentiment_color(sentiment, theme)
        fig = go.Figure(go.Indicator(
            mode="gauge+number+delta",
            value=confidence,
            domain={'x': [0, 1], 'y': [0, 1]},
            title={'text': f"{sentiment} Confidence"},
            delta={'reference': 0.5},
            gauge={
                'axis': {'range': [None, 1]},
                'bar': {'color': color},
                'steps': [
                    {'range': [0, 0.5], 'color': "lightgray"},
                    {'range': [0.5, 1], 'color': "gray"}
                ],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 0.9
                }
            }
        ))
        fig.update_layout(
            width=config.FIGURE_WIDTH,
            height=config.FIGURE_HEIGHT
        )
        return fig

    @staticmethod
    @handle_errors(default_return=None)
    def create_keyword_chart(key_words: List[Tuple[str, float]], sentiment: str, theme: ThemeContext) -> Optional[go.Figure]:
        """Create a horizontal bar chart of the key contributing words.

        Args:
            key_words: ``(word, score)`` pairs.
            sentiment: Predicted label; selects the bar colour and the title.
            theme: Supplies the positive/negative colours via ``colors``.

        Returns:
            The figure, or ``None`` when ``key_words`` is empty.
        """
        if not key_words:
            return None

        words = [word for word, _ in key_words]
        scores = [score for _, score in key_words]

        fig = go.Figure(go.Bar(
            x=scores,
            y=words,
            orientation='h',
            marker_color=PlotFactory._sentiment_color(sentiment, theme),
            text=[f'{score:.3f}' for score in scores],
            textposition='auto',
        ))
        fig.update_layout(
            title=f'Top Contributing Words ({sentiment})',
            xaxis_title='Attention Weight',
            yaxis_title='Words',
            width=config.FIGURE_WIDTH,
            height=config.FIGURE_HEIGHT,
            yaxis={'categoryorder': 'total ascending'}
        )
        return fig

    @staticmethod
    @handle_errors(default_return=None)
    def create_wordcloud_plot(text: str, sentiment: str, theme: ThemeContext) -> Optional[go.Figure]:
        """Create a word cloud rendered as a Plotly image trace.

        Args:
            text: Source text; fewer than three whitespace-separated tokens
                yields ``None``.
            sentiment: Predicted label; selects the colormap and the title.
            theme: Accepted for signature parity with the other builders;
                not used here.

        Returns:
            The figure, or ``None`` when the text is too short or word
            cloud generation fails (the failure is logged).
        """
        if len(text.split()) < 3:
            return None

        try:
            colormap = 'Greens' if sentiment == 'Positive' else 'Reds' if sentiment == 'Negative' else 'Blues'
            wc = WordCloud(
                width=config.WORDCLOUD_SIZE[0],
                height=config.WORDCLOUD_SIZE[1],
                background_color='white',
                colormap=colormap,
                max_words=30
            ).generate(text)

            # Embed the rendered cloud as an image; axes are hidden
            fig = go.Figure()
            fig.add_trace(go.Image(z=wc.to_array()))
            fig.update_layout(
                title=f'{sentiment} Word Cloud',
                xaxis={'visible': False},
                yaxis={'visible': False},
                width=config.FIGURE_WIDTH,
                height=config.FIGURE_HEIGHT,
                margin=dict(l=0, r=0, t=30, b=0)
            )
            return fig
        except Exception as e:
            logger.error(f"Word cloud generation failed: {e}")
            return None

    @staticmethod
    @handle_errors(default_return=None)
    def create_batch_analysis(results: List[Dict], theme: ThemeContext) -> go.Figure:
        """Create the combined batch figure.

        Layout: a pie of sentiment counts and a confidence histogram on the
        first row, and a full-width scatter of positive probability per
        item on the second row.

        Args:
            results: Result dicts; ``sentiment`` and ``confidence`` are
                required, ``pos_prob`` defaults to 0.
            theme: Supplies the positive/negative colours via ``colors``.

        Returns:
            The assembled figure.
        """
        # The grid holds exactly three subplots (the bottom one spans both
        # columns), so exactly three titles are listed.
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=['Sentiment Distribution', 'Confidence Distribution',
                            'Sentiment Progression'],
            specs=[[{"type": "pie"}, {"type": "histogram"}],
                   [{"type": "scatter", "colspan": 2}, None]]
        )

        # Sentiment distribution (pie chart)
        sent_counts = Counter(r['sentiment'] for r in results)
        sentiments = list(sent_counts)
        fig.add_trace(
            go.Pie(labels=sentiments, values=list(sent_counts.values()),
                   marker_colors=[PlotFactory._sentiment_color(s, theme) for s in sentiments],
                   name="Sentiment"),
            row=1, col=1
        )

        # Confidence histogram
        fig.add_trace(
            go.Histogram(x=[r['confidence'] for r in results], nbinsx=8,
                         marker_color='skyblue', name="Confidence"),
            row=1, col=2
        )

        # Sentiment progression: positive probability by position in the batch
        fig.add_trace(
            go.Scatter(x=list(range(len(results))),
                       y=[r.get('pos_prob', 0) for r in results],
                       mode='markers',
                       marker=dict(
                           color=[PlotFactory._sentiment_color(r['sentiment'], theme) for r in results],
                           size=8),
                       name="Sentiment Progression"),
            row=2, col=1
        )
        # Add horizontal line at 0.5
        fig.add_hline(y=0.5, line_dash="dash", line_color="gray", row=2, col=1)

        fig.update_layout(
            height=800,
            width=1000,
            showlegend=False,
            title_text="Batch Analysis Results"
        )
        return fig
# Unified Data Handler (unchanged)
class DataHandler:
    """Handles all data operations: exporting history and reading uploads."""

    @staticmethod
    @handle_errors(default_return=(None, "Export failed"))
    def export_data(data: List[Dict], format_type: str) -> Tuple[Optional[str], str]:
        """Write ``data`` to a temporary file as CSV or JSON.

        Args:
            data: History entries (dicts) to export.
            format_type: ``'csv'`` or ``'json'``.

        Returns:
            ``(path, message)``. ``path`` is the temporary file's name, or
            ``None`` when there is nothing to export or the format is not
            supported.
        """
        import tempfile

        if not data:
            return None, "No data to export"
        if format_type not in ('csv', 'json'):
            return None, f"Unsupported export format: {format_type}"

        # delete=False keeps the file on disk after the handle is closed so
        # the path can be returned; newline='' is what the csv module
        # expects of files it writes to.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix=f'.{format_type}', delete=False,
            encoding='utf-8', newline=''
        ) as temp_file:
            if format_type == 'csv':
                writer = csv.writer(temp_file)
                writer.writerow(['Timestamp', 'Text', 'Sentiment', 'Confidence', 'Pos_Prob', 'Neg_Prob', 'Neutral_Prob', 'Language', 'Key_Words'])
                for entry in data:
                    writer.writerow([
                        entry.get('timestamp', ''),
                        entry.get('text', ''),
                        entry.get('sentiment', ''),
                        f"{entry.get('confidence', 0):.4f}",
                        f"{entry.get('pos_prob', 0):.4f}",
                        f"{entry.get('neg_prob', 0):.4f}",
                        f"{entry.get('neutral_prob', 0):.4f}",
                        entry.get('language', ''),
                        # Key words are flattened to "word:score|word:score"
                        "|".join(f"{word}:{score:.3f}" for word, score in entry.get('key_words', []))
                    ])
            else:
                json.dump(data, temp_file, indent=2, ensure_ascii=False)

        return temp_file.name, f"Exported {len(data)} entries"

    @staticmethod
    @handle_errors(default_return="")
    def process_file(file) -> str:
        """Read an uploaded file and return its texts, one per line.

        Args:
            file: Either an object with ``read()`` and ``name``, or a path
                (or an object exposing only ``name``) that is opened here.

        Returns:
            For names ending in ``.csv``: the first column of every row
            after the header, joined with newlines. For anything else: the
            decoded file content. Empty string when ``file`` is falsy.
        """
        if not file:
            return ""

        import io

        path = getattr(file, 'name', file)
        if hasattr(file, 'read'):
            raw = file.read()
        else:
            with open(path, 'rb') as handle:
                raw = handle.read()
        content = raw.decode('utf-8') if isinstance(raw, bytes) else raw

        if not str(path).endswith('.csv'):
            return content

        texts = []
        try:
            reader = csv.reader(io.StringIO(content))
            # Skip the header row; the default avoids StopIteration on an empty file
            next(reader, None)
            for row in reader:
                if row and row[0].strip():
                    text = row[0].strip().strip('"')
                    if text:
                        texts.append(text)
        except csv.Error as e:
            # Fall back to a naive line split when the csv parser rejects the content
            logger.warning(f"CSV parsing failed, falling back to line split: {e}")
            texts = []
            for line in content.strip().split('\n')[1:]:
                text = line.strip().strip('"')
                if text:
                    texts.append(text)
        return '\n'.join(texts)
# Main Application with Multi-language Support
class SentimentApp:
    """Main application orchestrator with multi-language support.

    Connects the sentiment engine, the analysis history and the data
    handler, and exposes the callbacks bound to the UI.
    """

    def __init__(self):
        self.engine = SentimentEngine()
        self.history = HistoryManager()
        self.data_handler = DataHandler()

        # Multi-language examples
        self.examples = [
            ["While the film's visual effects were undeniably impressive, the story lacked emotional weight, and the pacing felt inconsistent throughout."],
            ["这部电影的视觉效果令人印象深刻，但故事缺乏情感深度，节奏感也不够连贯。"],
            ["Aunque los efectos visuales de la película fueron innegablemente impresionantes, la historia carecía de peso emocional."],
            ["Bien que les effets visuels du film soient indéniablement impressionnants, l'histoire manquait de poids émotionnel."],
            ["An extraordinary achievement in filmmaking — the direction was masterful, the script was sharp, and every performance added depth and realism."]
        ]

    @handle_errors(default_return=("Please enter text", None, None, None, None))
    def analyze_single(self, text: str, model_key: str = 'multilingual', theme: str = 'default'):
        """Analyze one text and build its visualizations.

        Args:
            text: Text to analyze.
            model_key: Model key forwarded to the engine.
            theme: Theme name; a falsy value falls back to ``'default'``.

        Returns:
            Tuple ``(result_text, prob_plot, gauge_plot, cloud_plot,
            keyword_plot)``; for empty input the message
            ``"Please enter text"`` followed by four ``None`` values.
        """
        if not text or not text.strip():
            return "Please enter text", None, None, None, None

        result = self.engine.analyze_single(text, model_key)

        # Add to history
        self.history.add({
            'text': text[:100],
            'full_text': text,
            **result
        })

        # The engine's failure default carries neither 'key_words' details
        # nor 'language'; read both defensively so a failed analysis is
        # reported as such instead of raising KeyError here.
        key_words = result.get('key_words', [])
        language = result.get('language', model_key)

        # Create visualizations
        theme_ctx = ThemeContext(theme or 'default')
        prob_plot = PlotFactory.create_sentiment_bars(result, theme_ctx)
        gauge_plot = PlotFactory.create_confidence_gauge(result['confidence'], result['sentiment'], theme_ctx)
        cloud_plot = PlotFactory.create_wordcloud_plot(text, result['sentiment'], theme_ctx)
        keyword_plot = PlotFactory.create_keyword_chart(key_words, result['sentiment'], theme_ctx)

        # Format result text with the top five key words
        key_words_str = ", ".join(f"{word}({score:.3f})" for word, score in key_words[:5])
        result_text = (f"Sentiment: {result['sentiment']} (Confidence: {result['confidence']:.3f})\n"
                       f"Language: {language}\n"
                       f"Key Words: {key_words_str}")

        return result_text, prob_plot, gauge_plot, cloud_plot, keyword_plot

    @handle_errors(default_return=None)
    def analyze_batch(self, reviews: str, model_key: str = 'multilingual', progress=None):
        """Analyze newline-separated reviews and plot the batch overview.

        Args:
            reviews: Texts separated by newlines; blank lines are ignored.
            model_key: Model key forwarded to the engine.
            progress: Optional progress callback forwarded to the engine.

        Returns:
            The batch figure, or ``None`` when fewer than two non-empty
            lines were supplied.
        """
        if not reviews or not reviews.strip():
            return None

        texts = [r.strip() for r in reviews.split('\n') if r.strip()]
        if len(texts) < 2:
            return None

        results = self.engine.analyze_batch(texts, model_key, progress)

        # Add to history
        for result in results:
            self.history.add(result)

        # Create visualization
        theme_ctx = ThemeContext('default')
        return PlotFactory.create_batch_analysis(results, theme_ctx)

    @handle_errors(default_return=(None, "No history available"))
    def plot_history(self, theme: str = 'default'):
        """Plot positive probability and confidence across the history.

        Args:
            theme: Theme name; a falsy value falls back to ``'default'``.

        Returns:
            ``(figure, status_message)``; ``(None, message)`` when fewer
            than two analyses are stored.
        """
        history = self.history.get_all()
        if len(history) < 2:
            return None, f"Need at least 2 analyses for trends. Current: {len(history)}"

        theme_ctx = ThemeContext(theme or 'default')

        # Create subplots
        fig = make_subplots(
            rows=2, cols=1,
            subplot_titles=['Sentiment History', 'Confidence Over Time'],
            vertical_spacing=0.12
        )

        indices = list(range(len(history)))
        pos_probs = [item.get('pos_prob', 0) for item in history]
        confs = [item.get('confidence', 0) for item in history]

        # Sentiment trend: markers are coloured by which side of 0.5 they fall on
        colors = [theme_ctx.colors['pos'] if p > 0.5 else theme_ctx.colors['neg'] for p in pos_probs]
        fig.add_trace(
            go.Scatter(
                x=indices,
                y=pos_probs,
                mode='markers+lines',
                marker=dict(color=colors, size=8),
                line=dict(color='gray', width=2),
                name='Sentiment Trend'
            ),
            row=1, col=1
        )
        # Add horizontal line at 0.5
        fig.add_hline(y=0.5, line_dash="dash", line_color="gray", row=1, col=1)

        # Confidence trend
        fig.add_trace(
            go.Bar(
                x=indices,
                y=confs,
                marker_color='lightblue',
                marker_line_color='navy',
                marker_line_width=1,
                name='Confidence'
            ),
            row=2, col=1
        )

        fig.update_layout(
            height=800,
            width=1000,
            showlegend=False,
            title_text="Analysis History"
        )
        fig.update_xaxes(title_text="Analysis Number", row=2, col=1)
        fig.update_yaxes(title_text="Positive Probability", row=1, col=1)
        fig.update_yaxes(title_text="Confidence", row=2, col=1)

        return fig, f"History: {len(history)} analyses"
# Gradio Interface Setup with Multi-language Support
def create_interface():
    """Build the Gradio Blocks UI and wire its events to a ``SentimentApp``.

    The UI has four tabs: single analysis, batch analysis, history/export
    and a static model overview.

    Returns:
        The assembled ``gr.Blocks`` instance (not launched).
    """
    app = SentimentApp()

    # Shared by both model dropdowns.
    # NOTE(review): 'Auto-detect' and 'Multilingual' carry the same value,
    # so picking either sends 'multilingual' to the callbacks.
    language_models = [
        ('Auto-detect', 'multilingual'),
        ('Multilingual', 'multilingual'),
        ('English', 'english'),
        ('Chinese 中文', 'chinese'),
        ('Spanish Español', 'spanish'),
        ('French Français', 'french')
    ]

    # Callbacks for the History & Export tab
    def refresh_history(theme):
        return app.plot_history(theme)

    def clear_history():
        return f"Cleared {app.history.clear()} entries"

    def show_status():
        return f"History: {app.history.size()} entries | Available Models: {', '.join(config.MODELS.keys())}"

    def export_csv():
        return app.data_handler.export_data(app.history.get_all(), 'csv')

    def export_json():
        return app.data_handler.export_data(app.history.get_all(), 'json')

    with gr.Blocks(theme=gr.themes.Soft(), title="Multi-language Sentiment Analyzer") as demo:
        gr.Markdown("# 🌍 AI Multi-language Sentiment Analyzer")
        gr.Markdown("Advanced sentiment analysis supporting multiple languages with Plotly visualizations and key word extraction")

        with gr.Tab("Single Analysis"):
            with gr.Row():
                with gr.Column():
                    review_box = gr.Textbox(
                        label="Review Text (Multiple Languages Supported)",
                        placeholder="Enter your review in any supported language...",
                        lines=5
                    )
                    with gr.Row():
                        run_single = gr.Button("Analyze", variant="primary")
                        single_model = gr.Dropdown(
                            choices=list(language_models),
                            value="multilingual",
                            label="Language Model"
                        )
                        theme_choice = gr.Dropdown(
                            choices=list(config.THEMES.keys()),
                            label="Theme"
                        )
                    gr.Examples(
                        examples=app.examples,
                        inputs=review_box,
                        label="Multi-language Examples"
                    )
                with gr.Column():
                    single_result = gr.Textbox(label="Analysis Result", lines=4)
            with gr.Row():
                probability_view = gr.Plot(label="Sentiment Probabilities")
                gauge_view = gr.Plot(label="Confidence Gauge")
            with gr.Row():
                cloud_view = gr.Plot(label="Word Cloud")
                keyword_view = gr.Plot(label="Key Contributing Words")

        with gr.Tab("Batch Analysis"):
            with gr.Row():
                with gr.Column():
                    upload = gr.File(label="Upload File", file_types=[".csv", ".txt"])
                    batch_box = gr.Textbox(
                        label="Reviews (one per line, mixed languages supported)",
                        lines=8,
                        placeholder="Enter multiple reviews, one per line...\nSupports mixed languages in the same batch!"
                    )
                with gr.Column():
                    load_file = gr.Button("Load File")
                    with gr.Row():
                        run_batch = gr.Button("Analyze Batch", variant="primary")
                        batch_model = gr.Dropdown(
                            choices=list(language_models),
                            value="multilingual",
                            label="Batch Model"
                        )
            batch_view = gr.Plot(label="Batch Analysis Results")

        with gr.Tab("History & Export"):
            with gr.Row():
                refresh = gr.Button("Refresh History")
                clear = gr.Button("Clear History", variant="stop")
                status = gr.Button("Show Status")
            with gr.Row():
                to_csv = gr.Button("Export CSV")
                to_json = gr.Button("Export JSON")
            status_box = gr.Textbox(label="Status Information")
            history_view = gr.Plot(label="History Trends")
            csv_download = gr.File(label="CSV Download", visible=True)
            json_download = gr.File(label="JSON Download", visible=True)

        with gr.Tab("Model Information"):
            gr.Markdown("""
            ## Supported Languages and Models
            | Language | Model | Description |
            |----------|-------|-------------|
            | **Multilingual** | XLM-RoBERTa | Supports 100+ languages automatically |
            | **English** | RoBERTa-base | Optimized for English text |
            | **Chinese 中文** | RoBERTa-Chinese | Specialized for Chinese language |
            | **Spanish Español** | BETO | Fine-tuned for Spanish sentiment |
            | **French Français** | tf-allocine | Trained on French movie reviews |
            ### Features:
            - **Automatic Language Detection**: The system can automatically detect the input language
            - **Attention-based Keywords**: Extract words that contribute most to sentiment prediction
            - **Interactive Visualizations**: Plotly-powered charts and graphs
            - **Batch Processing**: Analyze multiple texts at once
            - **Export Capabilities**: Save results in CSV or JSON format
            - **Multi-language Support**: Mix different languages in batch analysis
            """)

        # Event bindings
        run_single.click(
            app.analyze_single,
            inputs=[review_box, single_model, theme_choice],
            outputs=[single_result, probability_view, gauge_view, cloud_view, keyword_view]
        )
        load_file.click(
            app.data_handler.process_file,
            inputs=upload,
            outputs=batch_box
        )
        run_batch.click(
            app.analyze_batch,
            inputs=[batch_box, batch_model],
            outputs=batch_view
        )
        refresh.click(
            refresh_history,
            inputs=theme_choice,
            outputs=[history_view, status_box]
        )
        clear.click(
            clear_history,
            outputs=status_box
        )
        status.click(
            show_status,
            outputs=status_box
        )
        to_csv.click(
            export_csv,
            outputs=[csv_download, status_box]
        )
        to_json.click(
            export_json,
            outputs=[json_download, status_box]
        )

    return demo
 # Application Entry Point
 if __name__ == "__main__":
     # Configure logging first so interface construction is logged as well
     logging.basicConfig(level=logging.INFO)
     demo = create_interface()
     # Launch settings: listen on all interfaces at port 7860, request a
     # share link, and show errors in the UI
     launch_options = {
         'share': True,
         'server_name': "0.0.0.0",
         'server_port': 7860,
         'show_error': True,
     }
     demo.launch(**launch_options)