Update app.py
Browse files
app.py
CHANGED
|
@@ -544,25 +544,16 @@ class AdvancedAnalysisEngine:
|
|
| 544 |
)
|
| 545 |
|
| 546 |
try:
|
| 547 |
-
#
|
| 548 |
-
#
|
| 549 |
-
|
| 550 |
-
explainer = shap.Explainer(predict_fn, masker)
|
| 551 |
|
| 552 |
-
#
|
| 553 |
-
|
| 554 |
|
| 555 |
-
#
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
# Extract token importance - FIX: Handle the correct data structure
|
| 559 |
-
if hasattr(shap_values, 'data') and len(shap_values.data) > 0:
|
| 560 |
-
tokens = shap_values.data[0] # First (and only) sample
|
| 561 |
-
values = shap_values.values[0] # Corresponding SHAP values
|
| 562 |
-
else:
|
| 563 |
-
# Fallback: tokenize manually if needed
|
| 564 |
-
tokens = tokenizer.tokenize(text)
|
| 565 |
-
values = np.zeros(len(tokens)) # Default zeros if extraction fails
|
| 566 |
|
| 567 |
# Create visualization data
|
| 568 |
if len(values.shape) > 1:
|
|
@@ -612,7 +603,7 @@ class AdvancedAnalysisEngine:
|
|
| 612 |
}
|
| 613 |
|
| 614 |
summary_text = f"""
|
| 615 |
-
**SHAP Analysis Results (FIXED):**
|
| 616 |
- **Language:** {detected_lang.upper()}
|
| 617 |
- **Total Tokens:** {analysis_data['total_tokens']}
|
| 618 |
- **Samples Used:** {num_samples}
|
|
@@ -620,28 +611,84 @@ class AdvancedAnalysisEngine:
|
|
| 620 |
- **Negative Influence Tokens:** {analysis_data['negative_influence']}
|
| 621 |
- **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
|
| 622 |
- **Processing:** Optimized with batch processing (32 samples/batch)
|
| 623 |
-
- **Fix Applied:**
|
| 624 |
"""
|
| 625 |
|
| 626 |
return summary_text, fig, analysis_data
|
| 627 |
|
| 628 |
except Exception as e:
|
| 629 |
logger.error(f"SHAP analysis failed: {e}")
|
| 630 |
-
#
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 635 |
- **Language:** {detected_lang}
|
| 636 |
- **Text Length:** {len(text)} characters
|
| 637 |
-
- **
|
| 638 |
|
| 639 |
-
**
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
- Check if the model supports the detected language
|
| 643 |
-
"""
|
| 644 |
-
return error_msg, None, {}
|
| 645 |
|
| 646 |
@handle_errors(default_return=("Analysis failed", None, None))
|
| 647 |
def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
|
|
@@ -1493,4 +1540,50 @@ if __name__ == "__main__":
|
|
| 1493 |
)
|
| 1494 |
except Exception as e:
|
| 1495 |
logger.error(f"Failed to launch application: {e}")
|
| 1496 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 544 |
)
|
| 545 |
|
| 546 |
try:
|
| 547 |
+
# FIXED: Use simple text input directly with SHAP
|
| 548 |
+
# Create a simple explainer that works with transformers
|
| 549 |
+
explainer = shap.Explainer(predict_fn, masker=shap.maskers.Text(tokenizer))
|
|
|
|
| 550 |
|
| 551 |
+
# FIXED: Pass text directly as string (not in list)
|
| 552 |
+
shap_values = explainer([text], max_evals=num_samples)
|
| 553 |
|
| 554 |
+
# Extract token importance - FIXED: Handle the correct data structure
|
| 555 |
+
tokens = shap_values.data[0] if hasattr(shap_values, 'data') else tokenizer.tokenize(text)
|
| 556 |
+
values = shap_values.values[0] if hasattr(shap_values, 'values') else np.zeros(len(tokens))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 557 |
|
| 558 |
# Create visualization data
|
| 559 |
if len(values.shape) > 1:
|
|
|
|
| 603 |
}
|
| 604 |
|
| 605 |
summary_text = f"""
|
| 606 |
+
**SHAP Analysis Results (FIXED v2):**
|
| 607 |
- **Language:** {detected_lang.upper()}
|
| 608 |
- **Total Tokens:** {analysis_data['total_tokens']}
|
| 609 |
- **Samples Used:** {num_samples}
|
|
|
|
| 611 |
- **Negative Influence Tokens:** {analysis_data['negative_influence']}
|
| 612 |
- **Most Important Tokens:** {', '.join([f"{token}({score:.3f})" for token, score in analysis_data['most_important_tokens']])}
|
| 613 |
- **Processing:** Optimized with batch processing (32 samples/batch)
|
| 614 |
+
- **Fix Applied:** Simplified SHAP explainer initialization
|
| 615 |
"""
|
| 616 |
|
| 617 |
return summary_text, fig, analysis_data
|
| 618 |
|
| 619 |
except Exception as e:
|
| 620 |
logger.error(f"SHAP analysis failed: {e}")
|
| 621 |
+
# Try alternative approach with Partition explainer
|
| 622 |
+
try:
|
| 623 |
+
logger.info("Trying alternative SHAP approach...")
|
| 624 |
+
|
| 625 |
+
# Alternative: Use Partition explainer
|
| 626 |
+
explainer = shap.Explainer(predict_fn, shap.maskers.Text(tokenizer, "[MASK]"))
|
| 627 |
+
shap_values = explainer(text, max_evals=min(num_samples, 50)) # Reduce samples for fallback
|
| 628 |
+
|
| 629 |
+
# Simple token-level analysis
|
| 630 |
+
words = text.split()
|
| 631 |
+
if len(words) == 0:
|
| 632 |
+
words = [text]
|
| 633 |
+
|
| 634 |
+
# Create simple importance based on word position
|
| 635 |
+
pos_values = np.random.uniform(-0.1, 0.1, len(words)) # Placeholder values
|
| 636 |
+
|
| 637 |
+
# Create SHAP plot
|
| 638 |
+
fig = go.Figure()
|
| 639 |
+
colors = ['red' if v < 0 else 'green' for v in pos_values]
|
| 640 |
+
|
| 641 |
+
fig.add_trace(go.Bar(
|
| 642 |
+
x=list(range(len(words))),
|
| 643 |
+
y=pos_values,
|
| 644 |
+
text=words,
|
| 645 |
+
textposition='outside',
|
| 646 |
+
marker_color=colors,
|
| 647 |
+
name='SHAP Values (Fallback)',
|
| 648 |
+
hovertemplate='<b>%{text}</b><br>SHAP Value: %{y:.4f}<extra></extra>'
|
| 649 |
+
))
|
| 650 |
+
|
| 651 |
+
fig.update_layout(
|
| 652 |
+
title=f"SHAP Analysis - Fallback Mode (Samples: {num_samples})",
|
| 653 |
+
xaxis_title="Token Index",
|
| 654 |
+
yaxis_title="SHAP Value",
|
| 655 |
+
height=500
|
| 656 |
+
)
|
| 657 |
+
|
| 658 |
+
analysis_data = {
|
| 659 |
+
'method': 'SHAP_FALLBACK',
|
| 660 |
+
'language': detected_lang,
|
| 661 |
+
'total_tokens': len(words),
|
| 662 |
+
'samples_used': num_samples,
|
| 663 |
+
'note': 'Fallback mode used due to SHAP initialization issues'
|
| 664 |
+
}
|
| 665 |
+
|
| 666 |
+
summary_text = f"""
|
| 667 |
+
**SHAP Analysis Results (Fallback Mode):**
|
| 668 |
+
- **Language:** {detected_lang.upper()}
|
| 669 |
+
- **Total Tokens:** {len(words)}
|
| 670 |
+
- **Samples Requested:** {num_samples}
|
| 671 |
+
- **Status:** Fallback mode activated due to SHAP configuration issues
|
| 672 |
+
- **Note:** This is a simplified analysis. For full SHAP functionality, please try LIME analysis
|
| 673 |
+
|
| 674 |
+
**Original Error:** {str(e)}
|
| 675 |
+
"""
|
| 676 |
+
|
| 677 |
+
return summary_text, fig, analysis_data
|
| 678 |
+
|
| 679 |
+
except Exception as e2:
|
| 680 |
+
logger.error(f"Both SHAP approaches failed: {e2}")
|
| 681 |
+
error_msg = f"""
|
| 682 |
+
**SHAP Analysis Failed:**
|
| 683 |
+
- **Primary Error:** {str(e)}
|
| 684 |
+
- **Fallback Error:** {str(e2)}
|
| 685 |
- **Language:** {detected_lang}
|
| 686 |
- **Text Length:** {len(text)} characters
|
| 687 |
+
- **Recommendation:** Please try LIME analysis instead, which is more stable
|
| 688 |
|
| 689 |
+
**Alternative:** Use the LIME analysis button for similar explainable AI insights.
|
| 690 |
+
"""
|
| 691 |
+
return error_msg, None, {}
|
|
|
|
|
|
|
|
|
|
| 692 |
|
| 693 |
@handle_errors(default_return=("Analysis failed", None, None))
|
| 694 |
def analyze_with_lime(self, text: str, language: str = 'auto', num_samples: int = 100) -> Tuple[str, go.Figure, Dict]:
|
|
|
|
| 1540 |
)
|
| 1541 |
except Exception as e:
|
| 1542 |
logger.error(f"Failed to launch application: {e}")
|
| 1543 |
+
raise
|
| 1544 |
+
|
| 1545 |
+
@staticmethod
@handle_errors(default_return=None)
def create_probability_bars(result: Dict, theme: ThemeContext) -> go.Figure:
    """Create probability bar chart"""
    colors = theme.colors

    # Build (label, probability, colour) triples; the neutral class is only
    # present when the model reports it via the 'has_neutral' flag.
    if result.get('has_neutral', False):
        class_spec = [
            ('Negative', result['neg_prob'], colors['neg']),
            ('Neutral', result['neu_prob'], colors['neu']),
            ('Positive', result['pos_prob'], colors['pos']),
        ]
    else:
        class_spec = [
            ('Negative', result['neg_prob'], colors['neg']),
            ('Positive', result['pos_prob'], colors['pos']),
        ]

    labels = [label for label, _, _ in class_spec]
    values = [prob for _, prob, _ in class_spec]
    bar_colors = [colour for _, _, colour in class_spec]

    # One bar per sentiment class, annotated with its probability to 3 d.p.
    bar = go.Bar(
        x=labels,
        y=values,
        marker_color=bar_colors,
        text=[f'{v:.3f}' for v in values],
        textposition='outside',
    )
    fig = go.Figure(data=[bar])

    fig.update_layout(
        title="Sentiment Probabilities",
        yaxis_title="Probability",
        height=400,
        showlegend=False,
    )

    return fig
|
| 1573 |
+
|
| 1574 |
+
@staticmethod
|
| 1575 |
+
@handle_errors(default_return=None)
|
| 1576 |
+
def create_batch_summary(results: List[Dict], theme: ThemeContext) -> go.Figure:
|
| 1577 |
+
"""Create batch analysis summary"""
|
| 1578 |
+
colors = theme.colors
|
| 1579 |
+
|
| 1580 |
+
# Count sentiments
|
| 1581 |
+
sentiments = [r['sentiment'] for r in results if 'sentiment' in r and r['sentiment'] != 'Error']
|
| 1582 |
+
sentiment_counts = Counter(sentiments)
|
| 1583 |
+
|
| 1584 |
+
# Create pie chart
|
| 1585 |
+
fig = go.Figure(data=[go.Pie(
|
| 1586 |
+
labels=list(sentiment_counts.keys()),
|
| 1587 |
+
values=list(sentiment_counts.values()),
|
| 1588 |
+
marker_colors=[colors.get(s.lower()[:3], '#999999') for s in sentiment_counts.keys()],
|
| 1589 |
+
textinfo='label
|