Spaces:
Running
Running
Update src/streamlit_app.py
Browse files - src/streamlit_app.py +33 -27
src/streamlit_app.py
CHANGED
|
@@ -78,12 +78,7 @@ def highlight_entities(text, df_entities):
|
|
| 78 |
entity_text = entity['text']
|
| 79 |
color = entity_color_map.get(label, '#000000')
|
| 80 |
# Create a span with background color and tooltip
|
| 81 |
-
|
| 82 |
-
# Force text color to BLACK and add a clear, contrasting border
|
| 83 |
-
# In the 'highlight_entities' function:
|
| 84 |
-
highlight_html = f'<span style="background-color: {color}; color: #FF0000; padding: 2px 4px; border-radius: 3px; cursor: help; border: 1px solid #FFFFFF;" title="{label}">{entity_text}</span>'
|
| 85 |
-
|
| 86 |
-
|
| 87 |
# Replace the original text segment with the highlighted HTML
|
| 88 |
highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
|
| 89 |
# Use a div to mimic the Streamlit input box style for the report
|
|
@@ -124,23 +119,29 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
|
|
| 124 |
except Exception as e:
|
| 125 |
st.error(f"Topic modeling failed: {e}")
|
| 126 |
return None
|
|
|
|
| 127 |
def create_topic_word_bubbles(df_topic_data):
|
| 128 |
-
"""Generates a Plotly Bubble Chart for top words across
|
|
|
|
| 129 |
# Renaming columns to match the output of perform_topic_modeling
|
| 130 |
-
df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic',
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
df_topic_data['Exaggerated_Size'] = df_topic_data['weight'] * 100
|
| 134 |
-
|
| 135 |
if df_topic_data.empty:
|
| 136 |
return None
|
|
|
|
| 137 |
fig = px.scatter(
|
| 138 |
df_topic_data,
|
| 139 |
x='x_pos',
|
| 140 |
y='weight',
|
| 141 |
-
size='
|
| 142 |
color='topic',
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
size_max=80,
|
| 145 |
title='Topic Word Weights (Bubble Chart)',
|
| 146 |
color_discrete_sequence=px.colors.qualitative.Bold,
|
|
@@ -151,20 +152,32 @@ def create_topic_word_bubbles(df_topic_data):
|
|
| 151 |
},
|
| 152 |
custom_data=['word', 'weight', 'topic']
|
| 153 |
)
|
|
|
|
| 154 |
fig.update_layout(
|
| 155 |
xaxis_title="Entity/Word (Bubble size = Word Weight)",
|
| 156 |
yaxis_title="Word Weight",
|
| 157 |
-
xaxis={'tickangle': -45, 'showgrid': False, 'showticklabels': False,
|
| 158 |
yaxis={'showgrid': True},
|
| 159 |
showlegend=True,
|
| 160 |
-
plot_bgcolor='#f9f9f9',
|
| 161 |
-
paper_bgcolor='#f9f9f9',
|
| 162 |
-
height=
|
| 163 |
-
autosize=False,
|
| 164 |
margin=dict(t=50, b=100, l=50, r=10),
|
| 165 |
)
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
return fig
|
|
|
|
|
|
|
|
|
|
| 168 |
def generate_network_graph(df, raw_text):
|
| 169 |
"""
|
| 170 |
Generates a network graph visualization (Node Plot) with edges
|
|
@@ -390,7 +403,6 @@ st.markdown(
|
|
| 390 |
"""
|
| 391 |
<style>
|
| 392 |
/* ... (Keep your existing styles for main, stApp, stTextArea, stButton) ... */
|
| 393 |
-
|
| 394 |
/* --- FIX: Tab Label Colors for Visibility --- */
|
| 395 |
/* Target the container for the tab labels (the buttons) */
|
| 396 |
[data-testid="stConfigurableTabs"] button {
|
|
@@ -398,7 +410,6 @@ st.markdown(
|
|
| 398 |
background-color: #f0f0f0; /* Light gray background for inactive tabs */
|
| 399 |
border: 1px solid #cccccc;
|
| 400 |
}
|
| 401 |
-
|
| 402 |
/* Target the ACTIVE tab label */
|
| 403 |
[data-testid="stConfigurableTabs"] button[aria-selected="true"] {
|
| 404 |
color: #FFFFFF !important; /* White text for active tab */
|
|
@@ -410,7 +421,6 @@ st.markdown(
|
|
| 410 |
.streamlit-expanderHeader {
|
| 411 |
color: #007bff; /* Blue text for Expander header */
|
| 412 |
}
|
| 413 |
-
|
| 414 |
</style>
|
| 415 |
""",
|
| 416 |
unsafe_allow_html=True
|
|
@@ -449,9 +459,7 @@ with tab2:
|
|
| 449 |
**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
|
| 450 |
|
| 451 |
**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
|
| 452 |
-
|
| 453 |
**How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
|
| 454 |
-
|
| 455 |
**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
|
| 456 |
""")
|
| 457 |
|
|
@@ -689,5 +697,3 @@ if st.session_state.show_results:
|
|
| 689 |
mime="text/csv",
|
| 690 |
type="secondary"
|
| 691 |
)
|
| 692 |
-
|
| 693 |
-
|
|
|
|
| 78 |
entity_text = entity['text']
|
| 79 |
color = entity_color_map.get(label, '#000000')
|
| 80 |
# Create a span with background color and tooltip
|
| 81 |
+
highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Replace the original text segment with the highlighted HTML
|
| 83 |
highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
|
| 84 |
# Use a div to mimic the Streamlit input box style for the report
|
|
|
|
| 119 |
except Exception as e:
|
| 120 |
st.error(f"Topic modeling failed: {e}")
|
| 121 |
return None
|
| 122 |
+
|
| 123 |
def create_topic_word_bubbles(df_topic_data):
|
| 124 |
+
"""Generates a Plotly Bubble Chart for top words across
|
| 125 |
+
all topics, displaying the word directly on the bubble."""
|
| 126 |
# Renaming columns to match the output of perform_topic_modeling
|
| 127 |
+
df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic',
|
| 128 |
+
'Word': 'word', 'Weight': 'weight'})
|
| 129 |
+
df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position
|
|
|
|
|
|
|
| 130 |
if df_topic_data.empty:
|
| 131 |
return None
|
| 132 |
+
|
| 133 |
fig = px.scatter(
|
| 134 |
df_topic_data,
|
| 135 |
x='x_pos',
|
| 136 |
y='weight',
|
| 137 |
+
size='weight',
|
| 138 |
color='topic',
|
| 139 |
+
# --- MODIFICATION START ---
|
| 140 |
+
# 1. Use 'word' for the text label on the chart
|
| 141 |
+
text='word',
|
| 142 |
+
# 2. Keep hover_name for tooltip on hover (optional, but good practice)
|
| 143 |
+
hover_name='word',
|
| 144 |
+
# --- MODIFICATION END ---
|
| 145 |
size_max=80,
|
| 146 |
title='Topic Word Weights (Bubble Chart)',
|
| 147 |
color_discrete_sequence=px.colors.qualitative.Bold,
|
|
|
|
| 152 |
},
|
| 153 |
custom_data=['word', 'weight', 'topic']
|
| 154 |
)
|
| 155 |
+
|
| 156 |
fig.update_layout(
|
| 157 |
xaxis_title="Entity/Word (Bubble size = Word Weight)",
|
| 158 |
yaxis_title="Word Weight",
|
| 159 |
+
xaxis={'tickangle': -45, 'showgrid': False, 'showticklabels': False}, # Hide x-axis labels since words are now labels
|
| 160 |
yaxis={'showgrid': True},
|
| 161 |
showlegend=True,
|
| 162 |
+
plot_bgcolor='#f9f9f9',
|
| 163 |
+
paper_bgcolor='#f9f9f9',
|
| 164 |
+
height=600,
|
|
|
|
| 165 |
margin=dict(t=50, b=100, l=50, r=10),
|
| 166 |
)
|
| 167 |
+
|
| 168 |
+
# Update traces to show the word text and adjust hover template
|
| 169 |
+
fig.update_traces(
|
| 170 |
+
# Position the text on top of the bubble
|
| 171 |
+
textposition='middle center',
|
| 172 |
+
# Customize hover template for better readability
|
| 173 |
+
hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',
|
| 174 |
+
marker=dict(line=dict(width=1, color='DarkSlateGrey'))
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
return fig
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
|
| 181 |
def generate_network_graph(df, raw_text):
|
| 182 |
"""
|
| 183 |
Generates a network graph visualization (Node Plot) with edges
|
|
|
|
| 403 |
"""
|
| 404 |
<style>
|
| 405 |
/* ... (Keep your existing styles for main, stApp, stTextArea, stButton) ... */
|
|
|
|
| 406 |
/* --- FIX: Tab Label Colors for Visibility --- */
|
| 407 |
/* Target the container for the tab labels (the buttons) */
|
| 408 |
[data-testid="stConfigurableTabs"] button {
|
|
|
|
| 410 |
background-color: #f0f0f0; /* Light gray background for inactive tabs */
|
| 411 |
border: 1px solid #cccccc;
|
| 412 |
}
|
|
|
|
| 413 |
/* Target the ACTIVE tab label */
|
| 414 |
[data-testid="stConfigurableTabs"] button[aria-selected="true"] {
|
| 415 |
color: #FFFFFF !important; /* White text for active tab */
|
|
|
|
| 421 |
.streamlit-expanderHeader {
|
| 422 |
color: #007bff; /* Blue text for Expander header */
|
| 423 |
}
|
|
|
|
| 424 |
</style>
|
| 425 |
""",
|
| 426 |
unsafe_allow_html=True
|
|
|
|
| 459 |
**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
|
| 460 |
|
| 461 |
**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
|
|
|
|
| 462 |
**How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
|
|
|
|
| 463 |
**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
|
| 464 |
""")
|
| 465 |
|
|
|
|
| 697 |
mime="text/csv",
|
| 698 |
type="secondary"
|
| 699 |
)
|
|
|
|
|
|