AIEcosystem committed on
Commit
57aeb9d
·
verified ·
1 Parent(s): 2cfe6c8

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +33 -27
src/streamlit_app.py CHANGED
@@ -78,12 +78,7 @@ def highlight_entities(text, df_entities):
78
  entity_text = entity['text']
79
  color = entity_color_map.get(label, '#000000')
80
  # Create a span with background color and tooltip
81
- # Change the foreground text color to BLACK, and use a white shadow for readability
82
- # Force text color to BLACK and add a clear, contrasting border
83
- # In the 'highlight_entities' function:
84
- highlight_html = f'<span style="background-color: {color}; color: #FF0000; padding: 2px 4px; border-radius: 3px; cursor: help; border: 1px solid #FFFFFF;" title="{label}">{entity_text}</span>'
85
-
86
-
87
  # Replace the original text segment with the highlighted HTML
88
  highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
89
  # Use a div to mimic the Streamlit input box style for the report
@@ -124,23 +119,29 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
124
  except Exception as e:
125
  st.error(f"Topic modeling failed: {e}")
126
  return None
 
127
  def create_topic_word_bubbles(df_topic_data):
128
- """Generates a Plotly Bubble Chart for top words across all topics."""
 
129
  # Renaming columns to match the output of perform_topic_modeling
130
- df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
131
- df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
132
-
133
- df_topic_data['Exaggerated_Size'] = df_topic_data['weight'] * 100
134
-
135
  if df_topic_data.empty:
136
  return None
 
137
  fig = px.scatter(
138
  df_topic_data,
139
  x='x_pos',
140
  y='weight',
141
- size='Exaggerated_Size',
142
  color='topic',
143
- hover_name='word',
 
 
 
 
 
144
  size_max=80,
145
  title='Topic Word Weights (Bubble Chart)',
146
  color_discrete_sequence=px.colors.qualitative.Bold,
@@ -151,20 +152,32 @@ def create_topic_word_bubbles(df_topic_data):
151
  },
152
  custom_data=['word', 'weight', 'topic']
153
  )
 
154
  fig.update_layout(
155
  xaxis_title="Entity/Word (Bubble size = Word Weight)",
156
  yaxis_title="Word Weight",
157
- xaxis={'tickangle': -45, 'showgrid': False, 'showticklabels': False, 'range': [0, 10]},
158
  yaxis={'showgrid': True},
159
  showlegend=True,
160
- plot_bgcolor='#f9f9f9', # Changed from pink
161
- paper_bgcolor='#f9f9f9', # Changed from pink
162
- height=1000,
163
- autosize=False,
164
  margin=dict(t=50, b=100, l=50, r=10),
165
  )
166
- fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>', marker=dict(line=dict(width=1, color='DarkSlateGrey')))
 
 
 
 
 
 
 
 
 
167
  return fig
 
 
 
168
  def generate_network_graph(df, raw_text):
169
  """
170
  Generates a network graph visualization (Node Plot) with edges
@@ -390,7 +403,6 @@ st.markdown(
390
  """
391
  <style>
392
  /* ... (Keep your existing styles for main, stApp, stTextArea, stButton) ... */
393
-
394
  /* --- FIX: Tab Label Colors for Visibility --- */
395
  /* Target the container for the tab labels (the buttons) */
396
  [data-testid="stConfigurableTabs"] button {
@@ -398,7 +410,6 @@ st.markdown(
398
  background-color: #f0f0f0; /* Light gray background for inactive tabs */
399
  border: 1px solid #cccccc;
400
  }
401
-
402
  /* Target the ACTIVE tab label */
403
  [data-testid="stConfigurableTabs"] button[aria-selected="true"] {
404
  color: #FFFFFF !important; /* White text for active tab */
@@ -410,7 +421,6 @@ st.markdown(
410
  .streamlit-expanderHeader {
411
  color: #007bff; /* Blue text for Expander header */
412
  }
413
-
414
  </style>
415
  """,
416
  unsafe_allow_html=True
@@ -449,9 +459,7 @@ with tab2:
449
  **Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
450
 
451
  **Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
452
-
453
  **How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
454
-
455
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
456
  """)
457
 
@@ -689,5 +697,3 @@ if st.session_state.show_results:
689
  mime="text/csv",
690
  type="secondary"
691
  )
692
-
693
-
 
78
  entity_text = entity['text']
79
  color = entity_color_map.get(label, '#000000')
80
  # Create a span with background color and tooltip
81
+ highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
 
 
 
 
 
82
  # Replace the original text segment with the highlighted HTML
83
  highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
84
  # Use a div to mimic the Streamlit input box style for the report
 
119
  except Exception as e:
120
  st.error(f"Topic modeling failed: {e}")
121
  return None
122
+
123
  def create_topic_word_bubbles(df_topic_data):
124
+ """Generates a Plotly Bubble Chart for top words across
125
+ all topics, displaying the word directly on the bubble."""
126
  # Renaming columns to match the output of perform_topic_modeling
127
+ df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic',
128
+ 'Word': 'word', 'Weight': 'weight'})
129
+ df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position
 
 
130
  if df_topic_data.empty:
131
  return None
132
+
133
  fig = px.scatter(
134
  df_topic_data,
135
  x='x_pos',
136
  y='weight',
137
+ size='weight',
138
  color='topic',
139
+ # --- MODIFICATION START ---
140
+ # 1. Use 'word' for the text label on the chart
141
+ text='word',
142
+ # 2. Keep hover_name for tooltip on hover (optional, but good practice)
143
+ hover_name='word',
144
+ # --- MODIFICATION END ---
145
  size_max=80,
146
  title='Topic Word Weights (Bubble Chart)',
147
  color_discrete_sequence=px.colors.qualitative.Bold,
 
152
  },
153
  custom_data=['word', 'weight', 'topic']
154
  )
155
+
156
  fig.update_layout(
157
  xaxis_title="Entity/Word (Bubble size = Word Weight)",
158
  yaxis_title="Word Weight",
159
+ xaxis={'tickangle': -45, 'showgrid': False, 'showticklabels': False}, # Hide x-axis labels since words are now labels
160
  yaxis={'showgrid': True},
161
  showlegend=True,
162
+ plot_bgcolor='#f9f9f9',
163
+ paper_bgcolor='#f9f9f9',
164
+ height=600,
 
165
  margin=dict(t=50, b=100, l=50, r=10),
166
  )
167
+
168
+ # Update traces to show the word text and adjust hover template
169
+ fig.update_traces(
170
+ # Position the text on top of the bubble
171
+ textposition='middle center',
172
+ # Customize hover template for better readability
173
+ hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',
174
+ marker=dict(line=dict(width=1, color='DarkSlateGrey'))
175
+ )
176
+
177
  return fig
178
+
179
+
180
+
181
  def generate_network_graph(df, raw_text):
182
  """
183
  Generates a network graph visualization (Node Plot) with edges
 
403
  """
404
  <style>
405
  /* ... (Keep your existing styles for main, stApp, stTextArea, stButton) ... */
 
406
  /* --- FIX: Tab Label Colors for Visibility --- */
407
  /* Target the container for the tab labels (the buttons) */
408
  [data-testid="stConfigurableTabs"] button {
 
410
  background-color: #f0f0f0; /* Light gray background for inactive tabs */
411
  border: 1px solid #cccccc;
412
  }
 
413
  /* Target the ACTIVE tab label */
414
  [data-testid="stConfigurableTabs"] button[aria-selected="true"] {
415
  color: #FFFFFF !important; /* White text for active tab */
 
421
  .streamlit-expanderHeader {
422
  color: #007bff; /* Blue text for Expander header */
423
  }
 
424
  </style>
425
  """,
426
  unsafe_allow_html=True
 
459
  **Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
460
 
461
  **Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
 
462
  **How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
 
463
  **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
464
  """)
465
 
 
697
  mime="text/csv",
698
  type="secondary"
699
  )