Spaces:

AIEcosystem
/

DataHarvest

Running

App Files Community

AIEcosystem committed on Oct 11

Commit

57aeb9d

verified ·

1 Parent(s): 2cfe6c8

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +33 -27

src/streamlit_app.py CHANGED Viewed

@@ -78,12 +78,7 @@ def highlight_entities(text, df_entities):
         entity_text = entity['text']
         color = entity_color_map.get(label, '#000000')
         # Create a span with background color and tooltip
-      # Change the foreground text color to BLACK, and use a white shadow for readability
-        # Force text color to BLACK and add a clear, contrasting border
-        # In the 'highlight_entities' function:
-        highlight_html = f'<span style="background-color: {color}; color: #FF0000; padding: 2px 4px; border-radius: 3px; cursor: help; border: 1px solid #FFFFFF;" title="{label}">{entity_text}</span>'
         # Replace the original text segment with the highlighted HTML
         highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
     # Use a div to mimic the Streamlit input box style for the report
@@ -124,23 +119,29 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     except Exception as e:
         st.error(f"Topic modeling failed: {e}")
         return None
 def create_topic_word_bubbles(df_topic_data):
-    """Generates a Plotly Bubble Chart for top words across all topics."""
     # Renaming columns to match the output of perform_topic_modeling
-    df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
-    df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
-    df_topic_data['Exaggerated_Size'] = df_topic_data['weight'] * 100
     if df_topic_data.empty:
         return None
     fig = px.scatter(
         df_topic_data,
         x='x_pos',
         y='weight',
-        size='Exaggerated_Size',
         color='topic',
-        hover_name='word',
         size_max=80,
         title='Topic Word Weights (Bubble Chart)',
         color_discrete_sequence=px.colors.qualitative.Bold,
@@ -151,20 +152,32 @@ def create_topic_word_bubbles(df_topic_data):
         },
         custom_data=['word', 'weight', 'topic']
     )
     fig.update_layout(
         xaxis_title="Entity/Word (Bubble size = Word Weight)",
         yaxis_title="Word Weight",
-        xaxis={'tickangle': -45, 'showgrid': False, 'showticklabels': False, 'range': [0, 10]},
         yaxis={'showgrid': True},
         showlegend=True,
-        plot_bgcolor='#f9f9f9', # Changed from pink
-        paper_bgcolor='#f9f9f9', # Changed from pink
-        height=1000,
-        autosize=False,
         margin=dict(t=50, b=100, l=50, r=10),
     )
-    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>', marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
     """
     Generates a network graph visualization (Node Plot) with edges
@@ -390,7 +403,6 @@ st.markdown(
     """
     <style>
     /* ... (Keep your existing styles for main, stApp, stTextArea, stButton) ... */
     /* --- FIX: Tab Label Colors for Visibility --- */
     /* Target the container for the tab labels (the buttons) */
     [data-testid="stConfigurableTabs"] button {
@@ -398,7 +410,6 @@ st.markdown(
         background-color: #f0f0f0; /* Light gray background for inactive tabs */
         border: 1px solid #cccccc;
     }
     /* Target the ACTIVE tab label */
     [data-testid="stConfigurableTabs"] button[aria-selected="true"] {
         color: #FFFFFF !important; /* White text for active tab */
@@ -410,7 +421,6 @@ st.markdown(
     .streamlit-expanderHeader {
         color: #007bff; /* Blue text for Expander header */
     }
     </style>
     """,
     unsafe_allow_html=True
@@ -449,9 +459,7 @@ with tab2:
     **Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
     **Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
     **How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
     **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
     """)
@@ -689,5 +697,3 @@ if st.session_state.show_results:
             mime="text/csv",
             type="secondary"
         )

         entity_text = entity['text']
         color = entity_color_map.get(label, '#000000')
         # Create a span with background color and tooltip
+        highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
         # Replace the original text segment with the highlighted HTML
         highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
     # Use a div to mimic the Streamlit input box style for the report
     except Exception as e:
         st.error(f"Topic modeling failed: {e}")
         return None
 def create_topic_word_bubbles(df_topic_data):
+    """Generates a Plotly Bubble Chart for top words across
+    all topics, displaying the word directly on the bubble."""
     # Renaming columns to match the output of perform_topic_modeling
+    df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic',
+'Word': 'word', 'Weight': 'weight'})
+    df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position
     if df_topic_data.empty:
         return None
     fig = px.scatter(
         df_topic_data,
         x='x_pos',
         y='weight',
+        size='weight',
         color='topic',
+        # --- MODIFICATION START ---
+        # 1. Use 'word' for the text label on the chart
+        text='word',
+        # 2. Keep hover_name for tooltip on hover (optional, but good practice)
+        hover_name='word',
+        # --- MODIFICATION END ---
         size_max=80,
         title='Topic Word Weights (Bubble Chart)',
         color_discrete_sequence=px.colors.qualitative.Bold,
         },
         custom_data=['word', 'weight', 'topic']
     )
     fig.update_layout(
         xaxis_title="Entity/Word (Bubble size = Word Weight)",
         yaxis_title="Word Weight",
+        xaxis={'tickangle': -45, 'showgrid': False, 'showticklabels': False}, # Hide x-axis labels since words are now labels
         yaxis={'showgrid': True},
         showlegend=True,
+        plot_bgcolor='#f9f9f9',
+        paper_bgcolor='#f9f9f9',
+        height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
+    # Update traces to show the word text and adjust hover template
+    fig.update_traces(
+        # Position the text on top of the bubble
+        textposition='middle center',
+        # Customize hover template for better readability
+        hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',
+        marker=dict(line=dict(width=1, color='DarkSlateGrey'))
+    )
     return fig
 def generate_network_graph(df, raw_text):
     """
     Generates a network graph visualization (Node Plot) with edges
     """
     <style>
     /* ... (Keep your existing styles for main, stApp, stTextArea, stButton) ... */
     /* --- FIX: Tab Label Colors for Visibility --- */
     /* Target the container for the tab labels (the buttons) */
     [data-testid="stConfigurableTabs"] button {
         background-color: #f0f0f0; /* Light gray background for inactive tabs */
         border: 1px solid #cccccc;
     }
     /* Target the ACTIVE tab label */
     [data-testid="stConfigurableTabs"] button[aria-selected="true"] {
         color: #FFFFFF !important; /* White text for active tab */
     .streamlit-expanderHeader {
         color: #007bff; /* Blue text for Expander header */
     }
     </style>
     """,
     unsafe_allow_html=True
     **Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
     **Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
     **How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
     **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
     """)
             mime="text/csv",
             type="secondary"
         )