AIEcosystem committed on
Commit
d83d227
·
verified ·
1 Parent(s): e3e4595

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +125 -155
src/streamlit_app.py CHANGED
@@ -8,8 +8,6 @@ import plotly.express as px
8
  import zipfile
9
  import json
10
  import hashlib
11
-
12
-
13
  from typing import Optional
14
  from gliner import GLiNER
15
  from comet_ml import Experiment
@@ -18,8 +16,6 @@ from comet_ml import Experiment
18
  st.set_page_config(layout="wide", page_title="NER")
19
  st.subheader("HR.ai", divider="green")
20
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
21
-
22
-
23
  st.markdown(
24
  """
25
  <style>
@@ -73,37 +69,9 @@ st.markdown(
73
  }
74
  </style>
75
  """,
76
- unsafe_allow_html=True
77
- )
78
-
79
  expander = st.expander("**Important notes**")
80
- expander.write("""
81
- **How to Use the HR.ai web app:**
82
- 1. Type or paste your text into the text area, then press Ctrl + Enter.
83
- 2. Click the 'Results' button to extract and tag entities in your text data.
84
-
85
- Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
86
-
87
-
88
- **How to Use the Question-Answering feature:**
89
- 1. Type or paste your text into the text area, then press Ctrl + Enter.
90
- 2. Click the 'Add Question' button to add your question to the Record of Questions. You can manage your questions by deleting them one by one.
91
- 3. Click the 'Extract Answers' button to extract the answer to your question.
92
-
93
- Results are presented in an easy-to-read table, visualized in an interactive tree map, and is available for download.
94
-
95
-
96
- **Entities:** "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"
97
-
98
- **Usage Limits:** You can request results unlimited times for one (1) month.
99
-
100
- **Supported Languages:** English
101
-
102
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
103
-
104
- For any errors or inquiries, please contact us at info@nlpblogs.com""")
105
-
106
-
107
  with st.sidebar:
108
  st.write("Use the following code to embed the web app on your website. Feel free to adjust the width and height values to fit your page.")
109
  code = '''
@@ -126,12 +94,11 @@ COMET_API_KEY = os.environ.get("COMET_API_KEY")
126
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
127
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
128
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
129
-
130
  if not comet_initialized:
131
  st.warning("Comet ML not initialized. Check environment variables.")
132
 
133
  # --- Model Loading and Caching ---
134
- @st.cache_resource
135
  def load_gliner_model(model_name):
136
  """Initializes and caches the GLiNER model."""
137
  try:
@@ -145,7 +112,6 @@ def load_gliner_model(model_name):
145
 
146
  # --- HR_AI Model Labels and Mappings ---
147
  labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
148
-
149
  category_mapping = {
150
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
151
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
@@ -158,8 +124,7 @@ category_mapping = {
158
  "Deductions": ["Tax", "Deductions"],
159
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
160
  "Legal & Compliance": ["Offer_letter", "Agreement"],
161
- "Professional_Development": ["Certification", "Skill"]
162
- }
163
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
164
 
165
  # --- InfoFinder Helpers ---
@@ -173,17 +138,17 @@ def get_stable_color(label):
173
 
174
  # --- Main App with Tabs ---
175
  tab1, tab2 = st.tabs(["HR.ai", "Question-Answering"])
176
-
177
  with tab1:
178
-
179
-
180
  # Load model for this tab
181
  model_hr = load_gliner_model("HR_AI")
182
-
183
  text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_hr')
184
 
185
  def clear_text_hr():
186
  st.session_state['my_text_area_hr'] = ""
 
 
 
 
187
 
188
  st.button("Clear text", on_click=clear_text_hr, key="clear_hr")
189
 
@@ -191,6 +156,7 @@ with tab1:
191
  start_time = time.time()
192
  if not text.strip():
193
  st.warning("Please enter some text to extract entities.")
 
194
  else:
195
  with st.spinner("Extracting entities...", show_time=True):
196
  entities = model_hr.predict_entities(text, labels)
@@ -203,103 +169,108 @@ with tab1:
203
  experiment.log_parameter("input_text", text)
204
  experiment.log_table("predicted_entities", df)
205
 
206
- st.subheader("Grouped Entities by Category", divider="green")
207
- category_names = sorted(list(category_mapping.keys()))
208
- category_tabs_hr = st.tabs(category_names)
209
- for i, category_name in enumerate(category_names):
210
- with category_tabs_hr[i]:
211
- df_category_filtered = df[df['category'] == category_name]
212
- if not df_category_filtered.empty:
213
- st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
214
- else:
215
- st.info(f"No entities found for the '{category_name}' category.")
216
-
217
- with st.expander("See Glossary of tags"):
218
- st.write('''
219
- - **text**: ['entity extracted from your text data']
220
- - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
221
- - **label**: ['label (tag) assigned to a given extracted entity']
222
- - **start**: ['index of the start of the corresponding entity']
223
- - **end**: ['index of the end of the corresponding entity']
224
- ''')
225
- st.divider()
226
-
227
- st.subheader("Candidate Card", divider="green")
228
- fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
229
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
230
- st.plotly_chart(fig_treemap)
231
-
232
- col1, col2 = st.columns(2)
233
- with col1:
234
- st.subheader("Pie chart", divider="green")
235
- grouped_counts = df['category'].value_counts().reset_index()
236
- grouped_counts.columns = ['category', 'count']
237
- fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
238
- fig_pie.update_traces(textposition='inside', textinfo='percent+label')
239
- fig_pie.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
240
- st.plotly_chart(fig_pie)
241
-
242
- with col2:
243
- st.subheader("Bar chart", divider="green")
244
- fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
245
- fig_bar.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
246
- st.plotly_chart(fig_bar)
247
-
248
- st.subheader("Most Frequent Entities", divider="green")
249
- word_counts = df['text'].value_counts().reset_index()
250
- word_counts.columns = ['Entity', 'Count']
251
- repeating_entities = word_counts[word_counts['Count'] > 1]
252
- if not repeating_entities.empty:
253
- st.dataframe(repeating_entities, use_container_width=True)
254
- fig_repeating_bar = px.bar(repeating_entities, x='Entity', y='Count', color='Entity')
255
- fig_repeating_bar.update_layout(xaxis={'categoryorder': 'total descending'}, paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
256
- st.plotly_chart(fig_repeating_bar)
257
- else:
258
- st.warning("No entities were found that occur more than once.")
259
-
260
- st.divider()
261
-
262
  dfa = pd.DataFrame(data={'Column Name': ['text', 'label', 'score', 'start', 'end'], 'Description': ['entity extracted from your text data', 'label (tag) assigned to a given extracted entity', 'accuracy score; how accurately a tag has been assigned to a given entity', 'index of the start of the corresponding entity', 'index of the end of the corresponding entity']})
263
  buf = io.BytesIO()
264
  with zipfile.ZipFile(buf, "w") as myzip:
265
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
266
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
 
267
 
268
- st.download_button(
269
- label="Download results and glossary (zip)",
270
- data=buf.getvalue(),
271
- file_name="nlpblogs_results.zip",
272
- mime="application/zip",
273
- )
274
-
275
- if comet_initialized:
276
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
277
- experiment.end()
278
  else:
 
279
  st.warning("No entities were found in the provided text.")
280
-
281
- end_time = time.time()
282
- elapsed_time = end_time - start_time
283
- st.text("")
284
- st.text("")
285
- st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
286
 
287
- with tab2:
288
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
 
 
290
  # Load model for this tab
291
  model_qa = load_gliner_model("InfoFinder")
292
-
293
  user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_infofinder')
294
-
295
  def clear_text_qa():
296
  st.session_state['my_text_area_infofinder'] = ""
 
 
297
 
298
  st.button("Clear text", on_click=clear_text_qa, key="clear_qa")
299
-
300
  st.subheader("Question-Answering", divider="green")
301
  question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
302
-
303
  if st.button("Add Question"):
304
  if question_input:
305
  if question_input not in st.session_state.user_labels:
@@ -309,10 +280,10 @@ with tab2:
309
  st.warning("This question has already been added.")
310
  else:
311
  st.warning("Please enter a question.")
312
-
313
  st.markdown("---")
314
  st.subheader("Record of Questions", divider="green")
315
-
316
  if st.session_state.user_labels:
317
  for i, label in enumerate(st.session_state.user_labels):
318
  col_list, col_delete = st.columns([0.9, 0.1])
@@ -324,9 +295,9 @@ with tab2:
324
  st.rerun()
325
  else:
326
  st.info("No questions defined yet. Use the input above to add one.")
327
-
328
  st.divider()
329
-
330
  if st.button("Extract Answers"):
331
  if not user_text.strip():
332
  st.warning("Please enter some text to analyze.")
@@ -337,7 +308,7 @@ with tab2:
337
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
338
  experiment.log_parameter("input_text_length", len(user_text))
339
  experiment.log_parameter("defined_labels", st.session_state.user_labels)
340
-
341
  start_time = time.time()
342
  with st.spinner("Analyzing text...", show_time=True):
343
  try:
@@ -345,46 +316,45 @@ with tab2:
345
  end_time = time.time()
346
  elapsed_time = end_time - start_time
347
  st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
348
-
349
  if entities:
350
  df1 = pd.DataFrame(entities)
351
  df2 = df1[['label', 'text', 'score']]
352
  df = df2.rename(columns={'label': 'question', 'text': 'answer'})
 
 
353
 
354
- st.subheader("Extracted Answers", divider="green")
355
- st.dataframe(df, use_container_width=True)
356
-
357
- st.subheader("Tree map", divider="green")
358
- all_labels = df['question'].unique()
359
- label_color_map = {label: get_stable_color(label) for label in all_labels}
360
- fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
361
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
362
- st.plotly_chart(fig_treemap)
363
-
364
- csv_data = df.to_csv(index=False).encode('utf-8')
365
- st.download_button(
366
- label="Download CSV",
367
- data=csv_data,
368
- file_name="nlpblogs_questions_answers.csv",
369
- mime="text/csv",
370
- )
371
-
372
  if comet_initialized:
373
  experiment.log_metric("processing_time_seconds", elapsed_time)
374
  experiment.log_table("predicted_entities", df)
375
- experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")
376
- experiment.end()
377
  else:
378
- st.info("No answers were found in the text with the defined questions.")
379
- if comet_initialized:
380
- experiment.end()
381
  except Exception as e:
382
  st.error(f"An error occurred during processing: {e}")
383
  st.write(f"Error details: {e}")
 
384
  if comet_initialized:
385
- experiment.log_text(f"Error: {e}")
386
  experiment.end()
387
-
388
-
389
-
390
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  import zipfile
9
  import json
10
  import hashlib
 
 
11
  from typing import Optional
12
  from gliner import GLiNER
13
  from comet_ml import Experiment
 
16
  st.set_page_config(layout="wide", page_title="NER")
17
  st.subheader("HR.ai", divider="green")
18
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
 
19
  st.markdown(
20
  """
21
  <style>
 
69
  }
70
  </style>
71
  """,
72
+ unsafe_allow_html=True)
 
 
73
  expander = st.expander("**Important notes**")
74
  expander.write(""" **How to Use the HR.ai web app:** 1. Type or paste your text into the text area, then press Ctrl + Enter. 2. Click the 'Results' button to extract and tag entities in your text data. Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags. **How to Use the Question-Answering feature:** 1. Type or paste your text into the text area, then press Ctrl + Enter. 2. Click the 'Add Question' button to add your question to the Record of Questions. You can manage your questions by deleting them one by one. 3. Click the 'Extract Answers' button to extract the answer to your question. Results are presented in an easy-to-read table, visualized in an interactive tree map, and are available for download. **Entities:** "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill" **Usage Limits:** You can request results unlimited times for one (1) month. **Supported Languages:** English **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  with st.sidebar:
76
  st.write("Use the following code to embed the web app on your website. Feel free to adjust the width and height values to fit your page.")
77
  code = '''
 
94
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
95
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
96
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
97
  if not comet_initialized:
98
  st.warning("Comet ML not initialized. Check environment variables.")
99
 
100
  # --- Model Loading and Caching ---
101
+ @st.cache_resource
102
  def load_gliner_model(model_name):
103
  """Initializes and caches the GLiNER model."""
104
  try:
 
112
 
113
  # --- HR_AI Model Labels and Mappings ---
114
  labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
 
115
  category_mapping = {
116
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
117
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
 
124
  "Deductions": ["Tax", "Deductions"],
125
  "Recruitment & Sourcing": ["Interview_type", "Applicant", "Referral", "Job_board", "Recruiter"],
126
  "Legal & Compliance": ["Offer_letter", "Agreement"],
127
+ "Professional_Development": ["Certification", "Skill"]}
 
128
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
129
 
130
  # --- InfoFinder Helpers ---
 
138
 
139
  # --- Main App with Tabs ---
140
  tab1, tab2 = st.tabs(["HR.ai", "Question-Answering"])
 
141
  with tab1:
 
 
142
  # Load model for this tab
143
  model_hr = load_gliner_model("HR_AI")
 
144
  text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_hr')
145
 
146
  def clear_text_hr():
147
  st.session_state['my_text_area_hr'] = ""
148
+ st.session_state.hr_data_to_download = None
149
+
150
+ if 'hr_data_to_download' not in st.session_state:
151
+ st.session_state.hr_data_to_download = None
152
 
153
  st.button("Clear text", on_click=clear_text_hr, key="clear_hr")
154
 
 
156
  start_time = time.time()
157
  if not text.strip():
158
  st.warning("Please enter some text to extract entities.")
159
+ st.session_state.hr_data_to_download = None
160
  else:
161
  with st.spinner("Extracting entities...", show_time=True):
162
  entities = model_hr.predict_entities(text, labels)
 
169
  experiment.log_parameter("input_text", text)
170
  experiment.log_table("predicted_entities", df)
171
 
172
+ # Prepare data for download and store it in session state
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  dfa = pd.DataFrame(data={'Column Name': ['text', 'label', 'score', 'start', 'end'], 'Description': ['entity extracted from your text data', 'label (tag) assigned to a given extracted entity', 'accuracy score; how accurately a tag has been assigned to a given entity', 'index of the start of the corresponding entity', 'index of the end of the corresponding entity']})
174
  buf = io.BytesIO()
175
  with zipfile.ZipFile(buf, "w") as myzip:
176
  myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
177
  myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
178
+ st.session_state.hr_data_to_download = buf.getvalue()
179
 
180
+ st.session_state.hr_df = df
 
 
 
 
 
 
 
 
 
181
  else:
182
+ st.session_state.hr_data_to_download = None
183
  st.warning("No entities were found in the provided text.")
 
 
 
 
 
 
184
 
185
+ end_time = time.time()
186
+ elapsed_time = end_time - start_time
187
+ st.text("")
188
+ st.text("")
189
+ st.info(f"Results processed in **{elapsed_time:.2f} seconds**.")
190
+
191
+ # Display logic for HR tab (always runs if data is in session state)
192
+ if 'hr_df' in st.session_state and not st.session_state.hr_df.empty:
193
+ df = st.session_state.hr_df
194
+ st.subheader("Grouped Entities by Category", divider="green")
195
+ category_names = sorted(list(category_mapping.keys()))
196
+ category_tabs_hr = st.tabs(category_names)
197
+ for i, category_name in enumerate(category_names):
198
+ with category_tabs_hr[i]:
199
+ df_category_filtered = df[df['category'] == category_name]
200
+ if not df_category_filtered.empty:
201
+ st.dataframe(df_category_filtered.drop(columns=['category']), use_container_width=True)
202
+ else:
203
+ st.info(f"No entities found for the '{category_name}' category.")
204
+
205
+ with st.expander("See Glossary of tags"):
206
+ st.write('''
207
+ - **text**: ['entity extracted from your text data']
208
+ - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
209
+ - **label**: ['label (tag) assigned to a given extracted entity']
210
+ - **start**: ['index of the start of the corresponding entity']
211
+ - **end**: ['index of the end of the corresponding entity']
212
+ ''')
213
+ st.divider()
214
+
215
+ st.subheader("Candidate Card", divider="green")
216
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
217
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
218
+ st.plotly_chart(fig_treemap)
219
+
220
+ col1, col2 = st.columns(2)
221
+ with col1:
222
+ st.subheader("Pie chart", divider="green")
223
+ grouped_counts = df['category'].value_counts().reset_index()
224
+ grouped_counts.columns = ['category', 'count']
225
+ fig_pie = px.pie(grouped_counts, values='count', names='category', hover_data=['count'], labels={'count': 'count'}, title='Percentage of predicted categories')
226
+ fig_pie.update_traces(textposition='inside', textinfo='percent+label')
227
+ fig_pie.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
228
+ st.plotly_chart(fig_pie)
229
+
230
+ with col2:
231
+ st.subheader("Bar chart", divider="green")
232
+ fig_bar = px.bar(grouped_counts, x="count", y="category", color="category", text_auto=True, title='Occurrences of predicted categories')
233
+ fig_bar.update_layout(paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
234
+ st.plotly_chart(fig_bar)
235
+
236
+ st.subheader("Most Frequent Entities", divider="green")
237
+ word_counts = df['text'].value_counts().reset_index()
238
+ word_counts.columns = ['Entity', 'Count']
239
+ repeating_entities = word_counts[word_counts['Count'] > 1]
240
+ if not repeating_entities.empty:
241
+ st.dataframe(repeating_entities, use_container_width=True)
242
+ fig_repeating_bar = px.bar(repeating_entities, x='Entity', y='Count', color='Entity')
243
+ fig_repeating_bar.update_layout(xaxis={'categoryorder': 'total descending'}, paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
244
+ st.plotly_chart(fig_repeating_bar)
245
+ else:
246
+ st.warning("No entities were found that occur more than once.")
247
+
248
+ st.divider()
249
+ if st.session_state.hr_data_to_download:
250
+ st.download_button(
251
+ label="Download results and glossary (zip)",
252
+ data=st.session_state.hr_data_to_download,
253
+ file_name="nlpblogs_results.zip",
254
+ mime="application/zip",
255
+ )
256
+ if comet_initialized:
257
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap_categories")
258
+ experiment.end()
259
 
260
+ with tab2:
261
  # Load model for this tab
262
  model_qa = load_gliner_model("InfoFinder")
 
263
  user_text = st.text_area("Type or paste your text below, and then press Ctrl + Enter", height=250, key='my_text_area_infofinder')
264
+
265
  def clear_text_qa():
266
  st.session_state['my_text_area_infofinder'] = ""
267
+ st.session_state.user_labels = []
268
+ st.session_state.qa_data_to_download = None
269
 
270
  st.button("Clear text", on_click=clear_text_qa, key="clear_qa")
 
271
  st.subheader("Question-Answering", divider="green")
272
  question_input = st.text_input("Ask wh-questions. **Wh-questions begin with what, when, where, who, whom, which, whose, why and how. We use them to ask for specific information.**")
273
+
274
  if st.button("Add Question"):
275
  if question_input:
276
  if question_input not in st.session_state.user_labels:
 
280
  st.warning("This question has already been added.")
281
  else:
282
  st.warning("Please enter a question.")
283
+
284
  st.markdown("---")
285
  st.subheader("Record of Questions", divider="green")
286
+
287
  if st.session_state.user_labels:
288
  for i, label in enumerate(st.session_state.user_labels):
289
  col_list, col_delete = st.columns([0.9, 0.1])
 
295
  st.rerun()
296
  else:
297
  st.info("No questions defined yet. Use the input above to add one.")
298
+
299
  st.divider()
300
+
301
  if st.button("Extract Answers"):
302
  if not user_text.strip():
303
  st.warning("Please enter some text to analyze.")
 
308
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
309
  experiment.log_parameter("input_text_length", len(user_text))
310
  experiment.log_parameter("defined_labels", st.session_state.user_labels)
311
+
312
  start_time = time.time()
313
  with st.spinner("Analyzing text...", show_time=True):
314
  try:
 
316
  end_time = time.time()
317
  elapsed_time = end_time - start_time
318
  st.info(f"Processing took **{elapsed_time:.2f} seconds**.")
319
+
320
  if entities:
321
  df1 = pd.DataFrame(entities)
322
  df2 = df1[['label', 'text', 'score']]
323
  df = df2.rename(columns={'label': 'question', 'text': 'answer'})
324
+ st.session_state.qa_data_to_download = df.to_csv(index=False).encode('utf-8')
325
+ st.session_state.qa_df = df
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  if comet_initialized:
328
  experiment.log_metric("processing_time_seconds", elapsed_time)
329
  experiment.log_table("predicted_entities", df)
 
 
330
  else:
331
+ st.session_state.qa_data_to_download = None
332
+ st.warning("No answers were found in the text with the defined questions.")
 
333
  except Exception as e:
334
  st.error(f"An error occurred during processing: {e}")
335
  st.write(f"Error details: {e}")
336
+ finally:
337
  if comet_initialized:
 
338
  experiment.end()
339
+
340
+ # Display logic for QA tab (always runs if data is in session state)
341
+ if 'qa_df' in st.session_state and not st.session_state.qa_df.empty:
342
+ df = st.session_state.qa_df
343
+ st.subheader("Extracted Answers", divider="green")
344
+ st.dataframe(df, use_container_width=True)
345
+ st.subheader("Tree map", divider="green")
346
+ all_labels = df['question'].unique()
347
+ label_color_map = {label: get_stable_color(label) for label in all_labels}
348
+ fig_treemap = px.treemap(df, path=[px.Constant("all"), 'question', 'answer'], values='score', color='question', color_discrete_map=label_color_map)
349
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F3E5F5', plot_bgcolor='#F3E5F5')
350
+ st.plotly_chart(fig_treemap)
351
+
352
+ if st.session_state.qa_data_to_download:
353
+ st.download_button(
354
+ label="Download CSV",
355
+ data=st.session_state.qa_data_to_download,
356
+ file_name="nlpblogs_questions_answers.csv",
357
+ mime="text/csv",
358
+ )
359
+ if comet_initialized:
360
+ experiment.log_figure(figure=fig_treemap, figure_name="entity_treemap")