AIEcosystem committed on
Commit
ab3fa57
·
verified ·
1 Parent(s): 0b88ebc

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +82 -44
src/streamlit_app.py CHANGED
@@ -9,15 +9,14 @@ import io
9
  import plotly.express as px
10
  import zipfile
11
  import json
12
- from cryptography.fernet import Fernet
13
  from streamlit_extras.stylable_container import stylable_container
14
  from typing import Optional
15
  from gliner import GLiNER
16
  from comet_ml import Experiment
17
  import hashlib
18
 
19
- # Set up environment variables
20
- os.environ['HF_HOME'] = '/tmp'
21
 
22
  st.markdown(
23
  """
@@ -45,6 +44,15 @@ st.markdown(
45
  background-color: #D4F4D4; /* A light, soft green */
46
  color: #000000; /* Black for text */
47
  }
 
 
 
 
 
 
 
 
 
48
  /* Button background and text color */
49
  .stButton > button {
50
  background-color: #D4F4D4;
@@ -69,18 +77,31 @@ st.markdown(
69
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
70
  st.subheader("HR.ai", divider="green")
71
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 
72
  expander = st.expander("**Important notes**")
73
- expander.write("""**Named Entities:** This HR.ai predicts thirty-six (36) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"
 
74
  Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
 
75
  **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
 
76
  **Usage Limits:** You can request results unlimited times for one (1) month.
 
77
  **Supported Languages:** English
78
- **Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 
 
 
79
 
80
  with st.sidebar:
81
  st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
82
  code = '''
83
- <iframe src="https://aiecosystem-hr-ai.hf.space" frameborder="0" width="850" height="450" ></iframe>
 
 
 
 
 
84
  '''
85
  st.code(code, language="html")
86
  st.text("")
@@ -99,14 +120,14 @@ if not comet_initialized:
99
  st.warning("Comet ML not initialized. Check environment variables.")
100
 
101
  # --- Label Definitions ---
102
- labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Job_title", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
103
 
104
  # Create a mapping dictionary for labels to categories
105
  category_mapping = {
106
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
107
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
108
  "Employment Status": ["Full_time", "Part_time", "Contract", "Terminated", "Retired"],
109
- "Employment Information": ["Job_title", "Date", "Organization", "Role"],
110
  "Performance": ["Performance_score"],
111
  "Attendance": ["Leave_of_absence"],
112
  "Benefits": ["Retirement_plan", "Bonus", "Stock_options", "Health_insurance"],
@@ -197,7 +218,7 @@ if st.button("Results"):
197
  # --- Treemap Display Section ---
198
  if 'df' in st.session_state and not st.session_state.df.empty:
199
  st.divider()
200
- st.subheader("Tree map", divider="green")
201
  fig_treemap = px.treemap(st.session_state.df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
202
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
203
  st.plotly_chart(fig_treemap)
@@ -213,7 +234,7 @@ def load_gliner_model():
213
  st.stop()
214
 
215
  qa_model = load_gliner_model()
216
- st.subheader("Question-Answering", divider="violet")
217
 
218
  if 'user_labels' not in st.session_state:
219
  st.session_state.user_labels = []
@@ -231,7 +252,7 @@ if st.button("Add Question"):
231
  st.warning("Please enter a question.")
232
 
233
  st.markdown("---")
234
- st.subheader("Record of Questions", divider="violet")
235
  if st.session_state.user_labels:
236
  for i, label in enumerate(st.session_state.user_labels):
237
  col_list, col_delete = st.columns([0.9, 0.1])
@@ -274,39 +295,56 @@ if st.button("Extract Answers"):
274
  df2 = df1[['label', 'text', 'score']]
275
  df = df2.rename(columns={'label': 'question', 'text': 'answer'})
276
 
277
- st.subheader("Extracted Answers", divider="violet")
278
  st.dataframe(df, use_container_width=True)
279
  st.divider()
280
 
281
- dfa = pd.DataFrame(
282
- data={
283
- 'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
284
- 'Description': [
285
- 'entity extracted from your text data',
286
- 'label (tag) assigned to a given extracted entity',
287
- 'accuracy score; how accurately a tag has been assigned to a given entity',
288
- 'index of the start of the corresponding entity',
289
- 'index of the end of the corresponding entity',
290
- 'the broader category the entity belongs to',
291
- ]
292
- }
293
- )
294
- buf = io.BytesIO()
295
- with zipfile.ZipFile(buf, "w") as myzip:
296
- myzip.writestr("Summary of the results.csv", df.to_csv(index=False))
297
- myzip.writestr("Glossary of tags.csv", dfa.to_csv(index=False))
298
-
299
- with stylable_container(
300
- key="download_button",
301
- css_styles="""button { background-color: red; border: 1px solid black; padding: 5px; color: white; }""",
302
- ):
303
- st.download_button(
304
- label="Download results and glossary (zip)",
305
- data=buf.getvalue(),
306
- file_name="nlpblogs_results.zip",
307
- mime="application/zip",
308
- )
309
- else:
310
- st.warning("No answers were found for the provided questions.")
311
- except Exception as e:
312
- st.error(f"An error occurred during answer extraction: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  import plotly.express as px
10
  import zipfile
11
  import json
12
+
13
  from streamlit_extras.stylable_container import stylable_container
14
  from typing import Optional
15
  from gliner import GLiNER
16
  from comet_ml import Experiment
17
  import hashlib
18
 
19
+
 
20
 
21
  st.markdown(
22
  """
 
44
  background-color: #D4F4D4; /* A light, soft green */
45
  color: #000000; /* Black for text */
46
  }
47
+
48
+ /* Text input background and text color */
49
+ .stTextInput textinput {
50
+ background-color: #D4F4D4; /* A light, soft green */
51
+ color: #000000; /* Black for text */
52
+ }
53
+
54
+
55
+
56
  /* Button background and text color */
57
  .stButton > button {
58
  background-color: #D4F4D4;
 
77
  st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
78
  st.subheader("HR.ai", divider="green")
79
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
80
+
81
  expander = st.expander("**Important notes**")
82
# Notes shown inside the "Important notes" expander. The spelled-out count and
# the numeral must both match the number of entries in the `labels` list (35).
expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"

Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.

**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.

**Usage Limits:** You can request results unlimited times for one (1) month.

**Supported Languages:** English

**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.

For any errors or inquiries, please contact us at info@nlpblogs.com""")
95
 
96
  with st.sidebar:
97
  st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
98
  code = '''
99
+ <iframe
100
+ src="https://aiecosystem-hr-ai.hf.space"
101
+ frameborder="0"
102
+ width="850"
103
+ height="450"
104
+ ></iframe>
105
  '''
106
  st.code(code, language="html")
107
  st.text("")
 
120
  st.warning("Comet ML not initialized. Check environment variables.")
121
 
122
  # --- Label Definitions ---
123
+ labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
124
 
125
  # Create a mapping dictionary for labels to categories
126
  category_mapping = {
127
  "Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
128
  "Personal Details": ["Date_of_birth", "Marital_status", "Person"],
129
  "Employment Status": ["Full_time", "Part_time", "Contract", "Terminated", "Retired"],
130
+ "Employment Information": ["Date", "Organization", "Role"],
131
  "Performance": ["Performance_score"],
132
  "Attendance": ["Leave_of_absence"],
133
  "Benefits": ["Retirement_plan", "Bonus", "Stock_options", "Health_insurance"],
 
218
  # --- Treemap Display Section ---
219
  if 'df' in st.session_state and not st.session_state.df.empty:
220
  st.divider()
221
+ st.subheader("Candidate Card", divider="green")
222
  fig_treemap = px.treemap(st.session_state.df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
223
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
224
  st.plotly_chart(fig_treemap)
 
234
  st.stop()
235
 
236
  qa_model = load_gliner_model()
237
+ st.subheader("Question-Answering", divider="green")
238
 
239
  if 'user_labels' not in st.session_state:
240
  st.session_state.user_labels = []
 
252
  st.warning("Please enter a question.")
253
 
254
  st.markdown("---")
255
+ st.subheader("Record of Questions", divider="green")
256
  if st.session_state.user_labels:
257
  for i, label in enumerate(st.session_state.user_labels):
258
  col_list, col_delete = st.columns([0.9, 0.1])
 
295
  df2 = df1[['label', 'text', 'score']]
296
  df = df2.rename(columns={'label': 'question', 'text': 'answer'})
297
 
298
+ st.subheader("Extracted Answers", divider="green")
299
  st.dataframe(df, use_container_width=True)
300
  st.divider()
301
 
302
def create_zip_file_and_get_bytes():
    """Build an in-memory zip archive of the available result tables.

    Reads ``df_ner`` and ``df_qa`` from ``st.session_state``. Every one of
    them that is present and non-empty is written to the archive as a CSV
    file, followed by a glossary CSV describing the result columns.

    Returns:
        A ``(zip_bytes, file_name)`` tuple, or ``(None, None)`` when there is
        no non-empty result table to export.
    """
    # Archive member name -> session-state key holding the DataFrame.
    exports = {
        "Extracted_Entities.csv": "df_ner",
        "Extracted_Answers.csv": "df_qa",
    }

    # Keep only tables that exist and contain rows. A key that is missing or
    # explicitly set to None is treated the same as "no results".
    available = {}
    for member_name, state_key in exports.items():
        frame = st.session_state.get(state_key)
        if frame is not None and not frame.empty:
            available[member_name] = frame

    # Nothing to export: return before building the glossary or the archive,
    # so callers never receive a glossary-only zip.
    if not available:
        return None, None

    # Glossary describing the columns of the exported result tables.
    dfa = pd.DataFrame(
        data={
            'Column Name': ['text', 'label', 'score', 'start', 'end', 'category'],
            'Description': [
                'entity extracted from your text data',
                'label (tag) assigned to a given extracted entity',
                'accuracy score; how accurately a tag has been assigned to a given entity',
                'index of the start of the corresponding entity',
                'index of the end of the corresponding entity',
                'the broader category the entity belongs to',
            ],
        }
    )

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w") as myzip:
        for member_name, frame in available.items():
            myzip.writestr(member_name, frame.to_csv(index=False))
        myzip.writestr("Glossary_of_tags.csv", dfa.to_csv(index=False))

    # getvalue() is read after the archive is closed so the zip central
    # directory has been written into the buffer.
    return buf.getvalue(), "nlpblogs_results.zip"
332
+
333
# --- Download Section ---
st.divider()

# Short-circuit evaluation keeps the attribute lookups safe: `.empty` is only
# read when the corresponding key is present in the session state.
has_ner_results = 'df_ner' in st.session_state and not st.session_state.df_ner.empty
has_any_results = has_ner_results or (
    'df_qa' in st.session_state and not st.session_state.df_qa.empty
)

if has_any_results:
    zip_data, file_name = create_zip_file_and_get_bytes()
    # The helper signals "nothing to download" with a falsy payload.
    if zip_data:
        download_button_css = """button { background-color: red; border: 1px solid black; padding: 5px; color: white; }"""
        with stylable_container(key="download_button", css_styles=download_button_css):
            st.download_button(
                label="Download results and glossary (zip)",
                data=zip_data,
                file_name=file_name,
                mime="application/zip",
            )
349
+
350
+