Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +82 -44
src/streamlit_app.py
CHANGED
|
@@ -9,15 +9,14 @@ import io
|
|
| 9 |
import plotly.express as px
|
| 10 |
import zipfile
|
| 11 |
import json
|
| 12 |
-
|
| 13 |
from streamlit_extras.stylable_container import stylable_container
|
| 14 |
from typing import Optional
|
| 15 |
from gliner import GLiNER
|
| 16 |
from comet_ml import Experiment
|
| 17 |
import hashlib
|
| 18 |
|
| 19 |
-
|
| 20 |
-
os.environ['HF_HOME'] = '/tmp'
|
| 21 |
|
| 22 |
st.markdown(
|
| 23 |
"""
|
|
@@ -45,6 +44,15 @@ st.markdown(
|
|
| 45 |
background-color: #D4F4D4; /* A light, soft green */
|
| 46 |
color: #000000; /* Black for text */
|
| 47 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
/* Button background and text color */
|
| 49 |
.stButton > button {
|
| 50 |
background-color: #D4F4D4;
|
|
@@ -69,18 +77,31 @@ st.markdown(
|
|
| 69 |
st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
|
| 70 |
st.subheader("HR.ai", divider="green")
|
| 71 |
st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
|
|
|
|
| 72 |
expander = st.expander("**Important notes**")
|
| 73 |
-
expander.write("""**Named Entities:** This HR.ai predicts thirty-six (
|
|
|
|
| 74 |
Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
|
|
|
|
| 75 |
**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
|
|
|
|
| 76 |
**Usage Limits:** You can request results unlimited times for one (1) month.
|
|
|
|
| 77 |
**Supported Languages:** English
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
with st.sidebar:
|
| 81 |
st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
|
| 82 |
code = '''
|
| 83 |
-
<iframe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
'''
|
| 85 |
st.code(code, language="html")
|
| 86 |
st.text("")
|
|
@@ -99,14 +120,14 @@ if not comet_initialized:
|
|
| 99 |
st.warning("Comet ML not initialized. Check environment variables.")
|
| 100 |
|
| 101 |
# --- Label Definitions ---
|
| 102 |
-
labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "
|
| 103 |
|
| 104 |
# Create a mapping dictionary for labels to categories
|
| 105 |
category_mapping = {
|
| 106 |
"Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
|
| 107 |
"Personal Details": ["Date_of_birth", "Marital_status", "Person"],
|
| 108 |
"Employment Status": ["Full_time", "Part_time", "Contract", "Terminated", "Retired"],
|
| 109 |
-
"Employment Information": ["
|
| 110 |
"Performance": ["Performance_score"],
|
| 111 |
"Attendance": ["Leave_of_absence"],
|
| 112 |
"Benefits": ["Retirement_plan", "Bonus", "Stock_options", "Health_insurance"],
|
|
@@ -197,7 +218,7 @@ if st.button("Results"):
|
|
| 197 |
# --- Treemap Display Section ---
|
| 198 |
if 'df' in st.session_state and not st.session_state.df.empty:
|
| 199 |
st.divider()
|
| 200 |
-
st.subheader("
|
| 201 |
fig_treemap = px.treemap(st.session_state.df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
|
| 202 |
fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
|
| 203 |
st.plotly_chart(fig_treemap)
|
|
@@ -213,7 +234,7 @@ def load_gliner_model():
|
|
| 213 |
st.stop()
|
| 214 |
|
| 215 |
qa_model = load_gliner_model()
|
| 216 |
-
st.subheader("Question-Answering", divider="
|
| 217 |
|
| 218 |
if 'user_labels' not in st.session_state:
|
| 219 |
st.session_state.user_labels = []
|
|
@@ -231,7 +252,7 @@ if st.button("Add Question"):
|
|
| 231 |
st.warning("Please enter a question.")
|
| 232 |
|
| 233 |
st.markdown("---")
|
| 234 |
-
st.subheader("Record of Questions", divider="
|
| 235 |
if st.session_state.user_labels:
|
| 236 |
for i, label in enumerate(st.session_state.user_labels):
|
| 237 |
col_list, col_delete = st.columns([0.9, 0.1])
|
|
@@ -274,39 +295,56 @@ if st.button("Extract Answers"):
|
|
| 274 |
df2 = df1[['label', 'text', 'score']]
|
| 275 |
df = df2.rename(columns={'label': 'question', 'text': 'answer'})
|
| 276 |
|
| 277 |
-
st.subheader("Extracted Answers", divider="
|
| 278 |
st.dataframe(df, use_container_width=True)
|
| 279 |
st.divider()
|
| 280 |
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
import plotly.express as px
|
| 10 |
import zipfile
|
| 11 |
import json
|
| 12 |
+
|
| 13 |
from streamlit_extras.stylable_container import stylable_container
|
| 14 |
from typing import Optional
|
| 15 |
from gliner import GLiNER
|
| 16 |
from comet_ml import Experiment
|
| 17 |
import hashlib
|
| 18 |
|
| 19 |
+
|
|
|
|
| 20 |
|
| 21 |
st.markdown(
|
| 22 |
"""
|
|
|
|
| 44 |
background-color: #D4F4D4; /* A light, soft green */
|
| 45 |
color: #000000; /* Black for text */
|
| 46 |
}
|
| 47 |
+
|
| 48 |
+
/* Text input background and text color.
   The selector targets the <input> element inside the .stTextInput wrapper;
   there is no `textinput` HTML element, so a `textinput` type selector
   would never match and these colours would not be applied. */
.stTextInput input {
    background-color: #D4F4D4; /* A light, soft green */
    color: #000000; /* Black for text */
}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
|
| 56 |
/* Button background and text color */
|
| 57 |
.stButton > button {
|
| 58 |
background-color: #D4F4D4;
|
|
|
|
| 77 |
st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
|
| 78 |
st.subheader("HR.ai", divider="green")
|
| 79 |
st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
|
| 80 |
+
|
| 81 |
expander = st.expander("**Important notes**")
|
| 82 |
+
expander.write("""**Named Entities:** This HR.ai predicts thirty-five (35) labels: "Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"
|
| 83 |
+
|
| 84 |
Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
|
| 85 |
+
|
| 86 |
**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
|
| 87 |
+
|
| 88 |
**Usage Limits:** You can request results unlimited times for one (1) month.
|
| 89 |
+
|
| 90 |
**Supported Languages:** English
|
| 91 |
+
|
| 92 |
+
**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.
|
| 93 |
+
|
| 94 |
+
For any errors or inquiries, please contact us at info@nlpblogs.com""")
|
| 95 |
|
| 96 |
with st.sidebar:
|
| 97 |
st.write("Use the following code to embed the HR.ai web app on your website. Feel free to adjust the width and height values to fit your page.")
|
| 98 |
code = '''
|
| 99 |
+
<iframe
|
| 100 |
+
src="https://aiecosystem-hr-ai.hf.space"
|
| 101 |
+
frameborder="0"
|
| 102 |
+
width="850"
|
| 103 |
+
height="450"
|
| 104 |
+
></iframe>
|
| 105 |
'''
|
| 106 |
st.code(code, language="html")
|
| 107 |
st.text("")
|
|
|
|
| 120 |
st.warning("Comet ML not initialized. Check environment variables.")
|
| 121 |
|
| 122 |
# --- Label Definitions ---
|
| 123 |
+
labels = ["Email", "Phone_number", "Street_address", "City", "Country", "Date_of_birth", "Marital_status", "Person", "Full_time", "Part_time", "Contract", "Terminated", "Retired", "Date", "Organization", "Role", "Performance_score", "Leave_of_absence", "Retirement_plan", "Bonus", "Stock_options", "Health_insurance", "Pay_rate", "Annual_salary", "Tax", "Deductions", "Interview_type", "Applicant", "Referral", "Job_board", "Recruiter", "Offer_letter", "Agreement", "Certification", "Skill"]
|
| 124 |
|
| 125 |
# Create a mapping dictionary for labels to categories
|
| 126 |
category_mapping = {
|
| 127 |
"Contact Information": ["Email", "Phone_number", "Street_address", "City", "Country"],
|
| 128 |
"Personal Details": ["Date_of_birth", "Marital_status", "Person"],
|
| 129 |
"Employment Status": ["Full_time", "Part_time", "Contract", "Terminated", "Retired"],
|
| 130 |
+
"Employment Information": ["Date", "Organization", "Role"],
|
| 131 |
"Performance": ["Performance_score"],
|
| 132 |
"Attendance": ["Leave_of_absence"],
|
| 133 |
"Benefits": ["Retirement_plan", "Bonus", "Stock_options", "Health_insurance"],
|
|
|
|
| 218 |
# --- Treemap Display Section ---
|
| 219 |
if 'df' in st.session_state and not st.session_state.df.empty:
|
| 220 |
st.divider()
|
| 221 |
+
st.subheader("Candidate Card", divider="green")
|
| 222 |
fig_treemap = px.treemap(st.session_state.df, path=[px.Constant("all"), 'category', 'label', 'text'], values='score', color='category')
|
| 223 |
fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), paper_bgcolor='#F5FFFA', plot_bgcolor='#F5FFFA')
|
| 224 |
st.plotly_chart(fig_treemap)
|
|
|
|
| 234 |
st.stop()
|
| 235 |
|
| 236 |
qa_model = load_gliner_model()
|
| 237 |
+
st.subheader("Question-Answering", divider="green")
|
| 238 |
|
| 239 |
if 'user_labels' not in st.session_state:
|
| 240 |
st.session_state.user_labels = []
|
|
|
|
| 252 |
st.warning("Please enter a question.")
|
| 253 |
|
| 254 |
st.markdown("---")
|
| 255 |
+
st.subheader("Record of Questions", divider="green")
|
| 256 |
if st.session_state.user_labels:
|
| 257 |
for i, label in enumerate(st.session_state.user_labels):
|
| 258 |
col_list, col_delete = st.columns([0.9, 0.1])
|
|
|
|
| 295 |
df2 = df1[['label', 'text', 'score']]
|
| 296 |
df = df2.rename(columns={'label': 'question', 'text': 'answer'})
|
| 297 |
|
| 298 |
+
st.subheader("Extracted Answers", divider="green")
|
| 299 |
st.dataframe(df, use_container_width=True)
|
| 300 |
st.divider()
|
| 301 |
|
| 302 |
+
def create_zip_file_and_get_bytes():
    """Build an in-memory zip archive from the results kept in session state.

    Looks up ``df_ner`` and ``df_qa`` in ``st.session_state``. Each of them
    that is present and non-empty is written to the archive as a CSV file,
    and a glossary CSV describing the result columns is always appended.

    Returns:
        A ``(zip_bytes, file_name)`` tuple, or ``(None, None)`` when neither
        ``df_ner`` nor ``df_qa`` is stored in the session state.
    """
    session = st.session_state

    # Neither result table has been stored yet, so there is nothing to package.
    if not ('df_ner' in session or 'df_qa' in session):
        return None, None

    # The glossary is defined inside the function so it is always available
    # to be written alongside whatever results exist.
    glossary_rows = [
        ('text', 'entity extracted from your text data'),
        ('label', 'label (tag) assigned to a given extracted entity'),
        ('score', 'accuracy score; how accurately a tag has been assigned to a given entity'),
        ('start', 'index of the start of the corresponding entity'),
        ('end', 'index of the end of the corresponding entity'),
        ('category', 'the broader category the entity belongs to'),
    ]
    glossary = pd.DataFrame(glossary_rows, columns=['Column Name', 'Description'])

    # (archive member name, session-state key) pairs, in write order.
    exports = (
        ("Extracted_Entities.csv", 'df_ner'),
        ("Extracted_Answers.csv", 'df_qa'),
    )

    archive_buffer = io.BytesIO()
    with zipfile.ZipFile(archive_buffer, "w") as archive:
        for member_name, state_key in exports:
            if state_key not in session:
                continue
            frame = getattr(session, state_key)
            if frame.empty:
                continue
            archive.writestr(member_name, frame.to_csv(index=False))
        archive.writestr("Glossary_of_tags.csv", glossary.to_csv(index=False))

    return archive_buffer.getvalue(), "nlpblogs_results.zip"
|
| 332 |
+
|
| 333 |
+
st.divider()

# Show the download button only when at least one result table has rows.
# any() over a generator keeps the short-circuit: the second table is not
# inspected once the first one qualifies.
if any(
    key in st.session_state and not getattr(st.session_state, key).empty
    for key in ('df_ner', 'df_qa')
):
    zip_data, file_name = create_zip_file_and_get_bytes()
    if zip_data:
        download_style = """button { background-color: red; border: 1px solid black; padding: 5px; color: white; }"""
        with stylable_container(key="download_button", css_styles=download_style):
            st.download_button(
                label="Download results and glossary (zip)",
                data=zip_data,
                file_name=file_name,
                mime="application/zip",
            )
|
| 349 |
+
|
| 350 |
+
|