Spaces:

AIEcosystem
/

DataHarvest

Running

App Files Files Community

AIEcosystem committed on Sep 23

Commit

ecaff1f

verified ·

1 Parent(s): 0295c27

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +7 -9

src/streamlit_app.py CHANGED Viewed

@@ -13,7 +13,6 @@ from typing import Optional
 from gliner import GLiNER
 from comet_ml import Experiment
 # --- Page Configuration and UI Elements ---
 st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 st.subheader("DataHarvest", divider="violet")
@@ -21,7 +20,11 @@ st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 st.markdown(':rainbow[**Supported Languages: English**]')
 expander = st.expander("**Important notes**")
-expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.**Usage Limits:** You can request results unlimited times for one (1) month.**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 with st.sidebar:
     st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
@@ -89,11 +92,6 @@ def clear_text():
     st.session_state.results_df = pd.DataFrame()
     st.session_state.elapsed_time = 0.0
-def remove_punctuation(text):
-    """Removes punctuation from a string."""
-    translator = str.maketrans('', '', string.punctuation)
-    return text.translate(translator)
 st.button("Clear text", on_click=clear_text)
 # --- Results Section ---
@@ -111,8 +109,8 @@ if st.button("Results"):
             st.session_state.last_text = text
             start_time = time.time()
             with st.spinner("Extracting entities...", show_time=True):
-                cleaned_text = remove_punctuation(text)
-                entities = model.predict_entities(cleaned_text, labels)
                 df = pd.DataFrame(entities)
                 st.session_state.results_df = df
                 if not df.empty:

 from gliner import GLiNER
 from comet_ml import Experiment
 # --- Page Configuration and UI Elements ---
 st.set_page_config(layout="wide", page_title="Named Entity Recognition App")
 st.subheader("DataHarvest", divider="violet")
 st.markdown(':rainbow[**Supported Languages: English**]')
 expander = st.expander("**Important notes**")
+expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
+Results are presented in easy-to-read tables, visualized in an interactive tree map, pie chart and bar chart, and are available for download along with a Glossary of tags.
+**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract and tag entities in your text data.
+**Usage Limits:** You can request results unlimited times for one (1) month.
+**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL. For any errors or inquiries, please contact us at info@nlpblogs.com""")
 with st.sidebar:
     st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
     st.session_state.results_df = pd.DataFrame()
     st.session_state.elapsed_time = 0.0
 st.button("Clear text", on_click=clear_text)
 # --- Results Section ---
             st.session_state.last_text = text
             start_time = time.time()
             with st.spinner("Extracting entities...", show_time=True):
+                # Pass the raw text directly to the model
+                entities = model.predict_entities(text, labels)
                 df = pd.DataFrame(entities)
                 st.session_state.results_df = df
                 if not df.empty: