Made all the changes to app.py to get it working
app.py
CHANGED
```diff
@@ -150,17 +150,10 @@ def ner_inference(txt):
 def ner_inference_long_text(txt):
     entities = []
     doc = nlp(txt)
-    n_sents = len([_ for _ in doc.sents])
-    n = 0
-    progress_bar = st.progress(0, text=f'Processed 0 / {n_sents} sentences')
     for sent in doc.sents:
-        entities.extend(ner_inference(sent.text))
-        n += 1
-        progress_bar.progress(n / n_sents, text=f'Processed {n} / {n_sents} sentences')
-    # progress_bar.empty()
+        entities.extends(ner_inference(sent.text))
     return entities
 
-
 def get_ner_text(article_txt, ner_result):
     res_txt = ''
     start = 0
```
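Note on the replacement line: Python lists expose `extend`, not `extends`, so the added call `entities.extends(...)` will raise `AttributeError: 'list' object has no attribute 'extends'` on the first sentence. A minimal runnable sketch of the intended per-sentence loop, assuming a spaCy pipeline `nlp` and using a hypothetical stand-in for the app's real `ner_inference`:

```python
import spacy

nlp = spacy.load("en_core_web_sm")  # assumption: any pipeline that segments sentences

def ner_inference(text):
    # hypothetical stand-in for the app's ner_inference(); it returns
    # (entity, label, confidence) triples, matching how main() consumes them
    return [(ent.text, ent.label_, 1.0) for ent in nlp(text).ents]

def ner_inference_long_text(txt):
    entities = []
    doc = nlp(txt)
    for sent in doc.sents:
        entities.extend(ner_inference(sent.text))  # list.extend, not .extends
    return entities
```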
```diff
@@ -188,7 +181,7 @@ def get_ner_text(article_txt, ner_result):
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
 SUMM_CHECKPOINT = "facebook/bart-base"
 SUMM_INPUT_N_TOKENS = 400
-SUMM_TARGET_N_TOKENS =
+SUMM_TARGET_N_TOKENS = 100
 
 @st.cache_resource
 def load_summarizer_models():
```
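For context, `@st.cache_resource` caches the loaded model and tokenizer across Streamlit reruns, and the two token constants bound the input and output lengths. The bodies of `load_summarizer_models()` and `summ_inference()` fall outside this diff, so the following is only a hedged sketch of how such constants are typically wired to a `facebook/bart-base` checkpoint with plain `transformers`:

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

SUMM_CHECKPOINT = "facebook/bart-base"
SUMM_INPUT_N_TOKENS = 400   # truncate the article to this many input tokens
SUMM_TARGET_N_TOKENS = 100  # cap the generated summary at this many tokens

tokenizer = AutoTokenizer.from_pretrained(SUMM_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(SUMM_CHECKPOINT)

def summ_inference_sketch(txt: str) -> str:
    # tokenize with truncation so long articles fit the encoder budget
    inputs = tokenizer(txt, truncation=True, max_length=SUMM_INPUT_N_TOKENS,
                       return_tensors="pt")
    summary_ids = model.generate(**inputs, max_length=SUMM_TARGET_N_TOKENS)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
```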
```diff
@@ -242,166 +235,20 @@ def summ_inference(txt: str):
 
 ############## ENTRY POINT START #######################
 def main():
-    st.markdown('''<h3>Text Summarizer</h3>
-
-
-
-
-
-    # </p>
-
-    ''', unsafe_allow_html=True)
-    input_type = st.radio('Select an option:', ['Paste news URL', 'Paste news text'],
-                          horizontal=True)
-
-    scrape_error = None
-    summary_error = None
-    ner_error = None
-    summ_result = None
-    ner_result = None
-    ner_df = None
-    article_txt = None
-
-
-    if input_type == 'Paste news URL':
-        article_url = st.text_input("Paste the URL of a news article", "")
-
-        if (st.button("Submit")) or (article_url):
-            with st.status("Processing...", expanded=True) as status:
-                status.empty()
-                # Scraping data Start
-                try:
-                    st.info("Scraping data from the URL.", icon="ℹ️")
-                    article_txt = scrape_text(article_url)
-                    st.success("Successfully scraped the data.", icon="✅")
-                except Exception as e:
-                    article_txt = None
-                    scrape_error = str(e)
-
-                # Scraping data End
-
-                if article_txt is not None:
-                    article_txt = re.sub(r'\n+',' ', article_txt)
-
-                    # Generating summary start
-
-                    try:
-                        st.info("Generating the summary.", icon="ℹ️")
-                        summ_result = summ_inference(article_txt)
-                    except Exception as e:
-                        summ_result = None
-                        summary_error = str(e)
-                    if summ_result is not None:
-                        st.success("Successfully generated the summary.", icon="✅")
-                    else:
-                        st.error("Encountered an error while generating the summary.", icon="🚨")
-
-                    # Generating summary end
-
-
-                    # NER start
-                    try:
-                        st.info("Recognizing the entites.", icon="ℹ️")
-                        ner_result = [[ent, label.upper(), np.round(prob, 3)]
-                                      for ent, label, prob in ner_inference_long_text(article_txt)]
-
-                        ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
-
-                        ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
-
-                    except Exception as e:
-                        ner_result = None
-                        ner_error = str(e)
-                    if ner_result is not None:
-                        st.success("Successfully recognized the entites.", icon="✅")
-                    else:
-                        st.error("Encountered an error while recognizing the entites.", icon="🚨")
-
-                    # NER end
-                else:
-                    st.error("Encountered an error while scraping the data.", icon="🚨")
-
-                if (scrape_error is None) and (summary_error is None) and (ner_error is None):
-                    status.update(label="Done", state="complete", expanded=False)
-                else:
-                    status.update(label="Error", state="error", expanded=False)
-
-            if scrape_error is not None:
-                st.error(f"Scrape Error: \n{scrape_error}", icon="🚨")
-            else:
-                if summary_error is not None:
-                    st.error(f"Summary Error: \n{summary_error}", icon="🚨")
-                else:
-                    st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
-
-                if ner_error is not None:
-                    st.error(f"NER Error \n{ner_error}", icon="🚨")
-                else:
-                    st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
-                    # st.dataframe(ner_df, use_container_width=True)
-
-                st.markdown(f"<h4>SCRAPED TEXT:</h4>{article_txt}", unsafe_allow_html=True)
-
-    else:
-        article_txt = st.text_area("Paste the text of a news article", "", height=150)
-
-        if (st.button("Submit")) or (article_txt):
-            with st.status("Processing...", expanded=True) as status:
-                article_txt = re.sub(r'\n+',' ', article_txt)
-
-                # Generating summary start
-
-                try:
-                    st.info("Generating the summary.", icon="ℹ️")
-                    summ_result = summ_inference(article_txt)
-                except Exception as e:
-                    summ_result = None
-                    summary_error = str(e)
-                if summ_result is not None:
-                    st.success("Successfully generated the summary.", icon="✅")
-                else:
-                    st.error("Encountered an error while generating the summary.", icon="🚨")
-
-                # Generating summary end
-
-
-                # NER start
-                try:
-                    st.info("Recognizing the entites.", icon="ℹ️")
-                    ner_result = [[ent, label.upper(), np.round(prob, 3)]
-                                  for ent, label, prob in ner_inference_long_text(article_txt)]
-
-                    ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
-
-                    ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
-
-                except Exception as e:
-                    ner_result = None
-                    ner_error = str(e)
-                if ner_result is not None:
-                    st.success("Successfully recognized the entites.", icon="✅")
-                else:
-                    st.error("Encountered an error while recognizing the entites.", icon="🚨")
-
-                # NER end
-
-                if (summary_error is None) and (ner_error is None):
-                    status.update(label="Done", state="complete", expanded=False)
-                else:
-                    status.update(label="Error", state="error", expanded=False)
-
-            if summary_error is not None:
-                st.error(f"Summary Error: \n{summary_error}", icon="🚨")
-            else:
-                st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
-
-            if ner_error is not None:
-                st.error(f"NER Error \n{ner_error}", icon="🚨")
-            else:
-                st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
-                # st.dataframe(ner_df, use_container_width=True)
+    st.markdown('''<h3>Text Summarizer</h3>
+    #<p><a href="https://huggingface.co/spaces/Sravan1214/news-summarizer-ner/blob/main/README.md" target="_blank">README</a></p>''', unsafe_allow_html=True)
+    article_txt = st.text_area("Paste the text (the longer, the better):", "", height=200)
+    article_txt = re.sub(r'\n+',' ', article_txt)
+    if st.button("Submit"):
+        ner_result = [[ent, label.upper(), np.round(prob, 3)]
+                      for ent, label, prob in ner_inference_long_text(article_txt)]
+
+        ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
+        summ_result = summ_inference(article_txt)
+
+        ner_txt = get_ner_text(article_txt, ner_result).replace('$', '\$')
+
+        st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
 
 ############## ENTRY POINT END #######################
 
```
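The removed `main()` wrapped each step in per-stage `st.info`/`st.success` messages inside an `st.status` container, paired with the sentence-progress bar deleted from `ner_inference_long_text` above. Both are real Streamlit APIs; for reference, the removed pattern reduces to this runnable sketch, with a dummy loop standing in for the per-sentence NER work:

```python
import time
import streamlit as st

with st.status("Processing...", expanded=True) as status:
    n_sents = 5  # stand-in for len(list(doc.sents))
    bar = st.progress(0, text=f"Processed 0 / {n_sents} sentences")
    for n in range(1, n_sents + 1):
        time.sleep(0.2)  # stand-in for ner_inference(sent.text)
        bar.progress(n / n_sents, text=f"Processed {n} / {n_sents} sentences")
    status.update(label="Done", state="complete", expanded=False)
```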
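One consequence of the rewrite worth noting: the new `main()` still builds `ner_df` and `ner_txt`, but only the summary is rendered, so the entity output the old code displayed is now dead code. If it was meant to stay, the removed rendering lines translate directly; here is a self-contained sketch with placeholder values for the two variables:

```python
import pandas as pd
import streamlit as st

# placeholders for the values the app builds from get_ner_text() and the NER results
ner_txt = "Apple (ORG) unveiled the new iPhone (PRODUCT) ..."
ner_df = pd.DataFrame([["Apple", "ORG", 0.998], ["iPhone", "PRODUCT", 0.985]],
                      columns=["entity", "label", "confidence"])

st.markdown(f"<h4>ENTITIES:</h4>{ner_txt}", unsafe_allow_html=True)
st.dataframe(ner_df, use_container_width=True)
```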