Made all the changes to app.py to get it working
app.py
CHANGED
```diff
@@ -150,17 +150,10 @@ def ner_inference(txt):
 def ner_inference_long_text(txt):
     entities = []
     doc = nlp(txt)
-    n_sents = len([_ for _ in doc.sents])
-    n = 0
-    progress_bar = st.progress(0, text=f'Processed 0 / {n_sents} sentences')
     for sent in doc.sents:
-        entities.extend(ner_inference(sent.text))
-        n += 1
-        progress_bar.progress(n / n_sents, text=f'Processed {n} / {n_sents} sentences')
-    # progress_bar.empty()
+        entities.extends(ner_inference(sent.text))
     return entities
 
-
 def get_ner_text(article_txt, ner_result):
     res_txt = ''
     start = 0
```
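Note on the replacement line: Python lists expose `extend`, not `extends`, so the added call `entities.extends(...)` will raise `AttributeError: 'list' object has no attribute 'extends'` on the first sentence. A minimal runnable sketch of the intended per-sentence loop, assuming a spaCy pipeline `nlp` and using a hypothetical stand-in for the app's real `ner_inference`:

```python
import spacy

nlp = spacy.load("en_core_web_sm")  # assumption: any pipeline that segments sentences

def ner_inference(text):
    # hypothetical stand-in for the app's ner_inference(); it returns
    # (entity, label, confidence) triples, matching how main() consumes them
    return [(ent.text, ent.label_, 1.0) for ent in nlp(text).ents]

def ner_inference_long_text(txt):
    entities = []
    doc = nlp(txt)
    for sent in doc.sents:
        entities.extend(ner_inference(sent.text))  # list.extend, not .extends
    return entities
```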
```diff
@@ -188,7 +181,7 @@ def get_ner_text(article_txt, ner_result):
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
 SUMM_CHECKPOINT = "facebook/bart-base"
 SUMM_INPUT_N_TOKENS = 400
-SUMM_TARGET_N_TOKENS =
+SUMM_TARGET_N_TOKENS = 100
 
 @st.cache_resource
 def load_summarizer_models():
```
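For context, `@st.cache_resource` caches the loaded model and tokenizer across Streamlit reruns, and the two token constants bound the input and output lengths. The bodies of `load_summarizer_models()` and `summ_inference()` fall outside this diff, so the following is only a hedged sketch of how such constants are typically wired to a `facebook/bart-base` checkpoint with plain `transformers`:

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

SUMM_CHECKPOINT = "facebook/bart-base"
SUMM_INPUT_N_TOKENS = 400   # truncate the article to this many input tokens
SUMM_TARGET_N_TOKENS = 100  # cap the generated summary at this many tokens

tokenizer = AutoTokenizer.from_pretrained(SUMM_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(SUMM_CHECKPOINT)

def summ_inference_sketch(txt: str) -> str:
    # tokenize with truncation so long articles fit the encoder budget
    inputs = tokenizer(txt, truncation=True, max_length=SUMM_INPUT_N_TOKENS,
                       return_tensors="pt")
    summary_ids = model.generate(**inputs, max_length=SUMM_TARGET_N_TOKENS)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
```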
```diff
@@ -242,166 +235,20 @@ def summ_inference(txt: str):
 
 ############## ENTRY POINT START #######################
 def main():
-    st.markdown('''<h3>Text Summarizer</h3>
-
-
-
-
-
-    # </p>
-
-    ''', unsafe_allow_html=True)
-    input_type = st.radio('Select an option:', ['Paste news URL', 'Paste news text'],
-                          horizontal=True)
-
-    scrape_error = None
-    summary_error = None
-    ner_error = None
-    summ_result = None
-    ner_result = None
-    ner_df = None
-    article_txt = None
-
-
-    if input_type == 'Paste news URL':
-        article_url = st.text_input("Paste the URL of a news article", "")
-
-        if (st.button("Submit")) or (article_url):
-            with st.status("Processing...", expanded=True) as status:
-                status.empty()
-                # Scraping data Start
-                try:
-                    st.info("Scraping data from the URL.", icon="ℹ️")
-                    article_txt = scrape_text(article_url)
-                    st.success("Successfully scraped the data.", icon="✅")
-                except Exception as e:
-                    article_txt = None
-                    scrape_error = str(e)
-
-                # Scraping data End
-
-                if article_txt is not None:
-                    article_txt = re.sub(r'\n+',' ', article_txt)
-
-                    # Generating summary start
-
-                    try:
-                        st.info("Generating the summary.", icon="ℹ️")
-                        summ_result = summ_inference(article_txt)
-                    except Exception as e:
-                        summ_result = None
-                        summary_error = str(e)
-                    if summ_result is not None:
-                        st.success("Successfully generated the summary.", icon="✅")
-                    else:
-                        st.error("Encountered an error while generating the summary.", icon="🚨")
-
-                    # Generating summary end
-
-
-                    # NER start
-                    try:
-                        st.info("Recognizing the entites.", icon="ℹ️")
-                        ner_result = [[ent, label.upper(), np.round(prob, 3)]
-                                      for ent, label, prob in ner_inference_long_text(article_txt)]
-
-                        ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
-
-                        ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
-
-                    except Exception as e:
-                        ner_result = None
-                        ner_error = str(e)
-                    if ner_result is not None:
-                        st.success("Successfully recognized the entites.", icon="✅")
-                    else:
-                        st.error("Encountered an error while recognizing the entites.", icon="🚨")
-
-                    # NER end
-                else:
-                    st.error("Encountered an error while scraping the data.", icon="🚨")
-
-                if (scrape_error is None) and (summary_error is None) and (ner_error is None):
-                    status.update(label="Done", state="complete", expanded=False)
-                else:
-                    status.update(label="Error", state="error", expanded=False)
-
-            if scrape_error is not None:
-                st.error(f"Scrape Error: \n{scrape_error}", icon="🚨")
-            else:
-                if summary_error is not None:
-                    st.error(f"Summary Error: \n{summary_error}", icon="🚨")
-                else:
-                    st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
-
-                if ner_error is not None:
-                    st.error(f"NER Error \n{ner_error}", icon="🚨")
-                else:
-                    st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
-                    # st.dataframe(ner_df, use_container_width=True)
-
-                st.markdown(f"<h4>SCRAPED TEXT:</h4>{article_txt}", unsafe_allow_html=True)
-
-    else:
-        article_txt = st.text_area("Paste the text of a news article", "", height=150)
-
-        if (st.button("Submit")) or (article_txt):
-            with st.status("Processing...", expanded=True) as status:
-                article_txt = re.sub(r'\n+',' ', article_txt)
-
-                # Generating summary start
-
-                try:
-                    st.info("Generating the summary.", icon="ℹ️")
-                    summ_result = summ_inference(article_txt)
-                except Exception as e:
-                    summ_result = None
-                    summary_error = str(e)
-                if summ_result is not None:
-                    st.success("Successfully generated the summary.", icon="✅")
-                else:
-                    st.error("Encountered an error while generating the summary.", icon="🚨")
-
-                # Generating summary end
-
-
-                # NER start
-                try:
-                    st.info("Recognizing the entites.", icon="ℹ️")
-                    ner_result = [[ent, label.upper(), np.round(prob, 3)]
-                                  for ent, label, prob in ner_inference_long_text(article_txt)]
-
-                    ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
-
-                    ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
-
-                except Exception as e:
-                    ner_result = None
-                    ner_error = str(e)
-                if ner_result is not None:
-                    st.success("Successfully recognized the entites.", icon="✅")
-                else:
-                    st.error("Encountered an error while recognizing the entites.", icon="🚨")
-
-                # NER end
-
-                if (summary_error is None) and (ner_error is None):
-                    status.update(label="Done", state="complete", expanded=False)
-                else:
-                    status.update(label="Error", state="error", expanded=False)
-
-            if summary_error is not None:
-                st.error(f"Summary Error: \n{summary_error}", icon="🚨")
-            else:
-                st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
-
-            if ner_error is not None:
-                st.error(f"NER Error \n{ner_error}", icon="🚨")
-            else:
-                st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
-                # st.dataframe(ner_df, use_container_width=True)
+    st.markdown('''<h3>Text Summarizer</h3>
+    #<p><a href="https://huggingface.co/spaces/Sravan1214/news-summarizer-ner/blob/main/README.md" target="_blank">README</a></p>''', unsafe_allow_html=True)
+    article_txt = st.text_area("Paste the text (the longer, the better):", "", height=200)
+    article_txt = re.sub(r'\n+',' ', article_txt)
+    if st.button("Submit"):
+        ner_result = [[ent, label.upper(), np.round(prob, 3)]
+                      for ent, label, prob in ner_inference_long_text(article_txt)]
+
+        ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
+        summ_result = summ_inference(article_txt)
+
+        ner_txt = get_ner_text(article_txt, ner_result).replace('$', '\$')
+
+        st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
 
 ############## ENTRY POINT END #######################
 
```
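The removed `main()` wrapped each step in per-stage `st.info`/`st.success` messages inside an `st.status` container, paired with the sentence-progress bar deleted from `ner_inference_long_text` above. Both are real Streamlit APIs; for reference, the removed pattern reduces to this runnable sketch, with a dummy loop standing in for the per-sentence NER work:

```python
import time
import streamlit as st

with st.status("Processing...", expanded=True) as status:
    n_sents = 5  # stand-in for len(list(doc.sents))
    bar = st.progress(0, text=f"Processed 0 / {n_sents} sentences")
    for n in range(1, n_sents + 1):
        time.sleep(0.2)  # stand-in for ner_inference(sent.text)
        bar.progress(n / n_sents, text=f"Processed {n} / {n_sents} sentences")
    status.update(label="Done", state="complete", expanded=False)
```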
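One consequence of the rewrite worth noting: the new `main()` still builds `ner_df` and `ner_txt`, but only the summary is rendered, so the entity output the old code displayed is now dead code. If it was meant to stay, the removed rendering lines translate directly; here is a self-contained sketch with placeholder values for the two variables:

```python
import pandas as pd
import streamlit as st

# placeholders for the values the app builds from get_ner_text() and the NER results
ner_txt = "Apple (ORG) unveiled the new iPhone (PRODUCT) ..."
ner_df = pd.DataFrame([["Apple", "ORG", 0.998], ["iPhone", "PRODUCT", 0.985]],
                      columns=["entity", "label", "confidence"])

st.markdown(f"<h4>ENTITIES:</h4>{ner_txt}", unsafe_allow_html=True)
st.dataframe(ner_df, use_container_width=True)
```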