Spaces:

som11
/

named_entity_recognition

Sleeping

App Files Files Community

named_entity_recognition / app.py

som11

Update app.py

5e37eca verified over 1 year ago

raw

history blame contribute delete

3.63 kB

	import streamlit as st
	import spacy
	from spacy import displacy
	from transformers import AutoTokenizer, AutoModelForTokenClassification
	from transformers import pipeline
	import pandas as pd


	st.title('Named Entity Recognizer')

	st.write('Named Entity Recognition (NER) is like a smart highlighter that scans through text and highlights important words, such as people’s names, places, companies, and dates.')

	st.write('')

	with st.form(key='form_named_entity_recognition'):

	input_from_user = st.text_area('enter your input')

	model_options = st.selectbox('choose a model', ('Choose a model', 'Spacy\'s en_core_web_sm model', 'dslim/bert-base-NER model'))

	submit_button = st.form_submit_button('Submit')


	if submit_button:

	if input_from_user == '':

	st.error('empty form submitted')

	else:

	if model_options == 'Choose a model':

	st.error('Please choose a model for named entity recognition')

	else:

	st.subheader('Result Analysis')

	if model_options == 'Choose a model':

	st.error('Please choose a model for Named Entity Recognition')

	elif model_options == 'Spacy\'s en_core_web_sm model':

	st.write('Model Used for Named Entity Recognition:')
	st.success(model_options)

	spacy_model = spacy.load('en_core_web_sm')

	res = spacy_model(input_from_user)

	st.write(f'Analysis of the detected entities from the text ==>')
	st.markdown(f'{input_from_user}')

	entities = [{'Entity': entity.text, 'Label of the Entity': entity.label_, 'Description of the Label': spacy.explain(entity.label_)} for entity in res.ents]

	df = pd.DataFrame(entities)

	st.table(df)

	st.write('Entites marked in the input text:')
	st.markdown(displacy.render(res, style='ent'), unsafe_allow_html=True)

	elif model_options == 'dslim/bert-base-NER model':

	st.write('Model Used for Named Entity Recognition:')
	st.success(model_options)

	tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
	model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")

	bert_ner_model = pipeline('ner', model=model, tokenizer=tokenizer)

	res = bert_ner_model(input_from_user)

	abbreviations = {
	"O": "Outside of a named entity",
	"B-MISC": "Beginning of a miscellaneous entity right after another miscellaneous entity",
	"I-MISC": "Miscellaneous entity",
	"B-PER": "Beginning of a person’s name right after another person’s name",
	"I-PER": "Person’s name",
	"B-ORG": "Beginning of an organization right after another organization",
	"I-ORG": "Organization",
	"B-LOC": "Beginning of a location right after another location",
	"I-LOC": "Location"
	}

	st.write(f'Analysis of the detected entities from the text ==>')
	st.markdown(f'{input_from_user}')

	entities = [{'Entity': input_from_user[entity['start']:entity['end']], 'Label of the Entity': entity['entity'], 'Description of the Label': abbreviations.get(entity['entity'])} for entity in res]

	df = pd.DataFrame(entities)

	st.table(df)