Spaces:
Running
Running
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| """ | |
| `spaCy-entity-linker` demo from | |
| <https://github.com/egerber/spaCy-entity-linker/issues/18> | |
| """ | |
| from icecream import ic # pylint: disable=E0401 | |
| import spacy # pylint: disable=E0401 | |
| import spacy_entity_linker as sel # pylint: disable=E0401 | |
| def link_wikidata ( | |
| doc: spacy.tokens.doc.Doc, | |
| ) -> None: | |
| """ | |
| Run an entity linking classifier for wikidata | |
| """ | |
| classifier = sel.EntityClassifier.EntityClassifier() | |
| for ent in doc.ents: | |
| print() | |
| ic(ent.text, ent.label_) | |
| # build a term (a simple span) then identify all | |
| # the candidate entities for it | |
| term: sel.TermCandidate = sel.TermCandidate.TermCandidate(ent) | |
| candidates: sel.EntityCandidates.EntityCandidates = term.get_entity_candidates() | |
| ic(candidates) | |
| if len(candidates) > 0: | |
| # select the best candidate | |
| entity: sel.EntityElement.EntityElement = classifier(candidates) | |
| ic(entity.__dict__) | |
| ic(entity.get_sub_entities(limit=10)) | |
| ic(entity.get_super_entities(limit=10)) | |
| if __name__ == "__main__": | |
| SRC_TEXT: str = """ | |
| Werner Herzog is a remarkable filmmaker and an intellectual originally from Germany, the son of Dietrich Herzog. | |
| After the war, Werner fled to America to become famous. | |
| """ | |
| # initialize language model | |
| nlp: spacy.Language = spacy.load("en_core_web_sm") | |
| sample_doc: spacy.tokens.doc.Doc = nlp(SRC_TEXT.strip()) | |
| link_wikidata(sample_doc) | |