Spaces:
Runtime error
Runtime error
| from transformers import AutoTokenizer, AutoModelForTokenClassification | |
| from transformers import pipeline | |
class NamedEntityRecognition:
    """Run a token-classification ("ner") pipeline and build an annotated view of the text.

    The pipeline is created once in ``__init__`` and reused by ``classify``.
    """

    # Checkpoint used for both the tokenizer and the model.
    MODEL_NAME = "xlm-roberta-large-finetuned-conll03-english"

    # Predictions whose text is one of these are treated as noise and are
    # emitted as plain text instead of labelled entities.
    _EXCLUDED_SPANS = frozenset(['', '.', '. ', ' '])

    def __init__(self):
        """Load the tokenizer and model and build the grouped "ner" pipeline."""
        tokenizer = AutoTokenizer.from_pretrained(self.MODEL_NAME)
        model = AutoModelForTokenClassification.from_pretrained(self.MODEL_NAME)
        # NOTE(review): newer transformers releases document
        # `aggregation_strategy` as the replacement for `grouped_entities`;
        # confirm against the pinned transformers version before switching.
        self.nlp = pipeline("ner", model=model, tokenizer=tokenizer, grouped_entities=True)

    def get_annotation(self, preds, text):
        """Split ``text`` into plain and labelled segments based on ``preds``.

        Args:
            preds: Iterable of prediction dicts, each providing ``'start'``,
                ``'end'``, ``'word'`` and ``'entity_group'``. They are
                expected in text order and non-overlapping (presumably what
                the pipeline returns; verify if another source is used).
            text: The string the predictions refer to.

        Returns:
            A list alternating gap / entity / gap / ... / tail. Gaps and
            excluded predictions are plain ``str`` slices of ``text``
            (possibly empty); entities are ``(slice, entity_group, "")``
            tuples. The empty third element is kept from the original output
            format (presumably a colour slot for the renderer; confirm with
            the caller).
        """
        annotation = []
        cursor = 0
        for pred in preds:
            start, end = pred['start'], pred['end']
            # Text between the previous entity (or the beginning) and this one.
            annotation.append(text[cursor:start])

            span = text[start:end]
            # Label by position rather than by looking the slice up by the
            # reported word: the word may differ from the raw slice, and the
            # same surface string can carry different groups at different
            # positions.
            if span in self._EXCLUDED_SPANS or pred.get('word') in self._EXCLUDED_SPANS:
                annotation.append(span)
            else:
                annotation.append((span, pred['entity_group'], ""))
            cursor = end

        # Whatever follows the last entity (the whole text when preds is empty).
        annotation.append(text[cursor:])
        return annotation

    def classify(self, text):
        """Run the pipeline on ``text``.

        Returns:
            A ``(preds, ner_annotation)`` tuple: the raw pipeline output and
            the segment list produced by ``get_annotation``.
        """
        preds = self.nlp(text)
        ner_annotation = self.get_annotation(preds, text)
        return preds, ner_annotation