Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| from presidio_analyzer import ( | |
| RecognizerResult, | |
| EntityRecognizer, | |
| AnalysisExplanation, | |
| ) | |
| from presidio_analyzer.nlp_engine import NlpEngine, SpacyNlpEngine, NlpArtifacts | |
class TransformerRecognizer(EntityRecognizer):
    """Presidio recognizer backed by a Transformers token-classification pipeline.

    Labels predicted by the model are translated to Presidio entity names
    through a caller-supplied mapping. Predictions whose label is missing
    from that mapping are dropped.
    """

    def __init__(
        self,
        model_id_or_path,
        mapping_labels,
        aggregation_strategy="simple",
        supported_language="fr",
        ignore_labels=("O", "MISC"),
    ):
        """Create the pipeline and register the mapped entities with the parent.

        :param model_id_or_path: Value passed as ``model`` to
            ``transformers.pipeline``.
        :param mapping_labels: Dict mapping model labels (the pipeline's
            ``entity_group`` values) to Presidio entity names.
        :param aggregation_strategy: Forwarded unchanged to the pipeline.
        :param supported_language: Forwarded unchanged to ``EntityRecognizer``.
        :param ignore_labels: Labels forwarded to the pipeline as
            ``ignore_labels``. ``None`` is forwarded as-is.
        """
        # The default is an immutable tuple; hand the pipeline its own list so
        # no state is shared with the default or with the caller's sequence.
        labels_to_ignore = None if ignore_labels is None else list(ignore_labels)

        # Initialise the transformers pipeline for the given model id or path.
        self.pipeline = pipeline(
            "token-classification",
            model=model_id_or_path,
            aggregation_strategy=aggregation_strategy,
            ignore_labels=labels_to_ignore,
        )

        # Mapping from model labels to Presidio entity names.
        self.label2presidio = mapping_labels

        # Several model labels may map to the same Presidio entity; report
        # each supported entity once, keeping first-seen order.
        supported_entities = list(dict.fromkeys(self.label2presidio.values()))
        super().__init__(
            supported_entities=supported_entities,
            supported_language=supported_language,
        )

    def load(self) -> None:
        """No loading is required; the pipeline is built in ``__init__``."""
        pass

    def analyze(self, text: str, entities=None, nlp_artifacts: NlpArtifacts = None):
        """Extract entities from ``text`` using the Transformers pipeline.

        :param text: Text to run the pipeline on.
        :param entities: Presidio entity names to keep. ``None`` keeps every
            mapped entity.
        :param nlp_artifacts: Unused by this recognizer.
        :return: List of ``RecognizerResult`` built from each kept
            prediction's ``start``, ``end`` and ``score``.
        """
        results = []
        for prediction in self.pipeline(text):
            label = prediction["entity_group"]
            # Skip labels the caller did not map to a Presidio entity.
            if label not in self.label2presidio:
                continue

            converted_entity = self.label2presidio[label]
            # Check for None first: ``x in None`` raises TypeError, which made
            # the default ``entities=None`` call fail.
            if entities is not None and converted_entity not in entities:
                continue

            results.append(
                RecognizerResult(
                    entity_type=converted_entity,
                    start=prediction["start"],
                    end=prediction["end"],
                    score=prediction["score"],
                )
            )
        return results