Spaces:
Runtime error
Runtime error
| import logging | |
| from dataclasses import dataclass | |
| import pandas as pd | |
| from components.parser.abbreviations.abbreviation import Abbreviation, AbbreviationType | |
| logger = logging.getLogger(__name__) | |
@dataclass
class AbbreviationsCollection:
    """A list of ``Abbreviation`` objects with pandas conversion helpers.

    The class is a dataclass so that it can be built positionally from a
    list of items, which is what ``from_pandas`` does via ``cls(items)``.
    """

    items: list[Abbreviation]

    def to_pandas(self) -> pd.DataFrame:
        """
        Convert the abbreviations of this collection into a DataFrame.

        Items whose ``abbreviation_type`` equals ``AbbreviationType.UNKNOWN``
        are left out; the numbers of kept and dropped items are logged.

        Returns:
            pd.DataFrame: One row per kept abbreviation with the columns
                ``ShortWord``, ``LongText``, ``AbbreviationType`` and
                ``DocumentId``. The columns are present even when no
                abbreviation is kept.
        """
        # Lazy %-args: the (potentially long) list is only rendered when
        # DEBUG logging is actually enabled.
        logger.debug("Items: %s", self.items)

        columns = ['ShortWord', 'LongText', 'AbbreviationType', 'DocumentId']
        approved = [
            {
                'ShortWord': abbr.short_form,
                'LongText': abbr.full_form,
                'AbbreviationType': abbr.abbreviation_type,
                'DocumentId': abbr.document_id,
            }
            for abbr in self.items
            if abbr.abbreviation_type != AbbreviationType.UNKNOWN
        ]

        logger.info('Approved abbreviations: %s', len(approved))
        logger.info('Rejected abbreviations: %s', len(self.items) - len(approved))

        # Passing the columns explicitly keeps the schema stable for an empty
        # result; otherwise pandas would return a frame without any columns.
        return pd.DataFrame(approved, columns=columns)

    @classmethod
    def from_pandas(cls, df: pd.DataFrame) -> 'AbbreviationsCollection':
        """
        Build a collection of abbreviations from a pandas DataFrame.

        Each row is read through the columns ``short``, ``full`` and
        ``document_id``. Rows for which constructing the ``Abbreviation``
        raises are logged as warnings and skipped, so one bad row does not
        abort the whole import.

        NOTE(review): these column names differ from the ones written by
        ``to_pandas`` (``ShortWord``, ``LongText``, ``DocumentId``), so a
        frame produced by ``to_pandas`` cannot be fed back in unchanged.
        Confirm which schema callers are expected to pass.

        Args:
            df: Source table, one abbreviation per row.

        Returns:
            AbbreviationsCollection: Collection holding every abbreviation
                that could be constructed, in row order.
        """
        abbreviations = []
        for _, row in df.iterrows():
            try:
                abbreviation = Abbreviation(
                    short=row['short'],
                    full=row['full'],
                    document_id=row['document_id'],
                )
            except Exception as e:
                # Deliberate best-effort: a missing column or a constructor
                # failure only costs the offending row.
                logger.warning(
                    'Failed to create abbreviation from row: %s. Error: %s', row, e
                )
            else:
                abbreviations.append(abbreviation)

        logger.info(
            'Created abbreviations collection with %s items', len(abbreviations)
        )
        logger.debug(
            'First 5 abbreviations: %s',
            ', '.join(str(abbr) for abbr in abbreviations[:5]),
        )
        return cls(abbreviations)