Spaces:
Runtime error
Runtime error
| import logging | |
| import pandas as pd | |
| from sqlalchemy.orm import Session | |
| from components.dbo.models.acronym import Acronym | |
| from components.dbo.models.dataset import Dataset | |
| from components.dbo.models.dataset_document import DatasetDocument | |
| from schemas.acronym import AcronymCollectionResponse | |
logger = logging.getLogger(__name__)


class AcronymService:
    """
    Service for working with acronyms and abbreviations.

    NOTE(review): ``db`` is annotated as ``Session``, yet every method calls
    ``self.db()`` and uses the result as a context manager, so it is
    presumably a session factory (e.g. a ``sessionmaker``) — confirm against
    the caller and tighten the annotation.
    """

    def __init__(self, db: Session):
        """
        Remember the database handle used by every query in this service.

        Args:
            db: Object invoked as ``db()`` to open a session (see the class
                note about its annotation).
        """
        logger.info("Initializing AcronymService")
        self.db = db

    def from_pandas(self, df: pd.DataFrame) -> None:
        """
        Load acronyms and abbreviations from a pandas DataFrame.

        Every row is added to one session and committed in a single
        transaction. If anything fails, the transaction is rolled back, the
        error is logged with its traceback and the exception is re-raised.

        Args:
            df: DataFrame with the columns document_id, short_form,
                full_form and type. A missing document_id (NaN/None) is
                stored as None.

        Raises:
            Exception: Any error raised while building or committing rows.
        """
        logger.info(f"Loading acronyms from DataFrame with {len(df)} rows")

        # The context manager closes the session on exit, so no explicit
        # close() is needed here (the other methods rely on this as well).
        with self.db() as session:
            try:
                for _, row in df.iterrows():
                    document_id = row['document_id']
                    session.add(
                        Acronym(
                            short_form=row['short_form'],
                            full_form=row['full_form'],
                            type=row['type'],
                            document_id=(
                                int(document_id)
                                if pd.notna(document_id)
                                else None
                            ),
                        )
                    )

                session.commit()
                logger.info("Successfully loaded all acronyms")
            except Exception as e:
                session.rollback()
                logger.exception(f"Error processing acronyms: {str(e)}")
                raise

    def get_abbreviations(self, document_id: int) -> list[Acronym]:
        """
        Get acronyms and abbreviations for a document.

        Args:
            document_id: ID of the document.

        Returns:
            list[Acronym]: Acronyms whose document_id equals ``document_id``
            plus those whose document_id is NULL.
        """
        logger.info(f"Getting abbreviations for document {document_id}")
        with self.db() as session:
            result = (
                session.query(Acronym)
                .filter(
                    (Acronym.document_id == document_id)
                    | Acronym.document_id.is_(None)
                )
                .all()
            )
            logger.debug(f"Found {len(result)} abbreviations for document {document_id}")
            return result

    def get_abbreviations_by_dataset_id(self, dataset_id: int) -> list[Acronym]:
        """
        Get acronyms and abbreviations for a dataset.

        Args:
            dataset_id: ID of the dataset.

        Returns:
            list[Acronym]: Result of ``_get_acronyms_for_dataset``.
        """
        logger.info(f"Getting abbreviations for dataset {dataset_id}")
        return self._get_acronyms_for_dataset(dataset_id)

    def get_current_acronyms(self) -> AcronymCollectionResponse:
        """
        Get acronyms and abbreviations for the currently active dataset.

        Returns:
            AcronymCollectionResponse: Acronyms of the first dataset with
            ``is_active`` set, grouped by short form. When there is no such
            dataset, an empty response with ``collection_id=0`` is returned.
        """
        logger.info("Getting acronyms for current active dataset")
        with self.db() as session:
            # `== True` is deliberate: it builds a SQL expression, not a
            # Python comparison.
            active_dataset: Dataset | None = (
                session.query(Dataset)
                .filter(Dataset.is_active == True)  # noqa: E712
                .first()
            )

            if not active_dataset:
                logger.warning("No active dataset found")
                return AcronymCollectionResponse(
                    collection_id=0,
                    collection_name="",
                    collection_filename="",
                    updated_at=None,
                    acronyms={},
                )

            result = self._get_acronyms_for_dataset(active_dataset.id)

            return AcronymCollectionResponse(
                collection_id=active_dataset.id,
                collection_name=active_dataset.name,
                collection_filename='',
                # TODO: unclear which timestamp belongs here; the creation
                # date is used for now.
                updated_at=active_dataset.date_created,
                acronyms=self._compress_acronyms(result),
            )

    def _get_acronyms_for_dataset(self, dataset_id: int) -> list[Acronym]:
        """
        Get the list of acronyms for a dataset.

        Args:
            dataset_id: ID of the dataset.

        Returns:
            list[Acronym]: Acronyms attached to the selected documents plus
            those whose document_id is NULL.
        """
        with self.db() as session:
            # NOTE(review): this filters on DatasetDocument.id (the row's own
            # id) using a dataset ID. It looks like the dataset foreign-key
            # column was intended — confirm against the DatasetDocument model.
            document_rows = (
                session.query(DatasetDocument.document_id)
                .filter(DatasetDocument.id == dataset_id)
                .all()
            )
            # Each row is a 1-tuple; unwrap to plain IDs.
            document_ids = [row[0] for row in document_rows]

            result = (
                session.query(Acronym)
                .filter(
                    Acronym.document_id.in_(document_ids)
                    | Acronym.document_id.is_(None)
                )
                .all()
            )
            logger.debug(f"Found {len(result)} acronyms for dataset {dataset_id}")
            return result

    def _compress_acronyms(self, acronyms: list[Acronym]) -> dict[str, list[str]]:
        """
        Group acronyms into a dictionary keyed by short form.

        Args:
            acronyms: Objects exposing ``short_form`` and ``full_form``.

        Returns:
            dict[str, list[str]]: For each short form, the full forms in the
            order they appear in ``acronyms`` (duplicates are kept).
        """
        # Single pass instead of rescanning the list for every short form.
        compressed: dict[str, list[str]] = {}
        for acronym in acronyms:
            compressed.setdefault(acronym.short_form, []).append(acronym.full_form)

        logger.debug(f"Compressed {len(acronyms)} acronyms into {len(compressed)} unique short forms")
        return compressed