Spaces:
Runtime error
Runtime error
| import json | |
| import numpy as np | |
| from sqlalchemy import ForeignKey, Integer, LargeBinary, String | |
| from sqlalchemy.orm import Mapped, mapped_column, relationship | |
| from sqlalchemy.types import TypeDecorator | |
| from components.dbo.models.base import Base | |
| from components.dbo.models.dataset import Dataset | |
class JSONType(TypeDecorator):
    """Column type for storing JSON in SQLite.

    Values are encoded with ``json.dumps`` into a string column on the way in
    and decoded with ``json.loads`` on the way out. ``None`` passes through
    unchanged in both directions.
    """

    impl = String
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Encode *value* as a JSON string; ``None`` is stored as ``None``."""
        return None if value is None else json.dumps(value)

    def process_result_value(self, value, dialect):
        """Decode a stored JSON string; ``None`` is returned as ``None``."""
        return None if value is None else json.loads(value)
class EmbeddingType(TypeDecorator):
    """Column type for storing embeddings in SQLite as raw float32 bytes.

    Only the flat element data is written; the array shape is not persisted.
    Every non-null value is therefore read back as a one-dimensional
    ``float32`` array, whatever shape it had when it was saved.
    """

    impl = LargeBinary
    cache_ok = True

    def process_bind_param(self, value, dialect):
        """Serialise an array-like to float32 bytes; ``None`` stays ``None``."""
        if value is None:
            return None
        # ``tobytes`` emits the elements in C order and drops the shape, so
        # reshaping to 2-D beforehand would not change what gets stored.
        return np.asarray(value, dtype=np.float32).tobytes()

    def process_result_value(self, value, dialect):
        """Rebuild a writable 1-D float32 array from the stored bytes."""
        if value is None:
            return None
        # ``np.frombuffer`` over an immutable bytes object yields a read-only
        # view; copy so callers can modify the embedding in place.
        return np.frombuffer(value, dtype=np.float32).copy()
class EntityModel(Base):
    """SQLAlchemy model for storing entities.

    Each row belongs to exactly one dataset (``dataset_id`` is required).
    """

    __tablename__ = "entity"

    # NOTE(review): no primary key is declared in this class; presumably it
    # is inherited from Base. Confirm.
    uuid: Mapped[str] = mapped_column(String, unique=True)
    name: Mapped[str] = mapped_column(String, nullable=False)
    text: Mapped[str] = mapped_column(String, nullable=False)
    # Columns declared with nullable=True may hold None at runtime even
    # though their annotations are not Optional.
    in_search_text: Mapped[str] = mapped_column(String, nullable=True)
    entity_type: Mapped[str] = mapped_column(String, nullable=False)

    # Fields for relations (triplet approach).
    source_id: Mapped[str] = mapped_column(String, nullable=True)
    target_id: Mapped[str] = mapped_column(String, nullable=True)
    number_in_relation: Mapped[int] = mapped_column(Integer, nullable=True)

    # Index of the chunk within the document.
    chunk_index: Mapped[int] = mapped_column(Integer, nullable=True)

    # JSON field for storing metadata.
    metadata_json: Mapped[dict] = mapped_column(JSONType, nullable=True)

    embedding: Mapped[np.ndarray] = mapped_column(EmbeddingType, nullable=True)

    dataset_id: Mapped[int] = mapped_column(Integer, ForeignKey("dataset.id"), nullable=False)
    dataset: Mapped["Dataset"] = relationship(
        "Dataset",
        back_populates="entities",
        # Same cascades that "all" enabled, minus "delete": on this
        # many-to-one side a delete cascade would remove the parent Dataset
        # whenever a single entity is deleted.
        cascade="save-update, merge, refresh-expire, expunge",
    )