Spaces:
Sleeping
Sleeping
# app/core/state.py
"""Shared, in-memory application state.

This module holds the singleton state of the application. It is populated
during startup and read by the various services, so that large models and
pre-computed artifacts are loaded once rather than per API request.
"""
import torch
import numpy as np
from typing import Any, Optional, Dict, List
from sentence_transformers import SentenceTransformer
from openai import OpenAI

# --- Application State Variables ---

# Flags indicating whether each category of startup data loaded successfully.
v2_data_loaded: bool = False
artifacts_loaded: bool = False
reranker_model_loaded: bool = False

# The loaded reranker model. Typed as ``Any`` because the concrete class
# depends on the reranker backend chosen at startup.
# FIX: was ``Optional[any]`` — lowercase ``any`` is the builtin function,
# not a type, and ``Optional[any]`` raises TypeError at import time.
reranker_model: Optional[Any] = None

# Device to use for torch operations (cuda or cpu).
device: Optional[torch.device] = None

# The loaded query encoder model.
query_encoder_model: Optional[SentenceTransformer] = None

# The loaded OpenAI client instance.
openai_client: Optional[OpenAI] = None

# --- Artifacts for Pre-computed Retrieval ---

# Pre-transformed and normalized chunk embeddings (numpy array).
transformed_chunk_embeddings: Optional[np.ndarray] = None
# List of chunk IDs in the same order as the embeddings.
chunk_ids_in_order: Optional[List[str]] = None
# The learned 'Wq' weight matrix for transforming query embeddings (torch tensor).
wq_weights: Optional[torch.Tensor] = None
# The learned temperature scalar for scaling similarity scores.
temperature: Optional[float] = None

# --- Content Maps ---

# Maps chunk IDs to their text content.
chunk_content_map: Dict[str, str] = {}
# Maps chunk IDs to their metadata (e.g., original file, page ID).
chunk_metadata_map: Dict[str, Dict] = {}
# Maps a chunk_id to its sequential chunk_type (e.g., "Direct Participant Part 1").
chunk_sequence_map: Dict[str, str] = {}
# Tracks whether chunk_sequence_map was loaded from Neo4j successfully.
chunk_sequence_map_loaded: bool = False

# --- Sequence Organizer State ---

# Maps a sequence's base name to a sorted list of its parts.
# e.g., "Topic A": [{"id": "chunk1", "part": 1}, {"id": "chunk2", "part": 2}]
sequence_base_to_parts_map: Dict[str, List[Dict]] = {}
# Tracks whether sequence_base_to_parts_map was loaded from Neo4j successfully.
sequence_map_loaded: bool = False

# --- Chunk Type State ---

# Holds the specific type for every chunk that has one.
chunk_type_map: Dict[str, str] = {}
chunk_type_map_loaded: bool = False