# app/core/state.py
#
# Description:
# This module holds the shared, in-memory state of the application.
# It is initialized during startup and used by various services to avoid
# reloading large models and data for each API request.

import torch
import numpy as np
from typing import Any, Optional, Dict, List

from sentence_transformers import SentenceTransformer
from openai import OpenAI

# --- Application State Variables ---

# Flags indicating whether each piece of startup data has been loaded successfully.
v2_data_loaded: bool = False
artifacts_loaded: bool = False
reranker_model_loaded: bool = False

# The loaded reranker model (kept untyped; the exact class depends on the reranker in use).
reranker_model: Optional[Any] = None

# Device to use for torch operations (cuda or cpu).
device: Optional[torch.device] = None

# The loaded query encoder model.
query_encoder_model: Optional[SentenceTransformer] = None

# The loaded OpenAI client instance.
openai_client: Optional[OpenAI] = None

# --- Artifacts for Pre-computed Retrieval ---

# Pre-transformed and normalized chunk embeddings (numpy array).
transformed_chunk_embeddings: Optional[np.ndarray] = None

# List of chunk IDs in the same order as the embeddings.
chunk_ids_in_order: Optional[List[str]] = None

# The learned 'Wq' weight matrix for transforming query embeddings (torch tensor).
wq_weights: Optional[torch.Tensor] = None

# The learned temperature scalar for scaling similarity scores.
temperature: Optional[float] = None

# --- Content Maps ---

# Maps chunk IDs to their text content.
chunk_content_map: Dict[str, str] = {}

# Maps chunk IDs to their metadata (e.g., original file, page ID).
chunk_metadata_map: Dict[str, Dict] = {}

# Maps a chunk_id to its sequential chunk type (e.g., "Direct Participant Part 1").
chunk_sequence_map: Dict[str, str] = {}

# Tracks whether the chunk sequence map has been loaded from Neo4j successfully.
chunk_sequence_map_loaded: bool = False

# --- Sequence Organizer State ---

# Maps a sequence's base name to a sorted list of its parts.
# e.g., "Topic A": [{"id": "chunk1", "part": 1}, {"id": "chunk2", "part": 2}]
sequence_base_to_parts_map: Dict[str, List[Dict]] = {}

# Tracks whether the sequence map has been loaded from Neo4j successfully.
sequence_map_loaded: bool = False

# ... other state variables

# This map holds the specific type for every chunk that has one.
chunk_type_map: Dict[str, str] = {}
chunk_type_map_loaded: bool = False
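
# --- Illustrative startup sketch (hypothetical, not part of this module) ---
# A minimal example of how a startup hook elsewhere in the application *might*
# populate this shared state. The encoder model name ("all-MiniLM-L6-v2") and
# reliance on the OPENAI_API_KEY environment variable are assumptions for the
# sketch, not confirmed project settings.
#
#     import torch
#     from sentence_transformers import SentenceTransformer
#     from openai import OpenAI
#     from app.core import state
#
#     def load_startup_state() -> None:
#         # Pick a device once and reuse it for all torch operations.
#         state.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#         # Model name below is an assumption; the real encoder is configured elsewhere.
#         state.query_encoder_model = SentenceTransformer(
#             "all-MiniLM-L6-v2", device=str(state.device)
#         )
#         # OpenAI() reads OPENAI_API_KEY from the environment by default.
#         state.openai_client = OpenAI()
#         state.artifacts_loaded = True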