# app/core/state.py
#
# Description:
# This module holds the shared, in-memory state of the application.
# It is initialized during startup and used by various services to avoid
# reloading large models and data for each API request. (A usage sketch is
# provided at the end of this module.)
import torch
import numpy as np
from typing import Any, Dict, List, Optional
from sentence_transformers import SentenceTransformer
from openai import OpenAI
# --- Application State Variables ---
# Flags indicating whether each piece of startup data has been loaded successfully.
v2_data_loaded: bool = False
artifacts_loaded: bool = False
reranker_model_loaded: bool = False
# The loaded reranker model (typed as Any because the concrete class depends on the backend).
reranker_model: Optional[Any] = None
# Device to use for torch operations (cuda or cpu).
device: Optional[torch.device] = None
# The loaded query encoder model.
query_encoder_model: Optional[SentenceTransformer] = None
# The loaded OpenAI client instance.
openai_client: Optional[OpenAI] = None
# --- Artifacts for Pre-computed Retrieval ---
# Pre-transformed and normalized chunk embeddings (numpy array).
transformed_chunk_embeddings: Optional[np.ndarray] = None
# List of chunk IDs in the same order as the embeddings.
chunk_ids_in_order: Optional[List[str]] = None
# The learned 'Wq' weight matrix for transforming query embeddings (torch tensor).
wq_weights: Optional[torch.Tensor] = None
# The learned temperature scalar for scaling similarity scores.
temperature: Optional[float] = None
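# Illustrative sketch (an assumption about how these artifacts fit together,
# not code from this project): at query time the raw query embedding produced by
# `query_encoder_model` is presumably projected by `wq_weights`, normalized to
# match the pre-normalized chunk embeddings, and scored against them with the
# temperature applied, along the lines of:
#
#     q = torch.from_numpy(query_emb).to(device) @ wq_weights
#     q = torch.nn.functional.normalize(q, dim=-1)
#     scores = (q.cpu().numpy() @ transformed_chunk_embeddings.T) / temperature
#     best_chunk_id = chunk_ids_in_order[int(np.argmax(scores))]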
# --- Content Maps ---
# Maps chunk IDs to their text content.
chunk_content_map: Dict[str, str] = {}
# Maps chunk IDs to their metadata (e.g., original file, page ID).
chunk_metadata_map: Dict[str, Dict] = {}
# Maps a chunk ID to its sequential chunk type (e.g., "Direct Participant Part 1").
chunk_sequence_map: Dict[str, str] = {}
# Flag tracking whether the chunk sequence map has been loaded from Neo4j successfully.
chunk_sequence_map_loaded: bool = False
# --- Sequence Organizer State ---
# Maps a sequence's base name to a sorted list of its parts,
# e.g., "Topic A": [{"id": "chunk1", "part": 1}, {"id": "chunk2", "part": 2}].
sequence_base_to_parts_map: Dict[str, List[Dict]] = {}
# Flag tracking whether the sequence map has been loaded from Neo4j successfully.
sequence_map_loaded: bool = False
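# Illustrative sketch (an assumption; "Topic A" is a hypothetical base name):
# rebuilding a full sequence's text from these maps might look like:
#
#     parts = sequence_base_to_parts_map["Topic A"]   # already sorted by "part"
#     full_text = "\n\n".join(chunk_content_map[p["id"]] for p in parts)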
# ... other state variables
# Maps each chunk ID that has a specific chunk type to that type.
chunk_type_map: Dict[str, str] = {}
# Flag tracking whether the chunk type map has been loaded successfully.
chunk_type_map_loaded: bool = False
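# --- Usage Sketch (illustrative only) ---
# This is an assumption about typical use, not code from this project: startup
# code (e.g., a FastAPI lifespan/startup hook) would populate the attributes
# above, and request handlers would only read them. The model name below is a
# placeholder.
#
#     from app.core import state
#
#     state.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     state.query_encoder_model = SentenceTransformer(
#         "sentence-transformers/all-MiniLM-L6-v2", device=str(state.device)
#     )
#     state.openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment
#     # ... load embeddings, Wq weights, temperature, and content maps ...
#     state.artifacts_loaded = True
#
# Because every service imports this same module object, mutating its attributes
# once at startup makes the loaded models and maps visible to all requests
# without reloading them.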