# app/core/state.py
#
# Description:
# This module holds the shared, in-memory state of the application.
# It is initialized during startup and used by various services to avoid
# reloading large models and data for each API request. (A usage sketch is
# provided at the end of this module.)
import torch
import numpy as np
from typing import Any, Dict, List, Optional
from sentence_transformers import SentenceTransformer
from openai import OpenAI
# --- Application State Variables ---
# Flags indicating whether each piece of startup data has been loaded successfully.
v2_data_loaded: bool = False
artifacts_loaded: bool = False
reranker_model_loaded: bool = False
# The loaded reranker model (typed as Any because the concrete class depends on the backend).
reranker_model: Optional[Any] = None
# Device to use for torch operations (cuda or cpu).
device: Optional[torch.device] = None
# The loaded query encoder model.
query_encoder_model: Optional[SentenceTransformer] = None
# The loaded OpenAI client instance.
openai_client: Optional[OpenAI] = None
# --- Artifacts for Pre-computed Retrieval ---
# Pre-transformed and normalized chunk embeddings (numpy array).
transformed_chunk_embeddings: Optional[np.ndarray] = None
# List of chunk IDs in the same order as the embeddings.
chunk_ids_in_order: Optional[List[str]] = None
# The learned 'Wq' weight matrix for transforming query embeddings (torch tensor).
wq_weights: Optional[torch.Tensor] = None
# The learned temperature scalar for scaling similarity scores.
temperature: Optional[float] = None
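# Illustrative sketch (an assumption about how these artifacts fit together,
# not code from this project): at query time the raw query embedding produced by
# `query_encoder_model` is presumably projected by `wq_weights`, normalized to
# match the pre-normalized chunk embeddings, and scored against them with the
# temperature applied, along the lines of:
#
#     q = torch.from_numpy(query_emb).to(device) @ wq_weights
#     q = torch.nn.functional.normalize(q, dim=-1)
#     scores = (q.cpu().numpy() @ transformed_chunk_embeddings.T) / temperature
#     best_chunk_id = chunk_ids_in_order[int(np.argmax(scores))]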
# --- Content Maps ---
# Maps chunk IDs to their text content.
chunk_content_map: Dict[str, str] = {}
# Maps chunk IDs to their metadata (e.g., original file, page ID).
chunk_metadata_map: Dict[str, Dict] = {}
# Maps a chunk ID to its sequential chunk type (e.g., "Direct Participant Part 1").
chunk_sequence_map: Dict[str, str] = {}
# Flag tracking whether the chunk sequence map has been loaded from Neo4j successfully.
chunk_sequence_map_loaded: bool = False
# --- Sequence Organizer State ---
# Maps a sequence's base name to a sorted list of its parts,
# e.g., "Topic A": [{"id": "chunk1", "part": 1}, {"id": "chunk2", "part": 2}].
sequence_base_to_parts_map: Dict[str, List[Dict]] = {}
# Flag tracking whether the sequence map has been loaded from Neo4j successfully.
sequence_map_loaded: bool = False
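# Illustrative sketch (an assumption; "Topic A" is a hypothetical base name):
# rebuilding a full sequence's text from these maps might look like:
#
#     parts = sequence_base_to_parts_map["Topic A"]   # already sorted by "part"
#     full_text = "\n\n".join(chunk_content_map[p["id"]] for p in parts)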
# ... other state variables
# Maps each chunk ID that has a specific chunk type to that type.
chunk_type_map: Dict[str, str] = {}
# Flag tracking whether the chunk type map has been loaded successfully.
chunk_type_map_loaded: bool = False
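# --- Usage Sketch (illustrative only) ---
# This is an assumption about typical use, not code from this project: startup
# code (e.g., a FastAPI lifespan/startup hook) would populate the attributes
# above, and request handlers would only read them. The model name below is a
# placeholder.
#
#     from app.core import state
#
#     state.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     state.query_encoder_model = SentenceTransformer(
#         "sentence-transformers/all-MiniLM-L6-v2", device=str(state.device)
#     )
#     state.openai_client = OpenAI()  # reads OPENAI_API_KEY from the environment
#     # ... load embeddings, Wq weights, temperature, and content maps ...
#     state.artifacts_loaded = True
#
# Because every service imports this same module object, mutating its attributes
# once at startup makes the loaded models and maps visible to all requests
# without reloading them.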