"""
Background initialization system for preloading heavy components during app startup.

This module handles the eager loading of embedding models, retrievers, and
chunks to improve first-question response time.
"""

import threading
import time
from typing import Optional, Callable

from .config import logger


class BackgroundInitializer:
    """Manages background initialization of heavy components.

    The work runs once on a daemon thread started by
    ``start_background_initialization``. Completion (successful or not) is
    signalled through an internal ``threading.Event``; a failure is recorded
    and can be retrieved with ``get_error``.
    """

    def __init__(self):
        """Create an initializer in the "Not started" state."""
        # Worker thread; None until start_background_initialization() runs.
        self._initialization_thread: Optional[threading.Thread] = None
        # Set when the worker finishes, whether it succeeded or failed.
        self._initialization_complete = threading.Event()
        # Exception that aborted initialization, if any.
        self._initialization_error: Optional[Exception] = None
        # Optional observer called as callback(message, percentage).
        self._progress_callback: Optional[Callable[[str, int], None]] = None
        # Most recent progress message.
        self._status = "Not started"

    def set_progress_callback(self, callback: Callable[[str, int], None]):
        """Set a callback function to receive progress updates.

        Args:
            callback: Called with ``(message, percentage)`` on every progress
                update. It is invoked from whichever thread reports progress,
                which is the background worker during initialization.
        """
        self._progress_callback = callback

    def _update_progress(self, message: str, percentage: int):
        """Record the new status, log it, and notify the callback if set.

        Args:
            message: Human-readable description of the current step.
            percentage: Progress value; the failure path reports ``-1``.

        Exceptions raised by the callback are logged and swallowed so that a
        faulty observer cannot abort initialization.
        """
        self._status = message
        logger.info(f"🔄 Background Init: {message} ({percentage}%)")

        if self._progress_callback:
            try:
                self._progress_callback(message, percentage)
            except Exception as e:
                # logger.exception keeps the traceback of the callback failure.
                logger.exception(f"Progress callback error: {e}")

    def _initialize_components(self):
        """Initialize all heavy components; intended to run on the worker thread.

        Steps run in order: embedding model, retrievers, terminology learning
        (best effort), LLM warm-up. Any exception outside the best-effort step
        is stored in ``_initialization_error`` and reported with a progress
        value of ``-1``. The completion event is always set at the end.
        """
        try:
            self._update_progress("Starting background initialization...", 0)

            # Step 1: Load embedding model (this is the heaviest component).
            self._update_progress("Loading embedding model...", 10)
            # Imported lazily so the import cost is paid on this thread.
            from .config import get_embedding_model

            # Return value is intentionally discarded; the call is made for
            # its loading side effect (presumably cached by config - verify).
            get_embedding_model()
            self._update_progress("Embedding model loaded successfully", 40)

            # Step 2: Initialize retrievers (this will load chunks and create
            # the vector store).
            self._update_progress("Initializing retrievers and loading chunks...", 50)
            from .retrievers import _ensure_initialized

            _ensure_initialized()
            self._update_progress("Retrievers initialized successfully", 90)

            # Step 3: Learn medical terminology from corpus.
            self._update_progress("Learning medical terminology from corpus...", 92)
            try:
                from .medical_terminology import learn_from_corpus
                from . import utils

                # Load chunks to learn from.
                chunks = utils.load_chunks()
                if chunks:
                    # Convert to the format expected by the learner; limited
                    # to the first 1000 chunks for performance.
                    documents = [
                        {'content': chunk.page_content} for chunk in chunks[:1000]
                    ]
                    learn_from_corpus(documents)
                    logger.info(
                        f"Learned medical terminology from {len(documents)} documents"
                    )
            except Exception as e:
                # Best effort: a terminology failure must not fail startup.
                logger.warning(f"Could not learn terminology from corpus: {e}")

            # Step 4: Warm up LLM (optional, lightweight).
            self._update_progress("Warming up LLM...", 97)
            from .config import get_llm

            # Return value is intentionally discarded, as in step 1.
            get_llm()

            self._update_progress("All components initialized successfully", 100)
            logger.info("✅ Background initialization completed successfully")

        except Exception as e:
            self._initialization_error = e
            # logger.exception records the traceback; the failure happens on a
            # background thread, so the log is the only place it is visible.
            logger.exception(f"❌ Background initialization failed: {e}")
            self._update_progress(f"Initialization failed: {e}", -1)
        finally:
            # Always release waiters, on success and on failure alike.
            self._initialization_complete.set()

    def start_background_initialization(self):
        """Start background initialization in a separate daemon thread.

        Calling this again after a thread has been created only logs a
        warning; initialization is never started twice by this method.
        """
        if self._initialization_thread is not None:
            logger.warning("Background initialization already started")
            return

        logger.info("🚀 Starting background initialization...")
        self._initialization_thread = threading.Thread(
            target=self._initialize_components,
            name="BackgroundInitializer",
            daemon=True,  # do not keep the interpreter alive at shutdown
        )
        self._initialization_thread.start()

    def is_complete(self) -> bool:
        """Return True once initialization has finished (successfully or not)."""
        return self._initialization_complete.is_set()

    def wait_for_completion(self, timeout: Optional[float] = None) -> bool:
        """Block until initialization finishes or the timeout expires.

        Args:
            timeout: Maximum seconds to wait; ``None`` waits indefinitely.

        Returns:
            True if initialization finished, False if the timeout expired.

        Note:
            The completion event is only set by the worker, so this blocks
            for the full timeout (or forever with ``None``) if initialization
            was never started.
        """
        return self._initialization_complete.wait(timeout)

    def get_status(self) -> str:
        """Return the most recent progress message."""
        return self._status

    def get_error(self) -> Optional[Exception]:
        """Return the exception that aborted initialization, or None."""
        return self._initialization_error

    def is_successful(self) -> bool:
        """Check if initialization completed successfully."""
        return self.is_complete() and self._initialization_error is None


# Global initializer instance
_background_initializer = BackgroundInitializer()


def start_background_initialization(
    progress_callback: Optional[Callable[[str, int], None]] = None,
):
    """Start background initialization with optional progress callback.

    Args:
        progress_callback: If given, installed on the global initializer
            before the worker thread is started.
    """
    if progress_callback:
        _background_initializer.set_progress_callback(progress_callback)
    _background_initializer.start_background_initialization()


def wait_for_initialization(timeout: Optional[float] = None) -> bool:
    """Wait for background initialization to complete.

    Returns:
        True if initialization finished, False if the timeout expired.
    """
    return _background_initializer.wait_for_completion(timeout)


def is_initialization_complete() -> bool:
    """Check if background initialization is complete."""
    return _background_initializer.is_complete()


def get_initialization_status() -> str:
    """Get current initialization status."""
    return _background_initializer.get_status()


def get_initialization_error() -> Optional[Exception]:
    """Get initialization error if any."""
    return _background_initializer.get_error()


def is_initialization_successful() -> bool:
    """Check if initialization completed successfully."""
    return _background_initializer.is_successful()