Spaces:
Running
Running
| """ | |
| Background initialization system for preloading heavy components during app startup. | |
| This module handles the eager loading of embedding models, retrievers, and chunks | |
| to improve first-question response time. | |
| """ | |
| import threading | |
| import time | |
| from typing import Optional, Callable | |
| from .config import logger | |
class BackgroundInitializer:
    """Run the heavy start-up work once on a daemon thread and report on it.

    The worker walks through four steps (embedding model, retrievers,
    terminology learning, LLM warm-up), publishing a status message and a
    percentage after each one.  Completion is signalled through a
    ``threading.Event`` whether the run succeeded or failed; a failure is
    additionally stored and can be read back with :meth:`get_error`.
    """

    # Upper bound on how many chunks are handed to the terminology learner.
    _MAX_TERMINOLOGY_DOCS = 1000

    def __init__(self):
        # Worker thread; stays None until start_background_initialization().
        self._initialization_thread: Optional[threading.Thread] = None
        # Set when the worker finishes, on success and on failure alike.
        self._initialization_complete = threading.Event()
        # Exception that aborted the run, if any.
        self._initialization_error: Optional[Exception] = None
        # Optional observer called as callback(message, percentage).
        self._progress_callback: Optional[Callable[[str, int], None]] = None
        # Most recent progress message.
        self._status = "Not started"

    def set_progress_callback(self, callback: Callable[[str, int], None]):
        """Register ``callback(message, percentage)`` for progress updates."""
        self._progress_callback = callback

    def _update_progress(self, message: str, percentage: int):
        """Record ``message`` as the current status, log it, notify the callback.

        Exceptions raised by the callback are logged and swallowed so that a
        faulty observer cannot abort initialization.
        """
        self._status = message
        # NOTE(review): the leading glyph looks like a mis-decoded emoji --
        # confirm against the file's real encoding before changing it.
        logger.info(f"π Background Init: {message} ({percentage}%)")
        # Read the attribute once so the check and the call use the same object.
        callback = self._progress_callback
        if callback:
            try:
                callback(message, percentage)
            except Exception as e:
                logger.error(f"Progress callback error: {e}")

    def _learn_terminology(self):
        """Feed up to ``_MAX_TERMINOLOGY_DOCS`` chunks to the terminology learner.

        Best effort: any exception is logged as a warning and does not fail
        the overall initialization.
        """
        try:
            from .medical_terminology import learn_from_corpus
            from . import utils

            chunks = utils.load_chunks()
            if chunks:
                # Convert to the format expected by the learner; the slice
                # limits the amount of work done at start-up.
                documents = [
                    {'content': chunk.page_content}
                    for chunk in chunks[:self._MAX_TERMINOLOGY_DOCS]
                ]
                learn_from_corpus(documents)
                logger.info(f"Learned medical terminology from {len(documents)} documents")
        except Exception as e:
            logger.warning(f"Could not learn terminology from corpus: {e}")

    def _initialize_components(self):
        """Thread target: run every initialization step in order.

        Any exception from a mandatory step is stored for :meth:`get_error`,
        logged with its traceback, and reported through the progress callback
        with a percentage of ``-1``.  The completion event is always set.
        """
        try:
            self._update_progress("Starting background initialization...", 0)

            # Step 1: load the embedding model.  Only the call matters here;
            # the return value is not used (presumably cached by .config --
            # TODO confirm).
            self._update_progress("Loading embedding model...", 10)
            from .config import get_embedding_model
            get_embedding_model()
            self._update_progress("Embedding model loaded successfully", 40)

            # Step 2: initialize the retrievers.
            self._update_progress("Initializing retrievers and loading chunks...", 50)
            from .retrievers import _ensure_initialized
            _ensure_initialized()
            self._update_progress("Retrievers initialized successfully", 90)

            # Step 3: learn medical terminology from the corpus (best effort).
            self._update_progress("Learning medical terminology from corpus...", 92)
            self._learn_terminology()

            # Step 4: warm up the LLM; again only the call matters.
            self._update_progress("Warming up LLM...", 97)
            from .config import get_llm
            get_llm()

            self._update_progress("All components initialized successfully", 100)
            logger.info("β Background initialization completed successfully")
        except Exception as e:
            self._initialization_error = e
            # logger.exception keeps the traceback, which a plain error() call
            # with only str(e) would discard.
            logger.exception(f"β Background initialization failed: {e}")
            self._update_progress(f"Initialization failed: {str(e)}", -1)
        finally:
            # Release waiters on both the success and the failure path.
            self._initialization_complete.set()

    def start_background_initialization(self):
        """Start the worker thread; later calls only log a warning.

        The "already started" check is not guarded by a lock.
        """
        if self._initialization_thread is not None:
            logger.warning("Background initialization already started")
            return
        logger.info("π Starting background initialization...")
        self._initialization_thread = threading.Thread(
            target=self._initialize_components,
            name="BackgroundInitializer",
            daemon=True,
        )
        self._initialization_thread.start()

    def is_complete(self) -> bool:
        """Return True once the worker has finished, successfully or not."""
        return self._initialization_complete.is_set()

    def wait_for_completion(self, timeout: Optional[float] = None) -> bool:
        """Block until the worker finishes or ``timeout`` seconds elapse.

        Returns True if initialization finished, False on timeout.
        """
        return self._initialization_complete.wait(timeout)

    def get_status(self) -> str:
        """Return the most recent progress message."""
        return self._status

    def get_error(self) -> Optional[Exception]:
        """Return the exception that aborted initialization, or None."""
        return self._initialization_error

    def is_successful(self) -> bool:
        """Return True if the worker finished and recorded no error."""
        return self.is_complete() and self._initialization_error is None
# Module-level initializer instance, created at import time and shared by
# the wrapper functions in this module.
_background_initializer = BackgroundInitializer()
def start_background_initialization(progress_callback: Optional[Callable[[str, int], None]] = None):
    """Kick off initialization on the shared initializer.

    When ``progress_callback`` is truthy it is registered first, so it
    receives ``(message, percentage)`` updates from the run.
    """
    initializer = _background_initializer
    if progress_callback:
        initializer.set_progress_callback(progress_callback)
    initializer.start_background_initialization()
def wait_for_initialization(timeout: Optional[float] = None) -> bool:
    """Block on the shared initializer for at most ``timeout`` seconds.

    Returns whatever ``wait_for_completion`` reports for that wait.
    """
    finished = _background_initializer.wait_for_completion(timeout)
    return finished
def is_initialization_complete() -> bool:
    """Report whether the shared initializer has finished its run."""
    done = _background_initializer.is_complete()
    return done
def get_initialization_status() -> str:
    """Return the latest status message of the shared initializer."""
    status = _background_initializer.get_status()
    return status
def get_initialization_error() -> Optional[Exception]:
    """Return the error recorded by the shared initializer, or None."""
    failure = _background_initializer.get_error()
    return failure
def is_initialization_successful() -> bool:
    """Report whether the shared initializer finished without an error."""
    succeeded = _background_initializer.is_successful()
    return succeeded