# Source: HBV_AI_Assistant/core/background_init.py
# Commit 73c6377 (moazx): Initial commit with all files including LFS
"""
Background initialization system for preloading heavy components during app startup.
This module handles the eager loading of embedding models, retrievers, and chunks
to improve first-question response time.
"""
import threading
import time
from typing import Optional, Callable
from .config import logger
class BackgroundInitializer:
    """Manages background initialization of heavy components.

    A single daemon thread loads the embedding model, initializes the
    retrievers, learns medical terminology from the corpus (best effort) and
    warms up the LLM. Progress is written to the log and forwarded to an
    optional callback; completion and any fatal error can be queried
    afterwards.
    """

    # Maximum number of chunks handed to the terminology learner
    # (limit for performance).
    TERMINOLOGY_DOC_LIMIT = 1000

    def __init__(self):
        self._initialization_thread: Optional[threading.Thread] = None
        # Set once the worker has finished, whether it succeeded or failed.
        self._initialization_complete = threading.Event()
        # Exception that aborted initialization, or None.
        self._initialization_error: Optional[Exception] = None
        self._progress_callback: Optional[Callable[[str, int], None]] = None
        # Most recent progress message.
        self._status = "Not started"

    def set_progress_callback(self, callback: Callable[[str, int], None]):
        """Set a callback function to receive progress updates.

        The callback is invoked as ``callback(message, percentage)``; a
        percentage of -1 is reported when initialization fails.
        """
        self._progress_callback = callback

    def _update_progress(self, message: str, percentage: int):
        """Record the status message, log it, and notify the callback if set.

        Exceptions raised by the callback are logged and swallowed so that a
        faulty listener cannot abort initialization.
        """
        self._status = message
        logger.info(f"🔄 Background Init: {message} ({percentage}%)")

        # Read the attribute once so the object that is checked is the same
        # object that gets called.
        callback = self._progress_callback
        if callback is None:
            return
        try:
            callback(message, percentage)
        except Exception as e:
            logger.error(f"Progress callback error: {e}")

    def _learn_terminology(self):
        """Best-effort step: feed a capped sample of chunks to the learner.

        Any failure is logged as a warning and does not abort initialization.
        """
        try:
            from .medical_terminology import learn_from_corpus
            from . import utils

            # Load chunks to learn from
            chunks = utils.load_chunks()
            if chunks:
                # Convert to format expected by learner
                documents = [
                    {'content': chunk.page_content}
                    for chunk in chunks[:self.TERMINOLOGY_DOC_LIMIT]
                ]
                learn_from_corpus(documents)
                logger.info(f"Learned medical terminology from {len(documents)} documents")
        except Exception as e:
            logger.warning(f"Could not learn terminology from corpus: {e}")

    def _initialize_components(self):
        """Initialize all heavy components; intended to run in the worker thread.

        Never raises: a fatal error is stored for ``get_error()`` and logged
        with its traceback. The completion event is always set on exit.
        """
        try:
            self._update_progress("Starting background initialization...", 0)

            # Step 1: Load embedding model (this is the heaviest component)
            self._update_progress("Loading embedding model...", 10)
            from .config import get_embedding_model
            # Called only to trigger loading; the return value is not needed
            # here (presumably cached by the config module -- TODO confirm).
            get_embedding_model()
            self._update_progress("Embedding model loaded successfully", 40)

            # Step 2: Initialize retrievers (this will load chunks and create vector store)
            self._update_progress("Initializing retrievers and loading chunks...", 50)
            from .retrievers import _ensure_initialized
            _ensure_initialized()
            self._update_progress("Retrievers initialized successfully", 90)

            # Step 3: Learn medical terminology from corpus
            self._update_progress("Learning medical terminology from corpus...", 92)
            self._learn_terminology()

            # Step 4: Warm up LLM (optional, lightweight)
            self._update_progress("Warming up LLM...", 97)
            from .config import get_llm
            get_llm()

            self._update_progress("All components initialized successfully", 100)
            logger.info("✅ Background initialization completed successfully")
        except Exception as e:
            self._initialization_error = e
            # logger.exception keeps the traceback, which the plain message
            # alone does not convey.
            logger.exception(f"❌ Background initialization failed: {e}")
            self._update_progress(f"Initialization failed: {str(e)}", -1)
        finally:
            # Always release waiters, on success and on failure alike.
            self._initialization_complete.set()

    def start_background_initialization(self):
        """Start background initialization in a separate daemon thread.

        A second call is ignored with a warning; the worker is created at
        most once per instance.
        """
        if self._initialization_thread is not None:
            logger.warning("Background initialization already started")
            return

        logger.info("🚀 Starting background initialization...")
        self._initialization_thread = threading.Thread(
            target=self._initialize_components,
            name="BackgroundInitializer",
            daemon=True,
        )
        self._initialization_thread.start()

    def is_complete(self) -> bool:
        """Return True once the worker has finished, successfully or not."""
        return self._initialization_complete.is_set()

    def wait_for_completion(self, timeout: Optional[float] = None) -> bool:
        """Block until initialization finishes or ``timeout`` seconds elapse.

        Returns True if initialization finished, False on timeout. With
        ``timeout=None`` this blocks until the worker finishes, so it never
        returns if initialization was never started.
        """
        return self._initialization_complete.wait(timeout)

    def get_status(self) -> str:
        """Return the most recent progress message."""
        return self._status

    def get_error(self) -> Optional[Exception]:
        """Return the exception that aborted initialization, or None."""
        return self._initialization_error

    def is_successful(self) -> bool:
        """Return True only if initialization finished without a fatal error."""
        return self.is_complete() and self._initialization_error is None
# Global initializer instance: the module-level functions below all delegate
# to this single shared object.
_background_initializer = BackgroundInitializer()
def start_background_initialization(progress_callback: Optional[Callable[[str, int], None]] = None):
    """Kick off initialization on the shared initializer.

    Args:
        progress_callback: Optional ``callback(message, percentage)`` that is
            registered on the shared initializer before it is started.
    """
    initializer = _background_initializer
    if progress_callback:
        initializer.set_progress_callback(progress_callback)
    initializer.start_background_initialization()
def wait_for_initialization(timeout: Optional[float] = None) -> bool:
    """Block until the shared initializer finishes or the timeout elapses.

    Args:
        timeout: Maximum number of seconds to wait; ``None`` waits without limit.

    Returns:
        The result of the shared initializer's ``wait_for_completion``.
    """
    finished = _background_initializer.wait_for_completion(timeout)
    return finished
def is_initialization_complete() -> bool:
    """Report whether the shared initializer has finished running."""
    initializer = _background_initializer
    return initializer.is_complete()
def get_initialization_status() -> str:
    """Return the shared initializer's latest status message."""
    status = _background_initializer.get_status()
    return status
def get_initialization_error() -> Optional[Exception]:
    """Return the error recorded by the shared initializer, or None."""
    error = _background_initializer.get_error()
    return error
def is_initialization_successful() -> bool:
    """Report whether the shared initializer finished without an error."""
    initializer = _background_initializer
    return initializer.is_successful()