File size: 6,319 Bytes
73c6377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
"""
Background initialization system for preloading heavy components during app startup.
This module handles the eager loading of embedding models, retrievers, and chunks
to improve first-question response time.
"""

import threading
import time
from typing import Optional, Callable
from .config import logger


class BackgroundInitializer:
    """Run the app's heavy start-up work on a daemon thread and report on it.

    The worker loads the embedding model, initializes the retrievers, makes a
    best-effort attempt to learn medical terminology from the corpus, and
    finally obtains the LLM. Progress is exposed through ``get_status()`` and
    an optional callback; completion is signalled through a
    ``threading.Event`` that is set whether the run succeeded or failed.
    """

    # Upper bound on the number of chunks handed to the terminology learner
    # (kept small for start-up performance).
    _MAX_TERMINOLOGY_DOCS = 1000

    def __init__(self):
        self._initialization_thread: Optional[threading.Thread] = None
        self._initialization_complete = threading.Event()
        self._initialization_error: Optional[Exception] = None
        self._progress_callback: Optional[Callable[[str, int], None]] = None
        self._status = "Not started"
        # Makes the "already started?" check and the thread launch atomic.
        self._start_lock = threading.Lock()

    def set_progress_callback(self, callback: Callable[[str, int], None]):
        """Register ``callback(message, percentage)`` to receive progress updates.

        The callback is invoked from whichever thread reports progress, which
        is the background worker once initialization is running.
        """
        self._progress_callback = callback

    def _update_progress(self, message: str, percentage: int):
        """Record ``message`` as the current status, log it, and notify the callback.

        Args:
            message: Human-readable description of the current step.
            percentage: Progress value passed through to the callback; the
                failure path reports ``-1``.

        Exceptions raised by the callback are logged and swallowed so that a
        faulty listener cannot abort initialization.
        """
        self._status = message
        logger.info(f"🔄 Background Init: {message} ({percentage}%)")

        # Compare against None instead of relying on truthiness: a callable
        # object may legitimately evaluate as falsy.
        callback = self._progress_callback
        if callback is None:
            return
        try:
            callback(message, percentage)
        except Exception as e:
            logger.error(f"Progress callback error: {e}")

    def _learn_medical_terminology(self):
        """Feed a capped slice of the corpus chunks to the terminology learner.

        This step is best-effort: any exception (including import failures) is
        logged as a warning and does not fail the overall initialization.
        """
        try:
            from .medical_terminology import learn_from_corpus
            from . import utils

            chunks = utils.load_chunks()
            if chunks:
                # Convert to the dict format expected by the learner.
                documents = [
                    {'content': chunk.page_content}
                    for chunk in chunks[:self._MAX_TERMINOLOGY_DOCS]
                ]
                learn_from_corpus(documents)
                logger.info(f"Learned medical terminology from {len(documents)} documents")
        except Exception as e:
            logger.warning(f"Could not learn terminology from corpus: {e}")

    def _initialize_components(self):
        """Thread target: run every initialization step in order.

        Project imports are performed lazily inside this method, so they are
        only executed on the worker thread. Any exception from a mandatory
        step is stored for ``get_error()`` and logged with its traceback. The
        completion event is always set, so waiters are released on failure too.
        """
        try:
            self._update_progress("Starting background initialization...", 0)

            # Step 1: Load embedding model (this is the heaviest component).
            self._update_progress("Loading embedding model...", 10)
            from .config import get_embedding_model
            # The return value is not needed here; presumably the getter
            # caches the model for later callers — confirm in config.
            get_embedding_model()
            self._update_progress("Embedding model loaded successfully", 40)

            # Step 2: Initialize retrievers (loads chunks and creates the vector store).
            self._update_progress("Initializing retrievers and loading chunks...", 50)
            from .retrievers import _ensure_initialized
            _ensure_initialized()
            self._update_progress("Retrievers initialized successfully", 90)

            # Step 3: Learn medical terminology from corpus (best-effort).
            self._update_progress("Learning medical terminology from corpus...", 92)
            self._learn_medical_terminology()

            # Step 4: Warm up LLM (optional, lightweight).
            self._update_progress("Warming up LLM...", 97)
            from .config import get_llm
            get_llm()
            self._update_progress("All components initialized successfully", 100)

            logger.info("✅ Background initialization completed successfully")

        except Exception as e:
            self._initialization_error = e
            # logger.exception keeps the traceback, which a plain error log
            # from a background thread would otherwise lose.
            logger.exception(f"❌ Background initialization failed: {e}")
            self._update_progress(f"Initialization failed: {e}", -1)
        finally:
            self._initialization_complete.set()

    def start_background_initialization(self):
        """Launch the worker thread once; later calls only log a warning.

        The check and the launch happen under a lock so concurrent callers
        cannot start two workers. The thread is recorded only after
        ``start()`` succeeds, so a failed launch does not block a retry.
        """
        with self._start_lock:
            if self._initialization_thread is not None:
                logger.warning("Background initialization already started")
                return

            logger.info("🚀 Starting background initialization...")
            worker = threading.Thread(
                target=self._initialize_components,
                name="BackgroundInitializer",
                daemon=True,
            )
            worker.start()
            self._initialization_thread = worker

    def is_complete(self) -> bool:
        """Return ``True`` once the worker has finished, successfully or not."""
        return self._initialization_complete.is_set()

    def wait_for_completion(self, timeout: Optional[float] = None) -> bool:
        """Block until the worker finishes or ``timeout`` seconds elapse.

        Args:
            timeout: Maximum time to wait in seconds; ``None`` waits indefinitely.

        Returns:
            ``True`` if initialization has finished, ``False`` on timeout.
        """
        return self._initialization_complete.wait(timeout)

    def get_status(self) -> str:
        """Return the most recent progress message (``"Not started"`` initially)."""
        return self._status

    def get_error(self) -> Optional[Exception]:
        """Return the exception that aborted initialization, or ``None``."""
        return self._initialization_error

    def is_successful(self) -> bool:
        """Return ``True`` only if the worker finished and recorded no error."""
        return self.is_complete() and self._initialization_error is None


# Global initializer instance shared by the module-level helper functions.
# Constructing it only sets up state; no thread is started until
# start_background_initialization() is called.
_background_initializer = BackgroundInitializer()


def start_background_initialization(progress_callback: Optional[Callable[[str, int], None]] = None):
    """Start the shared background initializer, optionally registering a callback.

    Args:
        progress_callback: Invoked as ``progress_callback(message, percentage)``
            on every progress update. ``None`` (the default) leaves any
            previously registered callback in place.

    If initialization was already started, the initializer only logs a
    warning; a callback supplied on such a call is still registered.
    """
    # Compare against None rather than relying on truthiness, so a callable
    # object that happens to evaluate as falsy is not silently dropped.
    if progress_callback is not None:
        _background_initializer.set_progress_callback(progress_callback)
    _background_initializer.start_background_initialization()


def wait_for_initialization(timeout: Optional[float] = None) -> bool:
    """Block until the shared initializer finishes or ``timeout`` seconds pass.

    Returns ``True`` once initialization has finished (successfully or not)
    and ``False`` if the timeout expired first.
    """
    finished = _background_initializer.wait_for_completion(timeout)
    return finished


def is_initialization_complete() -> bool:
    """Report whether the shared initializer has finished running."""
    done = _background_initializer.is_complete()
    return done


def get_initialization_status() -> str:
    """Return the latest progress message recorded by the shared initializer."""
    status_message = _background_initializer.get_status()
    return status_message


def get_initialization_error() -> Optional[Exception]:
    """Return the exception stored by the shared initializer, or ``None``."""
    failure = _background_initializer.get_error()
    return failure


def is_initialization_successful() -> bool:
    """Report whether the shared initializer finished without recording an error."""
    succeeded = _background_initializer.is_successful()
    return succeeded