"""Application configuration: settings, data/log paths, logging, and lazy
loaders for the LLM and embedding model.

Importing this module has side effects: it loads ``.env``, creates the data
and log directories, configures the ``AgenticMedicalRAG`` logger, and
validates that required settings are present (raising on failure).
"""

import logging
from logging.handlers import RotatingFileHandler
from pathlib import Path

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from pydantic_settings import BaseSettings, SettingsConfigDict

# Load .env into the process environment before Settings is constructed.
load_dotenv()


# --- Settings (simple, in-file) ---
class Settings(BaseSettings):
    """Environment-backed application settings (read from .env / process env)."""

    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", extra="ignore"
    )

    OPENAI_API_KEY: str
    OPENAI_BASE_URL: str | None = None

    # LlamaParse configuration for advanced PDF parsing
    LLAMA_CLOUD_API_KEY: str | None = None
    LLAMA_PREMIUM_MODE: bool = False  # Set to True for GPT-4o parsing (costs more)

    # pydantic-settings reads these from the environment itself, so plain
    # defaults suffice (the previous os.getenv(...) defaults were redundant
    # and evaluated at class-definition time).
    LOG_LEVEL: str = "INFO"
    DATA_DIR: str = ""
    LOG_DIR: str = ""


settings = Settings()

# --- File Path Configuration (Cross-platform compatible) ---
PROJECT_ROOT = Path(__file__).parent.parent.absolute()
DATA_DIR = Path(settings.DATA_DIR or (PROJECT_ROOT / "data"))
NEW_DATA = DATA_DIR / "new_data"
PROCESSED_DATA = DATA_DIR / "processed_data"
CHUNKS_PATH = DATA_DIR / "chunks.pkl"
VECTOR_STORE_DIR = DATA_DIR / "vector_store"

# Ensure all working directories exist (idempotent).
for _dir in (DATA_DIR, NEW_DATA, PROCESSED_DATA, VECTOR_STORE_DIR):
    _dir.mkdir(parents=True, exist_ok=True)

# Setup logging
LOG_DIR = Path(settings.LOG_DIR or (PROJECT_ROOT / "logs"))
LOG_DIR.mkdir(parents=True, exist_ok=True)
LOG_FILE = LOG_DIR / "app.log"

# Configure application logger (avoid duplicate handlers on re-import)
LOG_LEVEL = settings.LOG_LEVEL.upper()
logger = logging.getLogger("AgenticMedicalRAG")  # centralized logger
logger.setLevel(LOG_LEVEL)
logger.propagate = False  # don't double-log through the root logger

if not logger.handlers:
    formatter = logging.Formatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    file_handler = RotatingFileHandler(
        LOG_FILE, maxBytes=1_000_000, backupCount=3, encoding="utf-8"
    )
    file_handler.setFormatter(formatter)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.addHandler(stream_handler)

# --- LLM Configuration with lazy loading ---
_llm = None


def get_llm():
    """Get the shared ChatOpenAI instance, creating it on first use.

    Lazy initialization keeps import/startup fast; the instance is cached in
    the module-level ``_llm``.

    Returns:
        ChatOpenAI: the cached chat model (gpt-4o, streaming, temperature 0).

    Raises:
        ValueError: if OPENAI_API_KEY is missing.
        Exception: whatever ChatOpenAI construction raises, after logging.
    """
    global _llm
    if _llm is None:
        logger.info("Initializing LLM (first time)...")
        openai_key = settings.OPENAI_API_KEY
        if not openai_key:
            logger.error("OPENAI_API_KEY not found in environment variables")
            raise ValueError(
                "OpenAI API key is required. Please set OPENAI_API_KEY environment variable."
            )
        try:
            _llm = ChatOpenAI(
                model="gpt-4o",
                api_key=openai_key,
                base_url=settings.OPENAI_BASE_URL,
                temperature=0.0,
                max_tokens=2048,
                request_timeout=30,  # Increased timeout for stability
                max_retries=2,
                streaming=True,
            )
            logger.info("LLM initialized successfully")
        except Exception as e:
            # logger.exception records the traceback; bare raise preserves it.
            logger.exception("Failed to initialize LLM: %s", e)
            raise
    return _llm


def create_llm():
    """Create LLM with proper error handling and fallbacks (alias of get_llm)."""
    return get_llm()


# Lazy loading - only initialize when actually needed
LLM = None  # Will be loaded on first use

# --- Embedding Model Configuration with lazy loading ---
_embedding_model = None


def get_embedding_model():
    """Get the shared HuggingFace embedding model, loading it on first use.

    Returns:
        HuggingFaceEmbeddings: CPU-based MedEmbed model with normalized
        embeddings, cached in the module-level ``_embedding_model``.

    Raises:
        ValueError: if the model fails to load (original error chained).
    """
    global _embedding_model
    if _embedding_model is None:
        logger.info("Loading embedding model (first time)...")
        try:
            _embedding_model = HuggingFaceEmbeddings(
                model_name="abhinand/MedEmbed-base-v0.1",
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True},
            )
            logger.info("Embedding model loaded successfully")
        except Exception as e:
            logger.exception("Failed to load embedding model: %s", e)
            # Chain the cause so the underlying error is not lost.
            raise ValueError("Failed to load embedding model") from e
    return _embedding_model


# For backward compatibility
def create_embedding_model():
    """Create embedding model with proper error handling (alias of get_embedding_model)."""
    return get_embedding_model()


# Lazy loading - only load when actually needed
EMBEDDING_MODEL = None  # Will be loaded on first use


# Configuration validation
def validate_config():
    """Validate all required configurations.

    Raises:
        ValueError: if any required setting is missing or empty.
    """
    required_env_vars = ["OPENAI_API_KEY"]
    missing_vars = [
        var for var in required_env_vars if not getattr(settings, var, None)
    ]
    if missing_vars:
        raise ValueError(f"Missing required environment variables: {missing_vars}")
    logger.info("Configuration validation completed")


# Run validation on import
try:
    validate_config()
except Exception as e:
    logger.error("Configuration validation failed: %s", e)
    raise  # bare raise keeps the original traceback (``raise e`` would rewrite it)