# Commit bea2de8 (helal94hb1): "fix: new embeddings and reranker"
# app/core/config.py
import json
import logging
import os
from typing import List, Union, Optional

from dotenv import dotenv_values
from pydantic import AnyHttpUrl, validator, PostgresDsn
from pydantic_settings import BaseSettings
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
    """Application settings loaded from the environment and the ``.env`` file.

    Fields without a default (``DATABASE_URL``, ``OPENAI_API_KEY``) are
    required: instantiating the class fails validation when they cannot be
    resolved. All other fields fall back to the defaults declared here.
    """

    # --- General ---
    PROJECT_NAME: str = "Chatbot Backend V2"
    API_V2_PREFIX: str = "/api/v2"

    # --- Relational database (required) ---
    DATABASE_URL: PostgresDsn

    # --- Neo4j (all optional) ---
    NEO4J_URI: Optional[str] = None
    NEO4J_USERNAME: Optional[str] = None
    NEO4J_PASSWORD: Optional[str] = None
    NEO4J_DATABASE: Optional[str] = "neo4j"

    # --- OpenAI (API key is required) ---
    OPENAI_API_KEY: str
    OPENAI_MODEL_NAME: str = "gpt-4-turbo"

    # --- Retrieval artifacts and query encoder ---
    RETRIEVAL_ARTIFACTS_PATH: str = "data/final_retrieval_artifacts.npz"
    QUERY_ENCODER_MODEL_NAME: str = "BAAI/bge-m3"
    S3_ARTIFACTS_URL: Optional[str] = None
    S3_RERANKER_URL: Optional[str] = None

    # --- Reranker ---
    # Or the exact name of your saved .pt file
    RERANKER_MODEL_PATH: str = "data/best_expert_judge_cross_encoder.pt"
    # The base model used in your training script
    RERANKER_MODEL_NAME: str = "mixedbread-ai/mxbai-rerank-base-v2"
    RERANKER_SCORE_THRESHOLD: float = 0.0
    RERANKER_K_MIN: int = 5
    # NOTE(review): typed as int while the other thresholds are floats --
    # confirm this is intended.
    RERANKER_FALLOFF_THRESHOLD: int = 1
    RERANKER_K_MAX: int = 100
    SEQUENCE_EXPANSION_THRESHOLD: float = 0.68

    # --- CORS ---
    # NOTE(review): validators are not applied to default values unless
    # requested, so when this variable is unset the value presumably stays a
    # plain str rather than a list -- confirm consumers handle both shapes.
    BACKEND_CORS_ORIGINS: Union[str, List[AnyHttpUrl]] = "http://localhost:5173"

    @validator("BACKEND_CORS_ORIGINS", pre=True)
    def assemble_cors_origins(cls, v: Union[str, List[str]]) -> Union[List[str], str]:
        """Normalise the raw CORS origins value into a list of origin strings.

        Runs before Pydantic's own type validation (``pre=True``).

        Accepted inputs:
          * a list -- returned unchanged;
          * a comma-separated string such as ``"http://a, http://b"`` -- split
            on commas, with surrounding whitespace and empty entries dropped;
          * a JSON array string such as ``'["http://a", "http://b"]'`` --
            decoded with ``json.loads``.

        Raises:
            ValueError: if the value is neither a string nor a list, or if a
                string starting with ``[`` is not valid JSON.
        """
        if isinstance(v, list):
            return v
        if isinstance(v, str):
            text = v.strip()
            if text.startswith("["):
                # A leading "[" signals JSON list syntax; a successful parse
                # of such text is always a list.
                try:
                    return json.loads(text)
                except json.JSONDecodeError as exc:
                    raise ValueError(
                        f"BACKEND_CORS_ORIGINS is not a valid JSON list: {v!r}"
                    ) from exc
            # Skip empty entries so a trailing comma or an empty string does
            # not yield "" as an origin, which would fail URL validation.
            return [origin.strip() for origin in text.split(",") if origin.strip()]
        raise ValueError(
            f"BACKEND_CORS_ORIGINS must be a string or a list, "
            f"got {type(v).__name__}: {v!r}"
        )

    class Config:
        # Environment variable names must match the field names exactly.
        case_sensitive = True
        env_file = '.env'
        env_file_encoding = 'utf-8'
        # Ignore extra fields from the .env file. This prevents the app from
        # crashing if obsolete variables like GNN_EMBEDDINGS_NPZ_PATH are
        # still present.
        extra = 'ignore'
# Shared settings instance, built once when this module is imported.
# Settings() fails validation if the required fields without defaults
# (DATABASE_URL, OPENAI_API_KEY) cannot be resolved.
settings = Settings()