medilang-tech / app /ai_agent /cameroon_data.py
Dama03's picture
first push of the AI
411a994
import pandas as pd
from typing import List, Dict, Optional
import os
import logging
from functools import lru_cache
logger = logging.getLogger(__name__)
class CameroonData:
    """Process-wide singleton holding the clinical summaries table.

    The CSV is read once, on first construction, into ``self.df`` (every
    column loaded as ``str``). When the file is missing or unreadable,
    ``self.df`` is an empty ``DataFrame`` instead of raising.
    """

    _instance = None

    def __new__(cls):
        # Always hand back the same object; __init__ uses the
        # ``_initialized`` flag to make sure the data is loaded only once.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        if self._initialized:
            return
        self._initialized = True
        self.df = None
        # Lazily built search index; see _row_texts().
        self._indexed_df = None
        self._row_text_cache = []
        self._load_data()

    def _load_data(self):
        """Read ``clinical_summaries.csv`` into ``self.df``.

        The path is resolved two directories above this module. Any failure
        (missing file, parse error, ...) is logged and leaves ``self.df`` as
        an empty ``DataFrame`` so callers never see an exception from here.
        """
        try:
            csv_path = os.path.join(os.path.dirname(__file__), '../../clinical_summaries.csv')
            if not os.path.exists(csv_path):
                logger.warning("Clinical data file not found at %s", csv_path)
                self.df = pd.DataFrame()
                return
            # dtype=str keeps every cell textual, which is what the search needs.
            self.df = pd.read_csv(csv_path, dtype=str)
            logger.info("Loaded %s clinical cases", len(self.df))
        except Exception as e:
            logger.error("Error loading clinical data: %s", e)
            self.df = pd.DataFrame()

    def _row_texts(self) -> List[str]:
        """Return one lowercase, space-joined string per row of ``self.df``.

        Only ``str`` cells contribute (missing values are skipped). The list
        is built once per DataFrame object and rebuilt when ``self.df`` is
        rebound to a different object; in-place edits of the same DataFrame
        are not detected.
        """
        frame = self.df
        # getattr: instances created without __init__ have no index attributes yet.
        if getattr(self, "_indexed_df", None) is not frame:
            self._row_text_cache = [
                " ".join(cell for cell in cells if isinstance(cell, str)).lower()
                for cells in frame.itertuples(index=False, name=None)
            ]
            self._indexed_df = frame
        return self._row_text_cache

    def search_similar_cases(self, query: str, top_k: int = 3) -> List[Dict]:
        """Return up to ``top_k`` rows that contain any term of ``query``.

        Matching is a case-insensitive substring test of each whitespace
        separated query term (terms of two characters or fewer are ignored)
        against the concatenated text of a row. Rows are returned in table
        order, not ranked.

        Args:
            query: Search query string.
            top_k: Maximum number of results to return; values <= 0 yield
                an empty list.

        Returns:
            A fresh list of ``{column: value}`` dictionaries, one per
            matching row. Empty when there is no data, no usable query
            term, no match, or an internal error occurred (which is logged).
        """
        if self.df is None or self.df.empty:
            logger.warning("No clinical data available for search")
            return []
        if not query or not query.strip():
            return []
        if top_k <= 0:
            # Previously the limit was only checked after the first hit,
            # so a non-positive top_k still produced one result.
            return []
        try:
            query_terms = [term.lower() for term in query.split() if len(term) > 2]
            if not query_terms:
                return []
            frame = self.df
            results = []
            for position, row_text in enumerate(self._row_texts()):
                if any(term in row_text for term in query_terms):
                    # A new dict per call: callers may mutate results freely.
                    results.append(frame.iloc[position].to_dict())
                    if len(results) >= top_k:
                        break
            return results
        except Exception as e:
            logger.error("Error in search_similar_cases: %s", e)
            return []
# Module-level handle to the shared CameroonData; stays None until a load
# has produced a non-empty table.
_cameroon_data_instance = None


def get_cameroon_data() -> Optional[CameroonData]:
    """Return the shared ``CameroonData`` instance, or ``None`` without data.

    The instance is only kept once its ``df`` is non-empty. When the table
    is missing or empty, the failure is logged, ``None`` is returned, and
    the next call attempts the load again.

    Returns:
        The loaded ``CameroonData`` singleton, or ``None`` if the clinical
        data could not be loaded.
    """
    global _cameroon_data_instance
    if _cameroon_data_instance is not None:
        return _cameroon_data_instance
    try:
        candidate = CameroonData()
        if candidate.df is None or candidate.df.empty:
            logger.error("Failed to load clinical data")
            # CameroonData caches its own instance and only loads the CSV
            # the first time it is initialised. Drop that cached instance
            # so the next call really re-reads the file instead of getting
            # the same empty object back.
            CameroonData._instance = None
        else:
            _cameroon_data_instance = candidate
    except Exception as e:
        logger.error("Error initializing CameroonData: %s", e)
        _cameroon_data_instance = None
    return _cameroon_data_instance