Dama03 commited on
Commit
411a994
·
1 Parent(s): 7067a7b

first push of the AI

Browse files
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+
2
+ __pycache__
3
+ .pytest_cache
4
+ .cache_cameroon
5
+ .venv
6
+ .env
7
+ image-test1.jpg
8
+ image-test2.jpg
9
+ test-audio1.wav
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PYTHONDONTWRITEBYTECODE=1 \
4
+ PYTHONUNBUFFERED=1
5
+
6
+ WORKDIR /app
7
+
8
+ RUN apt-get update \
9
+ && apt-get install -y --no-install-recommends \
10
+ build-essential \
11
+ libpq-dev \
12
+ ffmpeg \
13
+ libsndfile1 \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ COPY requirements.txt ./
17
+ RUN pip install --no-cache-dir -r requirements.txt
18
+
19
+ COPY . .
20
+
21
+ EXPOSE 8000
22
+
23
+ CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${PORT:-8000}"]
24
+
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 damarusyoane
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+
app/ai_agent/agent.py ADDED
@@ -0,0 +1,797 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any, Optional
2
+ from langchain.agents import AgentExecutor, create_react_agent, initialize_agent, AgentType
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_community.chat_models import ChatOllama
5
+ from langchain_community.tools import Tool
6
+ from langchain_community.utilities import SerpAPIWrapper
7
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
8
+ from pydantic import BaseModel, Field
9
+ from langchain_core.output_parsers import PydanticOutputParser
10
+ from langchain_huggingface import HuggingFaceEndpoint
11
+ from .maps_tool import GoogleMapsTool
12
+ from .medical_prompt import medical_system_prompt, medical_direct_prompt
13
+ from .cameroon_data import get_cameroon_data
14
+ from ..utils.config import settings
15
+ from app.ai_services import (
16
+ analyze_image as svc_analyze_image,
17
+ transcribe_audio as svc_transcribe_audio,
18
+ chat_completion as svc_chat_completion,
19
+ translate_text as svc_translate_text,
20
+ )
21
+ import logging
22
+ import base64
23
+ import io
24
+ import json
25
+ import mimetypes
26
+ import os
27
+ import tempfile
28
+ import requests
29
+ import secrets
30
+ import time
31
+
32
+ # Robust import for OpenAI rate limit error across SDK versions
33
+ try:
34
+ from openai import RateLimitError # Newer SDKs
35
+ except Exception: # pragma: no cover - fallback for older SDKs
36
+ try:
37
+ from openai.error import RateLimitError # Older SDKs
38
+ except Exception:
39
+ class RateLimitError(Exception):
40
+ pass
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+ # Pydantic model for structured ReAct output
45
+ class ReActOutput(BaseModel):
46
+ thought: str = Field(description="Your internal reasoning about the query")
47
+ action: str = Field(description="Name of the tool to use, or empty if none", default="")
48
+ action_input: str = Field(description="Input for the tool, or empty if no tool", default="")
49
+ observation: str = Field(description="Observation from tool, or empty if no tool used", default="")
50
+ final_answer: str = Field(description="Final response to the user, or empty if continuing", default="")
51
+
52
+ def calculate_dosage(input_text):
53
+ # À compléter selon besoins médicaux réels
54
+ return "Calcul de dosage basé sur: " + input_text
55
+
56
+ # ==========================
57
+ # Helper utilities for tools
58
+ # ==========================
59
+ MAX_IMAGE_CHARS = 6_000_000 # ~6MB when passed as data URI length cap
60
+ MAX_AUDIO_BYTES = 10 * 1024 * 1024 # 10 MB
61
+ MAX_FILE_BYTES = 2 * 1024 * 1024 # 2 MB
62
+
63
+ def _is_data_uri(s: str) -> bool:
64
+ return isinstance(s, str) and s.startswith("data:")
65
+
66
+ def _looks_like_url(s: str) -> bool:
67
+ return isinstance(s, str) and (s.startswith("http://") or s.startswith("https://"))
68
+
69
+ def _to_image_data_ref(value: str) -> str:
70
+ """Return a URL or data URI suitable for OpenAI vision input.
71
+ If it's base64 without data: prefix, assume PNG.
72
+ """
73
+ if not isinstance(value, str):
74
+ raise ValueError("Image reference must be a string (URL or base64/data URI)")
75
+ if _is_data_uri(value) or _looks_like_url(value):
76
+ return value
77
+ if value.startswith("attach://"):
78
+ data = resolve_attachment(value)
79
+ b64 = base64.b64encode(data).decode("utf-8")
80
+ return f"data:image/png;base64,{b64}"
81
+ # Assume raw base64
82
+ return f"data:image/png;base64,{value}"
83
+
84
+ def _download_bytes(url: str, timeout: int = 15, max_bytes: int = MAX_AUDIO_BYTES) -> bytes:
85
+ with requests.get(url, stream=True, timeout=timeout) as r:
86
+ r.raise_for_status()
87
+ data = io.BytesIO()
88
+ total = 0
89
+ for chunk in r.iter_content(chunk_size=8192):
90
+ if not chunk:
91
+ continue
92
+ total += len(chunk)
93
+ if total > max_bytes:
94
+ raise ValueError("Downloaded content exceeds size limit")
95
+ data.write(chunk)
96
+ return data.getvalue()
97
+
98
+ def _decode_data_uri(data_uri: str) -> bytes:
99
+ # Format: data:<mime>;base64,<payload>
100
+ try:
101
+ header, b64data = data_uri.split(',', 1)
102
+ return base64.b64decode(b64data)
103
+ except Exception:
104
+ raise ValueError("Invalid data URI")
105
+
106
+ def _to_bytes_from_any(ref: str, max_bytes: int) -> bytes:
107
+ # Handle in-memory attach scheme first
108
+ if isinstance(ref, str) and ref.startswith("attach://"):
109
+ data = resolve_attachment(ref)
110
+ if len(data) > max_bytes:
111
+ raise ValueError("Content exceeds size limit")
112
+ return data
113
+ if _looks_like_url(ref):
114
+ return _download_bytes(ref, max_bytes=max_bytes)
115
+ if _is_data_uri(ref):
116
+ data = _decode_data_uri(ref)
117
+ if len(data) > max_bytes:
118
+ raise ValueError("Content exceeds size limit")
119
+ return data
120
+ # Assume base64
121
+ data = base64.b64decode(ref)
122
+ if len(data) > max_bytes:
123
+ raise ValueError("Content exceeds size limit")
124
+ return data
125
+
126
+ # ==========================
127
+ # In-memory attachment registry
128
+ # ==========================
129
+ ATTACHMENT_STORE: Dict[str, Dict[str, Any]] = {}
130
+
131
+ def register_attachment(data: bytes, filename: str | None = None, mime: str | None = None) -> str:
132
+ """Store bytes in a temp registry and return an attach:// token URI."""
133
+ token = secrets.token_urlsafe(16)
134
+ ATTACHMENT_STORE[token] = {"data": data, "filename": filename, "mime": mime}
135
+ return f"attach://{token}"
136
+
137
+ def resolve_attachment(ref: str) -> bytes:
138
+ """Resolve an attach:// token to bytes, or raise KeyError/ValueError."""
139
+ if not isinstance(ref, str) or not ref.startswith("attach://"):
140
+ raise ValueError("Not an attach:// reference")
141
+ token = ref.split("://", 1)[1]
142
+ item = ATTACHMENT_STORE.get(token)
143
+ if not item:
144
+ raise KeyError("Attachment not found or expired")
145
+ return item["data"]
146
+
147
+ # ==========================
148
+ # Tool: Vision (Image analysis)
149
+ # ==========================
150
+ def analyze_image_tool(input_str: str) -> str:
151
+ """Analyze a medical image (URL or base64/data URI) with an optional question.
152
+ Input can be JSON: {{"image": "<url|data|base64>", "question": "..."}}
153
+ or a pipe-separated string: "<url|data|base64>|<question>".
154
+ Returns concise medical observations and red flags.
155
+ """
156
+ try:
157
+ image = None
158
+ question = (
159
+ "Analyze this image and describe medically relevant observations, differential considerations, "
160
+ "and red flags. If urgent signs are suspected, advise seeking care."
161
+ )
162
+ if input_str.strip().startswith('{'):
163
+ obj = json.loads(input_str)
164
+ image = obj.get('image') or obj.get('url') or obj.get('image_url')
165
+ question = obj.get('question') or question
166
+ else:
167
+ parts = [p.strip() for p in input_str.split('|', 1)]
168
+ if parts:
169
+ image = parts[0]
170
+ if len(parts) > 1 and parts[1]:
171
+ question = parts[1]
172
+ if not image:
173
+ return "Format attendu: JSON {\"image\": \"...\", \"question\": \"...\"} ou 'image|question'"
174
+
175
+ image_ref = _to_image_data_ref(image)
176
+ if len(image_ref) > MAX_IMAGE_CHARS:
177
+ return "Image trop volumineuse. Réduisez la taille ou fournissez une URL."
178
+
179
+ # Delegate to unified image analysis service (HF or local)
180
+ return svc_analyze_image(image_ref, question)
181
+ except Exception as e:
182
+ logger.error(f"Error in analyze_image_tool: {e}", exc_info=True)
183
+ return "Impossible d'analyser l'image pour le moment. Essayez une image plus petite ou une meilleure connexion."
184
+
185
+ # ==========================
186
+ # Tool: Audio transcription (Whisper)
187
+ # ==========================
188
+ def transcribe_audio_tool(input_str: str) -> str:
189
+ """Transcribe an audio file (URL or base64/data URI). Returns plain text transcription.
190
+ Input can be JSON: {{"audio": "<url|data|base64>", "filename": "name.ext"}} or just the URL/base64.
191
+ Size cap: 10 MB.
192
+ """
193
+ try:
194
+ audio_ref = None
195
+ if input_str.strip().startswith('{'):
196
+ obj = json.loads(input_str)
197
+ audio_ref = obj.get('audio') or obj.get('url')
198
+ else:
199
+ audio_ref = input_str.strip()
200
+ if not audio_ref:
201
+ return "Format attendu: {\"audio\": \"...\"} ou une chaîne URL/base64/data URI."
202
+
203
+ audio_bytes = _to_bytes_from_any(audio_ref, MAX_AUDIO_BYTES)
204
+ # Write to a temp file and delegate to unified ASR
205
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
206
+ tmp.write(audio_bytes)
207
+ tmp.flush()
208
+ temp_path = tmp.name
209
+ try:
210
+ return svc_transcribe_audio(temp_path, None)
211
+ finally:
212
+ try:
213
+ os.unlink(temp_path)
214
+ except Exception:
215
+ pass
216
+ except ValueError as ve:
217
+ return f"Audio non valide: {ve}"
218
+ except Exception as e:
219
+ logger.error(f"Error in transcribe_audio_tool: {e}", exc_info=True)
220
+ return "Impossible de transcrire l'audio pour le moment. Fournissez un fichier plus petit ou réessayez."
221
+
222
+ # ==========================
223
+ # Tool: File parse and summarize
224
+ # ==========================
225
+ def parse_file_tool(input_str: str) -> str:
226
+ """Parse and summarize a small file (text/PDF). Input JSON: {{"file": "<url|data|base64>", "filename": "..."}}
227
+ or "<url|base64>|<filename>". Size cap: 2 MB.
228
+ Returns a concise medically-relevant summary.
229
+ """
230
+ try:
231
+ file_ref = None
232
+ filename = None
233
+ if input_str.strip().startswith('{'):
234
+ obj = json.loads(input_str)
235
+ file_ref = obj.get('file') or obj.get('url') or obj.get('content')
236
+ filename = obj.get('filename')
237
+ else:
238
+ parts = [p.strip() for p in input_str.split('|', 1)]
239
+ if parts:
240
+ file_ref = parts[0]
241
+ if len(parts) > 1:
242
+ filename = parts[1]
243
+ if not file_ref:
244
+ return "Format attendu: JSON {\"file\": \"...\", \"filename\": \"...\"} ou 'file|filename'"
245
+
246
+ data = _to_bytes_from_any(file_ref, MAX_FILE_BYTES)
247
+
248
+ # Determine type
249
+ ext = (os.path.splitext(filename)[1].lower() if filename else '')
250
+ text_content = None
251
+ if ext == '.pdf' or (_is_data_uri(file_ref) and 'application/pdf' in file_ref[:64]):
252
+ try:
253
+ import PyPDF2 # type: ignore
254
+ reader = PyPDF2.PdfReader(io.BytesIO(data))
255
+ pages = min(3, len(reader.pages))
256
+ buf = []
257
+ for i in range(pages):
258
+ try:
259
+ buf.append(reader.pages[i].extract_text() or '')
260
+ except Exception:
261
+ continue
262
+ text_content = "\n".join(buf)
263
+ except ImportError:
264
+ return "Lecture PDF indisponible: installez PyPDF2 pour activer l'analyse des PDF."
265
+ except Exception:
266
+ return "Impossible de lire ce PDF. Assurez-vous qu'il n'est pas corrompu et qu'il est < 2 Mo."
267
+ else:
268
+ # Assume text-like
269
+ try:
270
+ text_content = data.decode('utf-8', errors='ignore')
271
+ except Exception:
272
+ return "Type de fichier non supporté. Fournissez un texte ou un PDF (PyPDF2 requis)."
273
+
274
+ if not text_content or not text_content.strip():
275
+ return "Aucun texte exploitable trouvé dans le fichier."
276
+
277
+ snippet = text_content[:50_000] # keep prompt small
278
+ language = detect_language(snippet)
279
+ prompt = (
280
+ "Résume de façon concise les informations médicales pertinentes du contenu suivant. "
281
+ "Inclue les signaux d'alerte éventuels et recommande, si nécessaire, une consultation.\n\n"
282
+ f"Contenu:\n{snippet}"
283
+ )
284
+ return svc_chat_completion([{"role": "user", "content": prompt}], language)
285
+ except ValueError as ve:
286
+ return f"Fichier non valide: {ve}"
287
+ except Exception as e:
288
+ logger.error(f"Error in parse_file_tool: {e}", exc_info=True)
289
+ return "Impossible d'analyser le fichier pour le moment. Essayez un fichier texte court ou un PDF léger (<2 Mo)."
290
+
291
+ def create_medical_agent(scratchpad_style: str = "messages"):
292
+ logger.info(f"Creating medical agent (scratchpad_style={scratchpad_style})...")
293
+
294
+ try:
295
+ provider = (settings.AI_PROVIDER or "hf").lower()
296
+ logger.info(f"Agent provider: {provider}")
297
+ # Initialize with system message
298
+ system_message = """
299
+ Tu es Medicare, un assistant médical intelligent et bienveillant pour la population camerounaise.
300
+
301
+ Ton objectif :
302
+ - Écouter et rassurer la personne, comme un vrai professionnel de santé empathique.
303
+ - Répondre simplement, comme si tu discutais avec un proche ou un patient, mais toujours avec sérieux.
304
+ - Utiliser Google Maps pour proposer des hôpitaux ou pharmacies proches si besoin.
305
+ - Donner des conseils adaptés au Cameroun (maladies, médicaments, habitudes locales).
306
+ - Si tu retrouves des cas similaires dans la base camerounaise, mentionne-les naturellement dans la discussion.
307
+ - Si la question sort du médical, explique gentiment que tu es là pour la santé.
308
+
309
+ Règles :
310
+ - Ne pose jamais de diagnostic définitif.
311
+ - Pour les symptômes graves, incite à consulter un médecin ou à se rendre aux urgences, sans paniquer l'utilisateur.
312
+ - Si tu as besoin de plus d'infos, pose des questions ouvertes et humaines.
313
+ - Garde le fil de la conversation et adapte tes réponses à l'historique de l'échange.
314
+ - Si des pièces jointes sont listées sous forme de jetons attach://<token>, utilise les outils appropriés en leur passant directement ces références :
315
+ * Analyse_Image pour les images (JSON {{"image": "attach://...", "question": "..."}}).
316
+ * Transcription_Audio pour l'audio (JSON {{"audio": "attach://..."}}).
317
+ * Analyse_Fichier pour les fichiers texte/PDF (JSON {{"file": "attach://...", "filename": "..."}}).
318
+
319
+
320
+ Format de réponse :
321
+ - Commence par une phrase chaleureuse ou rassurante.
322
+ - Donne l'information ou le conseil principal de façon claire et naturelle.
323
+ - Si tu proposes des établissements, présente-les comme tu le ferais à un ami (nom, adresse, statut, téléphone).
324
+ - Termine par une phrase d'ouverture ou d'encouragement ("N'hésite pas si tu as d'autres questions !").
325
+ - Ajoute un avertissement discret si nécessaire (ex : "Si tu te sens vraiment mal, va vite consulter !").
326
+
327
+ IMPORTANT: N'utilise PAS de traductions pour les étiquettes. Utilise EXACTEMENT ces étiquettes en anglais: "Thought:", "Action:", "Action Input:", "Observation:", "Final Answer:".
328
+
329
+ Toujours répondre en utilisant CE FORMAT EXACT. Après chaque "Thought:", fais l'un des deux:
330
+ 1) Si tu as besoin d'un outil: fournis "Action:" et "Action Input:", puis attends l'"Observation:" de l'outil.
331
+ 2) Si tu n'as pas besoin d'outil: termine directement avec "Final Answer:".
332
+
333
+ Thought: [Tes réflexions internes sur la question et ce que tu vas faire ensuite. Sois bref et logique. Décide si un outil est nécessaire.]
334
+ Action: [Nom de l'outil exact, comme "Recherche_Web" ou "Google_Maps". Un seul outil. (N'inclus PAS "Final Answer" ici.)]
335
+ Action Input: [Entrée précise pour l'outil.]
336
+ Observation: [Résultat de l'outil.]
337
+
338
+ Final Answer: [Ta réponse finale complète pour l'utilisateur, en respectant le "Format de réponse" ci-dessus.]
339
+
340
+ Tu peux répéter Thought/Action/Observation au besoin (maximum 3 itérations), puis termine OBLIGATOIREMENT par "Final Answer:".
341
+
342
+ N'oublie pas : tu es empathique, humain, et tu adaptes toujours ton niveau de langage à la personne en face de toi.
343
+
344
+ """
345
+ if provider == "ollama":
346
+ logger.info(f"Using ChatOllama model: {settings.OLLAMA_MODEL}")
347
+ llm = ChatOllama(
348
+ temperature=0,
349
+ model=(settings.OLLAMA_MODEL or "llama3.1:8b"),
350
+ base_url=settings.OLLAMA_BASE_URL,
351
+ )
352
+ elif provider == "lmstudio":
353
+ logger.info(f"Using LM Studio base_url: {settings.LMSTUDIO_BASE_URL}")
354
+ llm = ChatOpenAI(
355
+ temperature=0,
356
+ model=(settings.LMSTUDIO_MODEL or "local-model"),
357
+ openai_api_key=(settings.OPENAI_API_KEY or "lm-studio"),
358
+ base_url=settings.LMSTUDIO_BASE_URL,
359
+ streaming=False,
360
+ max_retries=1,
361
+ timeout=30,
362
+ max_tokens=500,
363
+ )
364
+ elif provider == "hf":
365
+ logger.info(f"Using Hugging Face Inference model: {settings.HF_TEXT_MODEL}")
366
+ llm = HuggingFaceEndpoint(
367
+ repo_id=(settings.HF_TEXT_MODEL or "mistralai/Mistral-7B-Instruct-v0.3"),
368
+ task="text-generation",
369
+ max_new_tokens=500,
370
+ temperature=0.0,
371
+ huggingfacehub_api_token=settings.HF_API_TOKEN,
372
+ )
373
+ else:
374
+ # Default: try local Ollama first, then fallback to ChatOpenAI config
375
+ try:
376
+ llm = ChatOllama(
377
+ temperature=0,
378
+ model=(settings.OLLAMA_MODEL or "llama3.1:8b"),
379
+ base_url=settings.OLLAMA_BASE_URL,
380
+ )
381
+ except Exception:
382
+ llm = ChatOpenAI(
383
+ temperature=0,
384
+ model=(settings.OPENAI_MODEL or "gpt-4o-mini"),
385
+ openai_api_key=settings.OPENAI_API_KEY,
386
+ streaming=False,
387
+ max_retries=1,
388
+ timeout=30,
389
+ max_tokens=500,
390
+ )
391
+
392
+ # Initialize tools
393
+ tools = []
394
+
395
+ # Add Google Maps tool if API key is available
396
+ if settings.GOOGLE_MAPS_API_KEY:
397
+ logger.info("Adding Google Maps tool")
398
+ maps_tool = GoogleMapsTool()
399
+ tools.append(maps_tool)
400
+
401
+ # Add web search tool if API key is available
402
+ if settings.SERPAPI_API_KEY:
403
+ logger.info("Adding web search tool")
404
+ search = SerpAPIWrapper(serpapi_api_key=settings.SERPAPI_API_KEY)
405
+ web_search_tool = Tool(
406
+ name="Recherche_Web",
407
+ func=search.run,
408
+ description="Utile pour rechercher des informations médicales générales ou des hôpitaux"
409
+ )
410
+ tools.append(web_search_tool)
411
+
412
+ # Add dosage calculator tool
413
+ logger.info("Adding dosage calculator tool")
414
+ dosage_tool = Tool(
415
+ name="Calculateur_Dosage",
416
+ func=calculate_dosage,
417
+ description="Utile pour calculer des dosages de médicaments basés sur le poids et l'âge"
418
+ )
419
+ tools.append(dosage_tool)
420
+
421
+ # Add Vision, Audio Transcription, and File Parser tools
422
+ vision_tool = Tool(
423
+ name="Analyse_Image",
424
+ func=analyze_image_tool,
425
+ description=(
426
+ "Analyser une image médicale (URL ou base64/data URI). "
427
+ "Entrée: JSON {{\"image\": \"<url|data|base64>\", \"question\": \"...\"}} ou 'image|question'. Retourne des observations cliniques concises."
428
+ ),
429
+ )
430
+ tools.append(vision_tool)
431
+
432
+ audio_tool = Tool(
433
+ name="Transcription_Audio",
434
+ func=transcribe_audio_tool,
435
+ description=(
436
+ "Transcrire un audio (URL ou base64/data URI) en texte. "
437
+ "Entrée: JSON {{\"audio\": \"<url|data|base64>\"}} ou une chaîne URL/base64. Taille <= 10 Mo."
438
+ ),
439
+ )
440
+ tools.append(audio_tool)
441
+
442
+ file_tool = Tool(
443
+ name="Analyse_Fichier",
444
+ func=parse_file_tool,
445
+ description=(
446
+ "Analyser et résumer un petit fichier (texte/PDF). "
447
+ "Entrée: JSON {{\"file\": \"<url|data|base64>\", \"filename\": \"...\"}} ou 'file|filename'. PDF nécessite PyPDF2. Taille <= 2 Mo."
448
+ ),
449
+ )
450
+ tools.append(file_tool)
451
+
452
+ logger.info(f"Initialized {len(tools)} tools")
453
+
454
+ # Build agent according to requested scratchpad style
455
+ requested = str(scratchpad_style).lower()
456
+ logger.info(f"Requested scratchpad style: {requested}")
457
+ # For HF provider, prefer legacy ReAct agent which works with text-generation LLMs
458
+ if provider == "hf" and requested != "legacy":
459
+ logger.info("HF provider: switching scratchpad to 'legacy' for compatibility")
460
+ requested = "legacy"
461
+
462
+ common_kwargs = {
463
+ "verbose": True,
464
+ "handle_parsing_errors": True,
465
+ "max_iterations": 3,
466
+ "max_execution_time": 15,
467
+ }
468
+
469
+ if requested == "legacy" or requested == "string":
470
+ logger.info("Creating legacy string-based ReAct agent")
471
+ # Use PromptTemplate for string scratchpad
472
+ prompt = PromptTemplate.from_template(
473
+ system_message + "\n\n{input}\n{agent_scratchpad}"
474
+ )
475
+ legacy_agent = initialize_agent(
476
+ tools=tools,
477
+ llm=llm,
478
+ agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
479
+ prompt=prompt,
480
+ **common_kwargs,
481
+ )
482
+ logger.info("Legacy/string agent created successfully")
483
+ return legacy_agent
484
+
485
+ # Messages-based ReAct agent with Pydantic output parser guidance
486
+ logger.info("Creating messages-based ReAct agent")
487
+ # Combine system prompt with ReAct-style instructions (no JSON)
488
+ template = system_message + """
489
+
490
+ Tools: {tools}
491
+
492
+ Tool names: {tool_names}
493
+
494
+ Begin!
495
+
496
+ {input}
497
+
498
+ {agent_scratchpad}"""
499
+
500
+ prompt = ChatPromptTemplate.from_template(template)
501
+ # Supply tool variables for the prompt
502
+ prompt = prompt.partial(
503
+ tool_names=", ".join([t.name for t in tools]) if tools else "none",
504
+ tools="\n".join([f"- {t.name}: {t.description}" for t in tools]) if tools else "No tools available",
505
+ )
506
+ agent = create_react_agent(llm, tools, prompt)
507
+ logger.info("Messages-based agent created successfully")
508
+ return AgentExecutor(agent=agent, tools=tools, **common_kwargs)
509
+
510
+ except Exception as e:
511
+ logger.error(f"Error creating agent: {str(e)}", exc_info=True)
512
+ raise
513
+
514
+ import asyncio
515
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
516
+ from functools import partial
517
+ import re
518
+
519
+ # logger is initialized at the top of this module
520
+
521
+ def _invoke_with_timeout(agent: Any, user_input: str, timeout: int = 60) -> Any:
522
+ """Invoke the agent with a hard timeout in a background thread."""
523
+ def _invoke():
524
+ return agent.invoke({"input": user_input})
525
+ with ThreadPoolExecutor(max_workers=1) as executor:
526
+ future = executor.submit(_invoke)
527
+ return future.result(timeout=timeout)
528
+
529
+ def search_cases_with_timeout(query: str, timeout: int = 10) -> str:
530
+ """Search for similar cases with a timeout to prevent hanging."""
531
+ try:
532
+ cameroon_data = get_cameroon_data()
533
+ if cameroon_data is None:
534
+ logger.warning("No clinical data available")
535
+ return ""
536
+
537
+ similar_cases = cameroon_data.search_similar_cases(query, top_k=3)
538
+
539
+ if not similar_cases:
540
+ return ""
541
+
542
+ context = "Cas cliniques camerounais similaires trouvés :\n"
543
+ for case in similar_cases:
544
+ # Format the case nicely, showing only non-empty fields
545
+ case_info = []
546
+ for key, value in case.items():
547
+ if value and str(value).strip() and str(value).lower() != 'nan':
548
+ case_info.append(f"{key}: {value}")
549
+ if case_info:
550
+ context += f"- {' | '.join(case_info)}\n"
551
+
552
+ return context
553
+
554
+ except Exception as e:
555
+ logger.error(f"Error searching cases: {str(e)}")
556
+ return ""
557
+
558
+ from langdetect import detect
559
+
560
+ def detect_language(text: str) -> str:
561
+ """Detect the language of the input text."""
562
+ try:
563
+ lang = detect(text)
564
+ return lang if lang in ['fr', 'en'] else 'fr' # Default to French if not English
565
+ except:
566
+ return 'fr' # Default to French on detection failure
567
+
568
+ def handle_user_query(
569
+ query: str,
570
+ user_location: str = None,
571
+ image: str = None,
572
+ audio: str = None,
573
+ files: List[str] = None,
574
+ file_names: List[str] = None,
575
+ images: List[str] = None,
576
+ audios: List[str] = None,
577
+ agent_mode: str = None,
578
+ ) -> str:
579
+ try:
580
+ logger.info(f"Handling query: {query[:100]}...") # Log first 100 chars of query
581
+
582
+ # Detect input language
583
+ input_language = detect_language(query)
584
+ logger.info(f"Detected input language: {input_language}")
585
+
586
+ # Build the final input with language instruction
587
+ try:
588
+ language_instruction = " (répondez en français)" if input_language == 'fr' else " (respond in English)"
589
+ query_with_language = f"{query}{language_instruction}"
590
+
591
+ # Quick simple-query bypass BEFORE agent creation: no attachments and no location
592
+ has_attachments = bool((image and image.strip()) or (images and len(images) > 0) or (audio and audio.strip()) or (audios and len(audios) > 0) or (files and len(files) > 0))
593
+ if not has_attachments and not user_location:
594
+ logger.info("Simple query; direct LLM (no agent) via unified provider")
595
+ try:
596
+ direct_prompt = medical_direct_prompt + f"\n{query_with_language}"
597
+ return svc_chat_completion([{"role": "user", "content": direct_prompt}], input_language)
598
+ except Exception as e:
599
+ logger.warning(f"Simple path failed, using agent: {e}")
600
+
601
+ except Exception as e:
602
+ logger.warning(f"Error preparing input for simple bypass: {e}")
603
+
604
+ # Create agent automatically only if needed: try messages first, fall back to legacy/string
605
+ try:
606
+ agent = create_medical_agent("messages")
607
+ logger.info("Agent created successfully (messages scratchpad)")
608
+ except Exception as e:
609
+ logger.warning(f"Messages-based agent creation failed, trying legacy fallback: {e}")
610
+ try:
611
+ agent = create_medical_agent("legacy")
612
+ logger.info("Agent created successfully (legacy scratchpad)")
613
+ except Exception as e2:
614
+ logger.error(f"Error creating agent after legacy fallback: {str(e2)}", exc_info=True)
615
+ return "Désolé, une erreur est survenue lors de l'initialisation de l'assistant."
616
+
617
+ # Run Cameroon data search with timeout in a separate thread
618
+ context = ""
619
+ try:
620
+ with ThreadPoolExecutor() as executor:
621
+ future = executor.submit(search_cases_with_timeout, query)
622
+ context = future.result(timeout=15) # 15 seconds timeout
623
+ logger.info("Successfully searched clinical data")
624
+ except FuturesTimeoutError:
625
+ logger.warning("Cameroon data search timed out")
626
+ except Exception as e:
627
+ logger.error(f"Error in clinical data search: {str(e)}", exc_info=True)
628
+
629
+ # Build the final input with language instruction
630
+ try:
631
+ # Summarize attachments so the agent is aware of provided modalities
632
+ attachment_lines: List[str] = []
633
+ all_images = []
634
+ if image:
635
+ all_images.append(image)
636
+ if images:
637
+ all_images.extend(images)
638
+ if all_images:
639
+ # Show attach URIs to enable tool usage
640
+ show = ", ".join(all_images[:5]) + ("..." if len(all_images) > 5 else "")
641
+ attachment_lines.append(f"- Images: {len(all_images)} fournies -> {show}")
642
+
643
+ all_audios = []
644
+ if audio:
645
+ all_audios.append(audio)
646
+ if audios:
647
+ all_audios.extend(audios)
648
+ if all_audios:
649
+ show = ", ".join(all_audios[:5]) + ("..." if len(all_audios) > 5 else "")
650
+ attachment_lines.append(f"- Audios: {len(all_audios)} fournis -> {show}")
651
+
652
+ if files:
653
+ names = file_names or []
654
+ display_names = ", ".join(names[:5]) + ("..." if len(names) > 5 else "")
655
+ show = ", ".join((files or [])[:5]) + ("..." if len(files or []) > 5 else "")
656
+ attachment_lines.append(f"- Fichiers: {len(files)} ({display_names}) -> {show}")
657
+
658
+ attachments_context = ""
659
+ if attachment_lines:
660
+ attachments_context = "Pièces jointes:\n" + "\n".join(attachment_lines)
661
+
662
+ components: List[str] = []
663
+ if context:
664
+ components.append(context)
665
+ if attachments_context:
666
+ components.append(attachments_context)
667
+ components.append(query_with_language)
668
+ user_input = "\n\n".join(components)
669
+ if user_location:
670
+ user_input += f"\nUser location: {user_location}"
671
+
672
+ logger.debug(f"Final input to agent: {user_input[:200]}...") # Log first 200 chars
673
+
674
+ # Simple-query bypass: if no attachments and no explicit location, answer directly via LLM
675
+ if not attachments_context and not user_location:
676
+ try:
677
+ simple_prompt = medical_direct_prompt + f"\n{user_input}"
678
+ return svc_chat_completion([{"role": "user", "content": simple_prompt}], input_language)
679
+ except Exception as e:
680
+ logger.warning(f"Simple path failed, using agent: {e}")
681
+
682
+ # Execute agent with a hard timeout; if parsing issues occur, apply fallbacks
683
+ try:
684
+ response = _invoke_with_timeout(agent, user_input, timeout=60)
685
+ except FuturesTimeoutError:
686
+ logger.warning("Agent invocation timed out; retrying after 10s sleep")
687
+ time.sleep(10)
688
+ try:
689
+ response = _invoke_with_timeout(agent, user_input, timeout=60)
690
+ except FuturesTimeoutError:
691
+ logger.error("Agent invocation timed out again after retry")
692
+ return _offline_fallback_response(query, context, input_language)
693
+ except Exception as e2:
694
+ logger.error(f"Error after timeout retry: {e2}", exc_info=True)
695
+ return _offline_fallback_response(query, context, input_language)
696
+ except Exception as e:
697
+ msg = str(e).lower()
698
+ # Timeout retry once
699
+ if ("timeout" in msg) or isinstance(e, FuturesTimeoutError):
700
+ logger.warning("Invocation timed out; retrying after 10s sleep")
701
+ time.sleep(10)
702
+ return _invoke_with_timeout(agent, user_input, timeout=60)
703
+ # Parsing-related issues: prefer direct LLM fallback
704
+ if isinstance(e, RateLimitError):
705
+ wait_time = 180
706
+ logger.warning(f"Rate limit hit; sleeping {wait_time}s before retry")
707
+ time.sleep(wait_time)
708
+ try:
709
+ response = _invoke_with_timeout(agent, user_input, timeout=60)
710
+ except Exception as e2:
711
+ logger.error(f"Retry after rate limit failed: {e2}", exc_info=True)
712
+ return _offline_fallback_response(query, context, input_language)
713
+ elif re.search(r"(invalid format|missing action|parsing failure)", msg, re.IGNORECASE):
714
+ logger.warning("Detected parsing issue; using direct LLM fallback first")
715
+ # Prefer a single-call direct LLM response to avoid further agent loops
716
+ try:
717
+ direct_prompt = medical_direct_prompt + f"\n{user_input}"
718
+ response_text = svc_chat_completion([{"role": "user", "content": direct_prompt}], input_language)
719
+ return response_text
720
+ except Exception:
721
+ logger.warning("Direct LLM fallback failed; trying legacy agent as secondary fallback")
722
+ try:
723
+ agent = create_medical_agent("legacy")
724
+ response = _invoke_with_timeout(agent, user_input, timeout=60)
725
+ except Exception:
726
+ return _offline_fallback_response(query, context, input_language)
727
+ else:
728
+ logger.error(f"Error during agent invocation: {e}", exc_info=True)
729
+ return _offline_fallback_response(query, context, input_language)
730
+
731
+ # Normalize response across LC versions (dict vs. string)
732
+ if response is None:
733
+ logger.error("Agent returned None")
734
+ return _offline_fallback_response(query, context, input_language)
735
+
736
+ # Extract response text
737
+ if isinstance(response, dict):
738
+ # Handle dict response (newer LC versions)
739
+ if 'output' in response:
740
+ response_text = response['output']
741
+ elif 'final_answer' in response:
742
+ response_text = response['final_answer']
743
+ elif 'result' in response:
744
+ response_text = response['result']
745
+ else:
746
+ logger.warning(f"Unexpected response format: {response}")
747
+ response_text = str(response)
748
+ else:
749
+ # Handle string response (older LC versions)
750
+ response_text = str(response)
751
+
752
+ # Ensure response is in the same language as the input
753
+ if input_language == 'fr' and not any(word in response_text.lower() for word in ['désolé', 'bonjour', 'merci']):
754
+ try:
755
+ response_text = svc_translate_text(response_text, 'fr')
756
+ except Exception as e:
757
+ logger.error(f"Error translating response to French: {e}")
758
+ response_text = f"[English Response] {response_text}\n\nDésolé, je n'ai pas pu traduire la réponse en français. Voici la réponse en anglais ci-dessus."
759
+
760
+ return response_text
761
+
762
+ except Exception as e:
763
+ logger.error(f"Error in agent execution: {str(e)}", exc_info=True)
764
+ return _offline_fallback_response(query, context, input_language)
765
+
766
+ except Exception as e:
767
+ logger.critical(f"Critical error in handle_user_query: {str(e)}", exc_info=True)
768
+ return "Désolé, une erreur inattendue s'est produite. Veuillez réessayer plus tard."
769
+
770
+ def _offline_fallback_response(query: str, context: str, lang: str) -> str:
771
+ """Produce a concise offline fallback response for low connectivity situations."""
772
+ if lang == 'fr':
773
+ parts = [
774
+ "Connexion instable détectée. Voici une réponse rapide basée sur des bonnes pratiques générales :",
775
+ ]
776
+ if context:
777
+ parts.append(context.strip())
778
+ parts.extend([
779
+ "- Prends soin de toi et évite les efforts inutiles.",
780
+ "- Tu peux envisager des antalgiques en vente libre avec prudence si nécessaire.",
781
+ "- Si les symptômes sont sévères, persistent ou s'aggravent, consulte rapidement un professionnel de santé.",
782
+ "Note: message généré en mode connectivité limitée. Pour un avis personnalisé, consulte un professionnel.",
783
+ ])
784
+ return "\n".join(parts)
785
+ else:
786
+ parts = [
787
+ "Unstable connection detected. Here is a brief response based on general good practices:",
788
+ ]
789
+ if context:
790
+ parts.append(context.strip())
791
+ parts.extend([
792
+ "- Take care and avoid unnecessary strain.",
793
+ "- Consider over-the-counter pain relief responsibly if appropriate.",
794
+ "- If symptoms are severe, persistent, or worsening, seek medical care promptly.",
795
+ "Note: generated in low-connectivity mode. For personalized advice, consult a professional.",
796
+ ])
797
+ return "\n".join(parts)
app/ai_agent/cameroon_data.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from typing import List, Dict, Optional
3
+ import os
4
+ import logging
5
+ from functools import lru_cache
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ class CameroonData:
10
+ _instance = None
11
+
12
+ def __new__(cls):
13
+ if cls._instance is None:
14
+ cls._instance = super(CameroonData, cls).__new__(cls)
15
+ cls._instance._initialized = False
16
+ return cls._instance
17
+
18
+ def __init__(self):
19
+ if self._initialized:
20
+ return
21
+
22
+ self._initialized = True
23
+ self.df = None
24
+ self._load_data()
25
+
26
+ def _load_data(self):
27
+ """Load the clinical data with error handling and performance optimizations."""
28
+ try:
29
+ csv_path = os.path.join(os.path.dirname(__file__), '../../clinical_summaries.csv')
30
+ if not os.path.exists(csv_path):
31
+ logger.warning(f"Clinical data file not found at {csv_path}")
32
+ self.df = pd.DataFrame()
33
+ return
34
+
35
+ # Load only necessary columns if possible
36
+ self.df = pd.read_csv(csv_path, dtype=str)
37
+ logger.info(f"Loaded {len(self.df)} clinical cases")
38
+
39
+ except Exception as e:
40
+ logger.error(f"Error loading clinical data: {str(e)}")
41
+ self.df = pd.DataFrame()
42
+
43
+ @lru_cache(maxsize=128)
44
+ def search_similar_cases(self, query: str, top_k: int = 3) -> List[Dict]:
45
+ """
46
+ Search for similar cases using the query.
47
+
48
+ Args:
49
+ query: Search query string
50
+ top_k: Maximum number of results to return
51
+
52
+ Returns:
53
+ List of matching case dictionaries
54
+ """
55
+ if self.df is None or self.df.empty:
56
+ logger.warning("No clinical data available for search")
57
+ return []
58
+
59
+ if not query or not query.strip():
60
+ return []
61
+
62
+ try:
63
+ # Convert query to lowercase once
64
+ query_terms = [term.lower() for term in query.split() if len(term) > 2] # Ignore very short terms
65
+ if not query_terms:
66
+ return []
67
+
68
+ results = []
69
+
70
+ # Pre-process text for each row once
71
+ for _, row in self.df.iterrows():
72
+ # Only process string columns and skip NaN values
73
+ row_text = ' '.join(
74
+ str(row[col]) for col in self.df.columns
75
+ if isinstance(row[col], str) and pd.notna(row[col])
76
+ ).lower()
77
+
78
+ # Check if any query term is in the row text
79
+ if any(term in row_text for term in query_terms):
80
+ results.append(row.to_dict())
81
+ if len(results) >= top_k:
82
+ break
83
+
84
+ return results
85
+
86
+ except Exception as e:
87
+ logger.error(f"Error in search_similar_cases: {str(e)}")
88
+ return []
89
+
90
+ # Singleton instance
91
+ _cameroon_data_instance = None
92
+
93
+ def get_cameroon_data() -> Optional[CameroonData]:
94
+ """
95
+ Get the singleton instance of CameroonData.
96
+ Returns None if the data cannot be loaded.
97
+ """
98
+ global _cameroon_data_instance
99
+
100
+ if _cameroon_data_instance is None:
101
+ try:
102
+ _cameroon_data_instance = CameroonData()
103
+ # Verify data was loaded
104
+ if _cameroon_data_instance.df is None or _cameroon_data_instance.df.empty:
105
+ logger.error("Failed to load clinical data")
106
+ _cameroon_data_instance = None
107
+ except Exception as e:
108
+ logger.error(f"Error initializing CameroonData: {str(e)}")
109
+ _cameroon_data_instance = None
110
+
111
+ return _cameroon_data_instance
app/ai_agent/maps_tool.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from langchain.tools import BaseTool
4
+ from pydantic import BaseModel, Field
5
+ from typing import Type
6
+
7
+ class LocationInput(BaseModel):
8
+ location: str = Field(description="La localisation pour la recherche d'établissements médicaux")
9
+ keyword: str = Field(default="hospital|pharmacy", description="Type d'établissement: hôpital, pharmacie, etc.")
10
+
11
+ class GoogleMapsTool(BaseTool):
12
+ name : str = "google_maps_search"
13
+ description : str = "Recherche des établissements médicaux près d'une localisation using Google Maps API"
14
+ args_schema: Type[BaseModel] = LocationInput
15
+
16
+ def _run(self, location: str, keyword: str = "hospital|pharmacy"):
17
+ api_key = os.getenv("GOOGLE_MAPS_API_KEY")
18
+ geocode_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={location}&key={api_key}"
19
+ geocode_data = requests.get(geocode_url).json()
20
+ if not geocode_data['results']:
21
+ return "Désolé, je n'ai pas pu trouver cette localisation."
22
+ coords = geocode_data['results'][0]['geometry']['location']
23
+ lat, lng = coords['lat'], coords['lng']
24
+ places_url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat},{lng}&radius=5000&keyword={keyword}&type=hospital|pharmacy&key={api_key}"
25
+ places_data = requests.get(places_url).json()
26
+ results = []
27
+ for place in places_data.get('results', [])[:5]:
28
+ is_open = "Ouvert" if place.get('opening_hours', {}).get('open_now', False) else "Fermé"
29
+ details_url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place['place_id']}&fields=name,formatted_phone_number,opening_hours,formatted_address&key={api_key}"
30
+ details_data = requests.get(details_url).json()
31
+ place_info = details_data.get('result', {})
32
+ results.append({
33
+ 'name': place_info.get('name', 'Nom non disponible'),
34
+ 'address': place_info.get('formatted_address', 'Adresse non disponible'),
35
+ 'phone': place_info.get('formatted_phone_number', 'Téléphone non disponible'),
36
+ 'status': is_open,
37
+ 'rating': place.get('rating', 'Non noté'),
38
+ 'types': place.get('types', [])
39
+ })
40
+ return results
41
+
42
+ def _arun(self, location: str, keyword: str):
43
+ raise NotImplementedError("Async non supporté")
app/ai_agent/medical_prompt.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ medical_system_prompt = '''
2
+ Tu es Medicare, un assistant médical intelligent et bienveillant pour la population camerounaise.
3
+
4
+ Ton objectif :
5
+ - Écouter et rassurer la personne, comme un vrai professionnel de santé empathique.
6
+ - Répondre simplement, comme si tu discutais avec un proche ou un patient, mais toujours avec sérieux.
7
+ - Utiliser Google Maps pour proposer des hôpitaux ou pharmacies proches si besoin.
8
+ - Donner des conseils adaptés au Cameroun (maladies, médicaments, habitudes locales).
9
+ - Si tu retrouves des cas similaires dans la base camerounaise, mentionne-les naturellement dans la discussion.
10
+ - Si la question sort du médical, explique gentiment que tu es là pour la santé.
11
+
12
+ Règles :
13
+ - Ne pose jamais de diagnostic définitif.
14
+ - Pour les symptômes graves, incite à consulter un médecin ou à se rendre aux urgences, sans paniquer l'utilisateur.
15
+ - Si tu as besoin de plus d'infos, pose des questions ouvertes et humaines.
16
+ - Garde le fil de la conversation et adapte tes réponses à l'historique de l'échange.
17
+
18
+ Format de réponse :
19
+ - Commence par une phrase chaleureuse ou rassurante.
20
+ - Donne l'information ou le conseil principal de façon claire et naturelle.
21
+ - Si tu proposes des établissements, présente-les comme tu le ferais à un ami (nom, adresse, statut, téléphone).
22
+ - Termine par une phrase d'ouverture ou d'encouragement ("N'hésite pas si tu as d'autres questions !").
23
+ - Ajoute un avertissement discret si nécessaire (ex : "Si tu te sens vraiment mal, va vite consulter !").
24
+
25
+ IMPORTANT: N'utilise PAS de traductions pour les étiquettes. Utilise EXACTEMENT ces étiquettes en anglais: "Thought:", "Action:", "Action Input:", "Observation:", "Final Answer:".
26
+
27
+ Toujours répondre en utilisant CE FORMAT EXACT. Après chaque "Thought:", fais l'un des deux:
28
+ 1) Si tu as besoin d'un outil: fournis "Action:" et "Action Input:", puis attends l'"Observation:" de l'outil.
29
+ 2) Si tu n'as pas besoin d'outil: termine directement avec "Final Answer:".
30
+
31
+ Thought: [Tes réflexions internes sur la question et ce que tu vas faire ensuite. Sois bref et logique. Décide si un outil est nécessaire.]
32
+ Action: [Nom de l'outil exact, comme "Recherche_Web" ou "Google_Maps". Un seul outil. (N'inclus PAS "Final Answer" ici.)]
33
+ Action Input: [Entrée précise pour l'outil.]
34
+ Observation: [Résultat de l'outil.]
35
+
36
+ Final Answer: [Ta réponse finale complète pour l'utilisateur, en respectant le "Format de réponse" ci-dessus.]
37
+
38
+ Tu peux répéter Thought/Action/Observation au besoin (maximum 3 itérations), puis termine OBLIGATOIREMENT par "Final Answer:".
39
+
40
+ N'oublie pas : tu es empathique, humain, et tu adaptes toujours ton niveau de langage à la personne en face de toi.
41
+ '''
42
+
43
+
44
+ medical_direct_prompt = '''
45
+ Tu es Medicare, un assistant médical intelligent et bienveillant pour la population camerounaise.
46
+
47
+ Ton objectif :
48
+ - Écouter et rassurer la personne, comme un vrai professionnel de santé empathique.
49
+ - Répondre simplement, comme si tu discutais avec un proche ou un patient, mais toujours avec sérieux.
50
+ - Utiliser, si nécessaire, tes connaissances médicales générales (pas d'outils externes dans ce mode direct).
51
+ - Donner des conseils adaptés au Cameroun (maladies, médicaments, habitudes locales).
52
+ - Si la question sort du médical, explique gentiment que tu es là pour la santé.
53
+
54
+ Règles :
55
+ - Ne pose jamais de diagnostic définitif.
56
+ - Pour les symptômes graves, incite à consulter un médecin ou à se rendre aux urgences, sans paniquer l'utilisateur.
57
+ - Si tu as besoin de plus d'infos, pose des questions ouvertes et humaines.
58
+ - Garde le fil de la conversation et adapte tes réponses à l'historique de l'échange.
59
+
60
+ Format de réponse :
61
+ - Commence par une phrase chaleureuse ou rassurante.
62
+ - Donne l'information ou le conseil principal de façon claire et naturelle.
63
+ - Si tu proposes des établissements, présente-les comme tu le ferais à un ami (nom, adresse, statut, téléphone) si tu en connais.
64
+ - Termine par une phrase d'ouverture ou d'encouragement ("N'hésite pas si tu as d'autres questions !").
65
+ - Ajoute un avertissement discret si nécessaire (ex : "Si tu te sens vraiment mal, va vite consulter !").
66
+
67
+ IMPORTANT :
68
+ - Fournis uniquement la réponse finale destinée à l'utilisateur.
69
+ - N'affiche AUCUN format ReAct. Pas de "Thought:", pas de "Action:", pas d'"Observation:".
70
+ - Réponds directement dans la langue de la requête (français si la requête est en français, sinon anglais).
71
+ '''
app/ai_agent/memory.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.memory import ChatMessageHistory, ConversationBufferMemory
2
+
3
+ def get_memory():
4
+ """
5
+ Create a conversation memory with the latest LangChain syntax.
6
+ """
7
+ # Create a message history object
8
+ message_history = ChatMessageHistory()
9
+
10
+ # Create a memory that uses the message history
11
+ memory = ConversationBufferMemory(
12
+ memory_key="chat_history",
13
+ chat_memory=message_history,
14
+ return_messages=True,
15
+ output_key="output"
16
+ )
17
+
18
+ return memory
app/ai_services.py ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from loguru import logger
3
+ import json
4
+ import io
5
+ import os
6
+ import base64
7
+
8
+ from app.utils.config import settings
9
+
10
+ # Keep OpenAI symbol to avoid breaking legacy tests that patch it, but do not use it in runtime paths
11
+ try:
12
+ from openai import OpenAI
13
+ except Exception: # pragma: no cover - optional import during dev
14
+ OpenAI = None # type: ignore
15
+
16
+
17
+ def _load_patient_context() -> str:
18
+ try:
19
+ # Prefer CSV summaries for Cameroon context if available
20
+ csv_path = getattr(settings, "CAMEROON_DATA_CSV", None)
21
+ if csv_path and os.path.exists(csv_path):
22
+ import csv
23
+ rows: list[str] = []
24
+ with open(csv_path, "r", encoding="utf-8") as f:
25
+ reader = csv.DictReader(f)
26
+ for i, row in enumerate(reader):
27
+ if i >= 120: # cap to avoid oversized prompts
28
+ break
29
+ parts = []
30
+ for k, v in (row or {}).items():
31
+ if v is None:
32
+ continue
33
+ s = str(v).strip()
34
+ if s:
35
+ parts.append(f"{k}: {s}")
36
+ if parts:
37
+ rows.append(" | ".join(parts))
38
+ text = "\n".join(rows)
39
+ return text[:6000]
40
+ # Fallback to legacy JSON patient data
41
+ with open(settings.PATIENT_DATA_PATH, "r", encoding="utf-8") as f:
42
+ data = json.load(f)
43
+ return json.dumps(data)[:6000]
44
+ except Exception as exc:
45
+ logger.warning(f"Cannot load patient RAG data: {exc}")
46
+ return ""
47
+
48
+
49
+ def build_system_prompt(language: str) -> str:
50
+ if language.lower().startswith("fr"):
51
+ disclaimer = (
52
+ "Tu es Medilang, un assistant médical virtuel compatissant et bienveillant, spécialement conçu pour les utilisateurs camerounais. Ton rôle est de fournir des conseils de premier recours, des informations sanitaires et de l'orientation, en tenant strictement compte du contexte local camerounais. "
53
+ "N'oublie pas de specifier que tu n'est qu'une ia et recommande le contact au medecin en cas de situation grave"
54
+ )
55
+
56
+ else:
57
+ disclaimer = (
58
+ "You are Medilang, a compassionate medical assistant for Cameroon. "
59
+ "Be clear and adapt advice to local context (malaria, typhoid, vaccination, access to care). "
60
+ "Include a medical disclaimer and recommend seeing a doctor for serious cases."
61
+ )
62
+ rag = _load_patient_context()
63
+ return f"{disclaimer}\nContext (Cameroon RAG): {rag[:4000]}"
64
+
65
+
66
+ def detect_language(text: str) -> str:
67
+ try:
68
+ from langdetect import detect
69
+ code = detect(text)
70
+ # Map common codes to our expected values
71
+ if code.startswith("fr"):
72
+ return "fr"
73
+ if code.startswith("en"):
74
+ return "en"
75
+ return code
76
+ except Exception:
77
+ return "fr"
78
+
79
+
80
+ def openai_client():
81
+ """Legacy helper kept for backward compatibility in tests.
82
+ Not used by runtime code after migration to HF/Ollama/LM Studio.
83
+ """
84
+ if not settings.OPENAI_API_KEY:
85
+ raise RuntimeError("OPENAI_API_KEY not configured")
86
+ if OpenAI is None:
87
+ raise RuntimeError("openai package not available")
88
+ return OpenAI(api_key=settings.OPENAI_API_KEY)
89
+
90
+
91
+ # ==========================
92
+ # Provider utilities
93
+ # ==========================
94
+ def _flatten_messages(messages: List[dict], system: str | None) -> str:
95
+ parts: List[str] = []
96
+ if system:
97
+ parts.append(f"System:\n{system}\n")
98
+ for m in messages:
99
+ role = m.get("role") or "user"
100
+ content = m.get("content")
101
+ if isinstance(content, list):
102
+ # Extract text parts if using OpenAI-style content chunks
103
+ text_chunks = []
104
+ for c in content:
105
+ if isinstance(c, dict) and c.get("type") == "text":
106
+ text_chunks.append(c.get("text") or "")
107
+ elif isinstance(c, dict) and c.get("type") == "image_url":
108
+ url = (c.get("image_url") or {}).get("url") if isinstance(c.get("image_url"), dict) else c.get("image_url")
109
+ if url:
110
+ text_chunks.append(f"[Image: {url}]")
111
+ content = "\n".join([t for t in text_chunks if t])
112
+ parts.append(f"{role.capitalize()}: {content}")
113
+ parts.append("Assistant:")
114
+ return "\n\n".join(parts)
115
+
116
+
117
+ def _hf_generate_text(prompt: str, max_new_tokens: int = 400, temperature: float = 0.3) -> str:
118
+ import httpx
119
+ headers = {"Authorization": f"Bearer {settings.HF_API_TOKEN}"} if settings.HF_API_TOKEN else {}
120
+ url = f"https://api-inference.huggingface.co/models/{settings.HF_TEXT_MODEL}"
121
+ payload = {
122
+ "inputs": prompt,
123
+ "parameters": {
124
+ "max_new_tokens": max_new_tokens,
125
+ "temperature": temperature,
126
+ "return_full_text": False,
127
+ },
128
+ }
129
+ r = httpx.post(url, headers=headers, json=payload, timeout=120)
130
+ r.raise_for_status()
131
+ out = r.json()
132
+ # HF can return list[{generated_text}] or dict/text
133
+ if isinstance(out, list) and out and isinstance(out[0], dict) and out[0].get("generated_text"):
134
+ return out[0]["generated_text"]
135
+ if isinstance(out, dict) and out.get("generated_text"):
136
+ return out.get("generated_text")
137
+ if isinstance(out, str):
138
+ return out
139
+ return json.dumps(out)[:1000]
140
+
141
+
142
+ def _ollama_chat(messages: List[dict], model: str | None = None, base_url: str | None = None) -> str:
143
+ import httpx
144
+ model = model or settings.OLLAMA_MODEL
145
+ base = (base_url or settings.OLLAMA_BASE_URL).rstrip("/")
146
+ url = f"{base}/api/chat"
147
+ payload = {
148
+ "model": model,
149
+ "messages": messages,
150
+ "stream": False,
151
+ "options": {"temperature": 0.3}
152
+ }
153
+ r = httpx.post(url, json=payload, timeout=120)
154
+ r.raise_for_status()
155
+ data = r.json()
156
+ # Newer Ollama returns {message: {content: "..."}} when stream=False
157
+ if isinstance(data, dict):
158
+ if data.get("message") and isinstance(data["message"], dict):
159
+ return data["message"].get("content", "")
160
+ if data.get("response"):
161
+ return data.get("response", "")
162
+ return str(data)
163
+
164
+
165
+ def _lmstudio_chat(messages: List[dict]) -> str:
166
+ import httpx
167
+ base = settings.LMSTUDIO_BASE_URL.rstrip("/")
168
+ url = f"{base}/chat/completions"
169
+ model = settings.LMSTUDIO_MODEL or "local-model"
170
+ payload = {
171
+ "model": model,
172
+ "messages": messages,
173
+ "temperature": 0.3,
174
+ }
175
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {settings.OPENAI_API_KEY or 'lm-studio'}"}
176
+ r = httpx.post(url, headers=headers, json=payload, timeout=120)
177
+ r.raise_for_status()
178
+ data = r.json()
179
+ if isinstance(data, dict) and data.get("choices"):
180
+ ch0 = data["choices"][0]
181
+ # OpenAI-style
182
+ msg = ch0.get("message") if isinstance(ch0, dict) else None
183
+ if msg and isinstance(msg, dict):
184
+ return msg.get("content", "")
185
+ # Some variants return {choices:[{text:"..."}]}
186
+ if ch0.get("text"):
187
+ return ch0.get("text")
188
+ return str(data)
189
+
190
+
191
+ def _unified_chat(messages: List[dict], system: str | None = None) -> str:
192
+ provider = (settings.AI_PROVIDER or "hf").lower()
193
+ if provider == "ollama":
194
+ # Ollama supports chat natively
195
+ final_msgs = ([] if not system else [{"role": "system", "content": system}]) + messages
196
+ return _ollama_chat(final_msgs)
197
+ if provider == "lmstudio":
198
+ final_msgs = ([] if not system else [{"role": "system", "content": system}]) + messages
199
+ return _lmstudio_chat(final_msgs)
200
+ # Default: Hugging Face text generation with flattened chat
201
+ prompt = _flatten_messages(messages, system)
202
+ return _hf_generate_text(prompt, max_new_tokens=400, temperature=0.3)
203
+
204
+
205
+ def chat_completion(messages: List[dict], language: str) -> str:
206
+ system = build_system_prompt(language or "fr")
207
+ # Test compatibility: if openai_client is patched in tests, honor it first
208
+ try:
209
+ oc = openai_client() # patched MagicMock returns a mock without requiring real API key
210
+ final_messages = ([{"role": "system", "content": system}] + messages)
211
+ resp = oc.chat.completions.create(
212
+ model=getattr(settings, "OPENAI_MODEL", "gpt-4o-mini"),
213
+ messages=final_messages,
214
+ temperature=0.3,
215
+ )
216
+ # Support MagicMock structure used in tests
217
+ return getattr(resp.choices[0].message, "content", "")
218
+ except Exception:
219
+ pass
220
+ try:
221
+ return _unified_chat(messages, system)
222
+ except Exception as e:
223
+ logger.error(f"Chat completion failed: {e}")
224
+ return ""
225
+
226
+
227
+ def _transcribe_with_huggingface(audio_url: str, language: str | None = None) -> str:
228
+ """Transcribe audio using Hugging Face Inference API"""
229
+ import httpx
230
+ import librosa
231
+ import soundfile as sf
232
+
233
+ # Load and process audio
234
+ content: bytes
235
+ if isinstance(audio_url, str) and os.path.exists(audio_url):
236
+ with open(audio_url, "rb") as f:
237
+ content = f.read()
238
+ else:
239
+ with httpx.Client(timeout=60.0) as client:
240
+ resp = client.get(audio_url)
241
+ resp.raise_for_status()
242
+ content = resp.content
243
+
244
+ # Process audio with librosa
245
+ raw_buf = io.BytesIO(content)
246
+ raw_buf.seek(0)
247
+ y, sr = librosa.load(raw_buf, sr=None, mono=False)
248
+ if y.ndim > 1:
249
+ y = librosa.to_mono(y)
250
+ if sr != 16000:
251
+ y = librosa.resample(y, orig_sr=sr, target_sr=16000)
252
+ sr = 16000
253
+
254
+ # Denoise
255
+ import noisereduce as nr
256
+ noise_frames = int(sr * 0.5)
257
+ if noise_frames > 0 and len(y) > noise_frames:
258
+ noise_clip = y[:noise_frames]
259
+ else:
260
+ noise_clip = y
261
+ y = nr.reduce_noise(y=y, y_noise=noise_clip, sr=sr)
262
+
263
+ # Encode to WAV
264
+ wav_buf = io.BytesIO()
265
+ sf.write(wav_buf, y, sr, format="WAV")
266
+ wav_buf.seek(0)
267
+ audio_bytes = wav_buf.read()
268
+
269
+ # Try configured HF ASR model first, then fallbacks
270
+ headers = {"Authorization": f"Bearer {settings.HF_API_TOKEN}"} if settings.HF_API_TOKEN else {}
271
+ models = [
272
+ settings.HF_ASR_MODEL,
273
+ "openai/whisper-large-v3",
274
+ "facebook/wav2vec2-large-960h-lv60-self"
275
+ ]
276
+
277
+ for model in models:
278
+ try:
279
+ url = f"https://api-inference.huggingface.co/models/{model}"
280
+ params = {"task": "transcribe"}
281
+ if language:
282
+ params["language"] = language
283
+
284
+ with httpx.Client(timeout=120.0) as client:
285
+ r = client.post(
286
+ url,
287
+ headers={**headers, "Content-Type": "audio/wav"},
288
+ params=params,
289
+ content=audio_bytes,
290
+ )
291
+ r.raise_for_status()
292
+ out = r.json()
293
+
294
+ if isinstance(out, dict) and out.get("text"):
295
+ logger.info(f"HF transcription successful with model: {model}")
296
+ return out["text"]
297
+ if isinstance(out, list) and out and isinstance(out[0], dict) and out[0].get("text"):
298
+ logger.info(f"HF transcription successful with model: {model}")
299
+ return out[0]["text"]
300
+ except Exception as e:
301
+ logger.warning(f"HF model {model} failed: {e}")
302
+ continue
303
+
304
+ return ""
305
+
306
+
307
+ def transcribe_audio(audio_url: str | None, language: str | None = None) -> str:
308
+ if not audio_url:
309
+ return ""
310
+ # Test compatibility: if openai_client is patched, try it first
311
+ try:
312
+ oc = openai_client()
313
+ import httpx
314
+ import librosa
315
+ import soundfile as sf
316
+ # Load audio bytes (local path or URL)
317
+ if isinstance(audio_url, str) and os.path.exists(audio_url):
318
+ with open(audio_url, "rb") as f:
319
+ raw_bytes = f.read()
320
+ else:
321
+ with httpx.Client(timeout=60.0) as client:
322
+ r = client.get(audio_url)
323
+ r.raise_for_status()
324
+ raw_bytes = r.content
325
+ # Ensure 16 kHz mono and noise reduction before Whisper
326
+ raw_buf = io.BytesIO(raw_bytes)
327
+ raw_buf.seek(0)
328
+ y, sr = librosa.load(raw_buf, sr=None, mono=False)
329
+ if hasattr(y, "ndim") and getattr(y, "ndim", 1) > 1:
330
+ y = librosa.to_mono(y) # type: ignore
331
+ if sr != 16000:
332
+ y = librosa.resample(y, orig_sr=sr, target_sr=16000)
333
+ sr = 16000
334
+ import noisereduce as nr
335
+ noise_frames = int(sr * 0.5)
336
+ noise_clip = y[:noise_frames] if len(y) > noise_frames else y
337
+ y = nr.reduce_noise(y=y, y_noise=noise_clip, sr=sr)
338
+ # Encode to WAV file-like for OpenAI Whisper API
339
+ wav_buf = io.BytesIO()
340
+ sf.write(wav_buf, y, sr, format="WAV")
341
+ wav_buf.seek(0)
342
+ wav_buf.name = "input.wav" # some clients expect a name
343
+ tr = oc.audio.transcriptions.create(
344
+ model=getattr(settings, "OPENAI_WHISPER_MODEL", "whisper-1"),
345
+ file=wav_buf,
346
+ language=language if language else None,
347
+ )
348
+ return getattr(tr, "text", "") or (tr.get("text") if isinstance(tr, dict) else "") or ""
349
+ except Exception:
350
+ pass
351
+ # Prefer HF ASR
352
+ try:
353
+ import httpx
354
+ import mimetypes
355
+ import librosa
356
+ import soundfile as sf
357
+
358
+ # 1) Load audio from local path or URL
359
+ content: bytes
360
+ if isinstance(audio_url, str) and os.path.exists(audio_url):
361
+ with open(audio_url, "rb") as f:
362
+ content = f.read()
363
+ else:
364
+ with httpx.Client(timeout=60.0) as client:
365
+ resp = client.get(audio_url)
366
+ resp.raise_for_status()
367
+ content = resp.content
368
+
369
+ # 2) Decode to waveform (mono, 16k)
370
+ raw_buf = io.BytesIO(content)
371
+ raw_buf.seek(0)
372
+ y, sr = librosa.load(raw_buf, sr=None, mono=False)
373
+ if hasattr(y, 'ndim') and getattr(y, 'ndim', 1) > 1:
374
+ import numpy as np
375
+ y = librosa.to_mono(y) # type: ignore
376
+ if sr != 16000:
377
+ y = librosa.resample(y, orig_sr=sr, target_sr=16000)
378
+ sr = 16000
379
+
380
+ # 3) Denoise
381
+ import noisereduce as nr
382
+ noise_frames = int(sr * 0.5)
383
+ noise_clip = y[:noise_frames] if len(y) > noise_frames else y
384
+ y = nr.reduce_noise(y=y, y_noise=noise_clip, sr=sr)
385
+
386
+ # 4) Encode WAV
387
+ wav_buf = io.BytesIO()
388
+ sf.write(wav_buf, y, sr, format="WAV")
389
+ wav_buf.seek(0)
390
+ audio_bytes = wav_buf.read()
391
+
392
+ # 5) HF
393
+ headers = {"Authorization": f"Bearer {settings.HF_API_TOKEN}"} if settings.HF_API_TOKEN else {}
394
+ asr_models = [settings.HF_ASR_MODEL, "openai/whisper-large-v3"]
395
+ for model in asr_models:
396
+ try:
397
+ url = f"https://api-inference.huggingface.co/models/{model}"
398
+ with httpx.Client(timeout=180.0) as client:
399
+ r = client.post(url, headers={**headers, "Content-Type": "audio/wav"}, content=audio_bytes)
400
+ r.raise_for_status()
401
+ out = r.json()
402
+ if isinstance(out, dict) and out.get("text"):
403
+ return out["text"]
404
+ if isinstance(out, list) and out and isinstance(out[0], dict) and out[0].get("text"):
405
+ return out[0]["text"]
406
+ except Exception as e:
407
+ logger.warning(f"HF ASR model {model} failed: {e}")
408
+ continue
409
+ return ""
410
+ except Exception as exc:
411
+ logger.error(f"HF transcription failed: {exc}")
412
+ return ""
413
+
414
+
415
+ def _hf_image_caption(image_ref: str) -> str:
416
+ """Generate a caption for an image using HF image-to-text model."""
417
+ import httpx
418
+ # Load bytes from URL, file path, or data URI
419
+ data: bytes
420
+ if isinstance(image_ref, str) and os.path.exists(image_ref):
421
+ with open(image_ref, "rb") as f:
422
+ data = f.read()
423
+ elif isinstance(image_ref, str) and image_ref.startswith("data:"):
424
+ try:
425
+ b64 = image_ref.split(",", 1)[1]
426
+ data = base64.b64decode(b64)
427
+ except Exception:
428
+ data = b""
429
+ else:
430
+ with httpx.Client(timeout=60.0) as client:
431
+ r = client.get(image_ref)
432
+ r.raise_for_status()
433
+ data = r.content
434
+ headers = {"Authorization": f"Bearer {settings.HF_API_TOKEN}"} if settings.HF_API_TOKEN else {}
435
+ url = f"https://api-inference.huggingface.co/models/{settings.HF_VISION_CAPTION_MODEL}"
436
+ r = httpx.post(url, headers=headers, content=data, timeout=120)
437
+ r.raise_for_status()
438
+ out = r.json()
439
+ if isinstance(out, list) and out and isinstance(out[0], dict):
440
+ return out[0].get("generated_text") or out[0].get("caption", "") or ""
441
+ if isinstance(out, dict):
442
+ return out.get("generated_text") or out.get("caption", "") or ""
443
+ return ""
444
+
445
+
446
+ def _ollama_vision(image_ref: str, prompt: str) -> str:
447
+ import httpx
448
+ # Prepare image bytes as base64 for Ollama
449
+ if isinstance(image_ref, str) and os.path.exists(image_ref):
450
+ with open(image_ref, "rb") as f:
451
+ img_bytes = f.read()
452
+ elif isinstance(image_ref, str) and image_ref.startswith("data:"):
453
+ try:
454
+ img_bytes = base64.b64decode(image_ref.split(",", 1)[1])
455
+ except Exception:
456
+ img_bytes = b""
457
+ else:
458
+ with httpx.Client(timeout=60.0) as client:
459
+ r = client.get(image_ref)
460
+ r.raise_for_status()
461
+ img_bytes = r.content
462
+ b64img = base64.b64encode(img_bytes).decode("ascii")
463
+ base = settings.OLLAMA_BASE_URL.rstrip("/")
464
+ url = f"{base}/api/generate"
465
+ payload = {
466
+ "model": settings.OLLAMA_VISION_MODEL,
467
+ "prompt": prompt or "Describe the medically relevant observations in this image.",
468
+ "images": [b64img],
469
+ "stream": False,
470
+ "options": {"temperature": 0.2},
471
+ }
472
+ r = httpx.post(url, json=payload, timeout=180)
473
+ r.raise_for_status()
474
+ data = r.json()
475
+ # Non-stream returns may include 'response'
476
+ if isinstance(data, dict) and data.get("response"):
477
+ return data["response"]
478
+ return str(data)
479
+
480
+
481
+ def analyze_image(image_url: str, prompt: str | None) -> str:
482
+ # Test compatibility: if openai_client is patched, use it first
483
+ try:
484
+ oc = openai_client()
485
+ content = []
486
+ if prompt:
487
+ content.append({"type": "text", "text": prompt})
488
+ content.append({"type": "image_url", "image_url": {"url": image_url}})
489
+ resp = oc.chat.completions.create(
490
+ model=getattr(settings, "OPENAI_MODEL", "gpt-4o-mini"),
491
+ messages=[{"role": "user", "content": content}],
492
+ )
493
+ return getattr(resp.choices[0].message, "content", "") or ""
494
+ except Exception:
495
+ pass
496
+ provider = (settings.AI_PROVIDER or "hf").lower()
497
+ try:
498
+ if provider == "ollama":
499
+ return _ollama_vision(image_url, prompt or "Analyze this medical image and report relevant findings.")
500
+ # Default HF: caption + chat reasoning
501
+ caption = _hf_image_caption(image_url)
502
+ reasoning_prompt = (
503
+ (prompt or "Analyze this medical image and report relevant findings, red flags, and advice.")
504
+ + f"\n\nImage caption: {caption}"
505
+ )
506
+ return _hf_generate_text(reasoning_prompt, max_new_tokens=250, temperature=0.2)
507
+ except Exception as e:
508
+ logger.error(f"Image analysis failed: {e}")
509
+ return ""
510
+
511
+
512
+ def translate_text(text: str, target_language: str) -> str:
513
+ # Prefer HF dedicated translation model if available (only if token is set to avoid network in tests)
514
+ if settings.HF_API_TOKEN:
515
+ try:
516
+ import httpx
517
+ headers = {"Authorization": f"Bearer {settings.HF_API_TOKEN}"}
518
+ payload = {"inputs": text}
519
+ model = settings.HF_TRANSLATION_MODEL
520
+ url = f"https://api-inference.huggingface.co/models/{model}"
521
+ r = httpx.post(url, headers=headers, json=payload, timeout=60)
522
+ if r.status_code == 200:
523
+ out = r.json()
524
+ if isinstance(out, list) and out and isinstance(out[0], dict) and out[0].get("translation_text"):
525
+ return out[0]["translation_text"]
526
+ except Exception as exc: # pragma: no cover
527
+ logger.warning(f"HF translation failed: {exc}")
528
+ # Test compatibility: try OpenAI-style client if patched
529
+ try:
530
+ oc = openai_client()
531
+ resp = oc.chat.completions.create(
532
+ model=getattr(settings, "OPENAI_MODEL", "gpt-4o-mini"),
533
+ messages=[
534
+ {"role": "system", "content": "You translate text faithfully."},
535
+ {"role": "user", "content": f"Translate to {target_language}: {text}"},
536
+ ],
537
+ )
538
+ return getattr(resp.choices[0].message, "content", None) or text
539
+ except Exception:
540
+ pass
541
+ # Fallback via unified chat with explicit instruction
542
+ prompt = f"Translate to {target_language} (preserve meaning and medical accuracy): {text}"
543
+ try:
544
+ return _unified_chat([{"role": "user", "content": prompt}], system=None) or text
545
+ except Exception:
546
+ return text
547
+
app/routers/ai.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Body, UploadFile, File, Form, Request
2
+ from pydantic import BaseModel
3
+ from typing import Optional, List
4
+ from app.ai_agent.agent import handle_user_query, create_medical_agent, search_cases_with_timeout, register_attachment
5
+ import logging
6
+ import asyncio
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ router = APIRouter()
11
+
12
+ # Basic size limits (bytes)
13
+ MAX_IMAGE_BYTES = 6_000_000 # ~6 MB
14
+ MAX_AUDIO_BYTES = 10 * 1024 * 1024 # 10 MB
15
+ MAX_FILE_BYTES = 2 * 1024 * 1024 # 2 MB
16
+
17
+ class AIRequest(BaseModel):
18
+ text: Optional[str] = None
19
+ image: Optional[str] = None # URL ou base64
20
+ images: Optional[List[str]] = None # URLs ou base64 multiples
21
+ audio: Optional[str] = None # URL ou base64
22
+ audios: Optional[List[str]] = None # URLs ou base64 multiples
23
+ want_stats: Optional[bool] = False
24
+ location: Optional[str] = None # Pour la recherche d'établissements
25
+ files: Optional[List[str]] = None # URLs ou base64 de fichiers (petits)
26
+ file_names: Optional[List[str]] = None # Noms des fichiers correspondants
27
+ agent_mode: Optional[str] = None # 'messages' (zero-shot), 'string', or 'legacy'
28
+
29
+ class AIResponse(BaseModel):
30
+ result: str
31
+ stats: Optional[dict] = None
32
+
33
+ @router.post("/ai", response_model=AIResponse)
34
+ async def ai_endpoint(req: AIRequest = Body(...)):
35
+ # Construction de la requête utilisateur pour l'agent
36
+ user_query = ""
37
+ if req.text:
38
+ user_query += req.text + "\n"
39
+ if req.image:
40
+ user_query += f"[Image fournie]\n"
41
+ if req.audio:
42
+ user_query += f"[Audio fourni]\n"
43
+ if req.location:
44
+ user_query += f"[Localisation: {req.location}]\n"
45
+
46
+ # Appel à l'agent LangChain dans un thread pour éviter de bloquer l'event loop
47
+ result = await asyncio.to_thread(
48
+ handle_user_query,
49
+ user_query,
50
+ req.location,
51
+ req.image,
52
+ req.audio,
53
+ req.files or [],
54
+ req.file_names or [],
55
+ req.images or [],
56
+ req.audios or [],
57
+ req.agent_mode,
58
+ )
59
+
60
+ stats = None
61
+ if req.want_stats:
62
+ stats = {}
63
+ if req.text:
64
+ stats["word_count"] = len(req.text.split())
65
+ if req.image:
66
+ stats["image_url_or_b64_length"] = len(req.image)
67
+ if req.images:
68
+ stats["images_count"] = len(req.images)
69
+ if req.audio:
70
+ stats["audio_url_or_b64_length"] = len(req.audio)
71
+ if req.audios:
72
+ stats["audios_count"] = len(req.audios)
73
+ if req.files:
74
+ stats["files_count"] = len(req.files)
75
+ # Ajoute d'autres stats pertinentes ici
76
+
77
+ return AIResponse(result=result, stats=stats)
78
+
79
+ # =============================================================================
80
+ # Multipart/form-data endpoint for uploads
81
+ # =============================================================================
82
+ @router.post("/ai/form", response_model=AIResponse)
83
+ async def ai_form_endpoint(
84
+ request: Request,
85
+ text: Optional[str] = Form(None),
86
+ location: Optional[str] = Form(None),
87
+ want_stats: Optional[bool] = Form(False),
88
+ agent_mode: Optional[str] = Form(None),
89
+ ):
90
+ # Parse the raw form to accept both UploadFile and string references
91
+ try:
92
+ form = await request.form()
93
+ except Exception:
94
+ form = None
95
+
96
+ image_refs: List[str] = []
97
+ audio_refs: List[str] = []
98
+ file_refs: List[str] = []
99
+ file_names: List[str] = []
100
+
101
+ if form:
102
+ # Helpers to iterate possible single/plural fields
103
+ def _iter_values(keys: List[str]):
104
+ for key in keys:
105
+ for v in form.getlist(key):
106
+ yield v
107
+
108
+ # Images
109
+ for v in _iter_values(["image", "images"]):
110
+ if isinstance(v, UploadFile):
111
+ try:
112
+ data = await v.read()
113
+ if data and len(data) > MAX_IMAGE_BYTES:
114
+ raise HTTPException(status_code=413, detail=f"Image '{v.filename}' trop volumineuse (> 6 Mo)")
115
+ ref = register_attachment(data, filename=v.filename, mime=v.content_type)
116
+ image_refs.append(ref)
117
+ finally:
118
+ await v.close()
119
+ elif isinstance(v, str) and v.strip():
120
+ image_refs.append(v.strip())
121
+
122
+ # Audios
123
+ for v in _iter_values(["audio", "audios"]):
124
+ if isinstance(v, UploadFile):
125
+ try:
126
+ data = await v.read()
127
+ if data and len(data) > MAX_AUDIO_BYTES:
128
+ raise HTTPException(status_code=413, detail=f"Audio '{v.filename}' trop volumineux (> 10 Mo)")
129
+ ref = register_attachment(data, filename=v.filename, mime=v.content_type)
130
+ audio_refs.append(ref)
131
+ finally:
132
+ await v.close()
133
+ elif isinstance(v, str) and v.strip():
134
+ audio_refs.append(v.strip())
135
+
136
+ # Files (text/PDF)
137
+ string_file_names = form.getlist("file_names") if "file_names" in form else []
138
+ string_file_index = 0
139
+ for v in _iter_values(["file", "files"]):
140
+ if isinstance(v, UploadFile):
141
+ try:
142
+ data = await v.read()
143
+ if data and len(data) > MAX_FILE_BYTES:
144
+ raise HTTPException(status_code=413, detail=f"Fichier '{v.filename}' trop volumineux (> 2 Mo)")
145
+ ref = register_attachment(data, filename=v.filename, mime=v.content_type)
146
+ file_refs.append(ref)
147
+ file_names.append(v.filename or "file")
148
+ finally:
149
+ await v.close()
150
+ elif isinstance(v, str) and v.strip():
151
+ file_refs.append(v.strip())
152
+ # try map a provided filename
153
+ name = None
154
+ if string_file_names and string_file_index < len(string_file_names):
155
+ maybe = string_file_names[string_file_index]
156
+ if isinstance(maybe, str) and maybe.strip():
157
+ name = maybe.strip()
158
+ file_names.append(name or "file")
159
+ string_file_index += 1
160
+
161
+ # Validate agent_mode if provided
162
+ if agent_mode and agent_mode.lower() not in {"messages", "string", "legacy"}:
163
+ raise HTTPException(status_code=400, detail="agent_mode invalide: utilisez 'messages', 'string' ou 'legacy'")
164
+
165
+ # Construct user query summary (all inputs optional)
166
+ user_query = (text or "").strip()
167
+ if image_refs:
168
+ user_query += ("\n" if user_query else "") + "[Image(s) fournie(s)]"
169
+ if audio_refs:
170
+ user_query += ("\n" if user_query else "") + "[Audio(s) fourni(s)]"
171
+ if location:
172
+ user_query += ("\n" if user_query else "") + f"[Localisation: {location}]"
173
+ # All inputs are optional; proceed even if user_query is empty.
174
+
175
+ # Invoke agent with attach:// references
176
+ result = await asyncio.to_thread(
177
+ handle_user_query,
178
+ user_query,
179
+ location,
180
+ None, # single image param not used here
181
+ None, # single audio param not used here
182
+ file_refs,
183
+ file_names,
184
+ image_refs,
185
+ audio_refs,
186
+ agent_mode,
187
+ )
188
+
189
+ stats = None
190
+ if want_stats:
191
+ stats = {
192
+ "word_count": len(text.split()) if text else 0,
193
+ "images_count": len(image_refs),
194
+ "audios_count": len(audio_refs),
195
+ "files_count": len(file_refs),
196
+ }
197
+
198
+ return AIResponse(result=result, stats=stats)
199
+
200
+ # =============================================================================
201
+ # DEBUG ENDPOINTS to isolate the hanging issue
202
+ # =============================================================================
203
+
204
+ @router.get("/ai/debug/create-agent", tags=["AI Debug"])
205
+ async def debug_create_agent():
206
+ """Tests if creating the medical agent works without hanging."""
207
+ logger.info("--- DEBUG: Testing agent creation ---")
208
+ try:
209
+ agent = create_medical_agent()
210
+ if agent:
211
+ logger.info("--- DEBUG: Agent creation successful ---")
212
+ return {"status": "Agent created successfully"}
213
+ else:
214
+ logger.error("--- DEBUG: Agent creation failed, returned None ---")
215
+ raise HTTPException(status_code=500, detail="Agent creation returned None")
216
+ except Exception as e:
217
+ logger.error(f"--- DEBUG: Agent creation failed with exception: {e} ---", exc_info=True)
218
+ raise HTTPException(status_code=500, detail=f"Agent creation failed: {e}")
219
+
220
+ @router.get("/ai/debug/search-data", tags=["AI Debug"])
221
+ async def debug_search_data(q: str = "fever and headache"):
222
+ """Tests if the clinical data search works without hanging."""
223
+ logger.info(f"--- DEBUG: Testing data search with query: '{q}' ---")
224
+ try:
225
+ context = search_cases_with_timeout(q, timeout=15)
226
+ logger.info("--- DEBUG: Data search successful ---")
227
+ return {"status": "Data search completed", "context_found": bool(context), "context": context}
228
+ except Exception as e:
229
+ logger.error(f"--- DEBUG: Data search failed with exception: {e} ---", exc_info=True)
230
+ raise HTTPException(status_code=500, detail=f"Data search failed: {e}")
231
+
232
+ @router.get("/ai/debug/invoke-agent", tags=["AI Debug"])
233
+ async def debug_invoke_agent(q: str = "hello, how are you?"):
234
+ """Tests if invoking the agent with a simple query works without hanging."""
235
+ logger.info(f"--- DEBUG: Testing agent invocation with query: '{q}' ---")
236
+ try:
237
+ agent = create_medical_agent()
238
+ logger.info("--- DEBUG: Agent created, invoking... ---")
239
+ response = await asyncio.to_thread(agent.invoke, {"input": q})
240
+ logger.info("--- DEBUG: Agent invocation successful ---")
241
+ return {"status": "Agent invoked successfully", "response": response}
242
+ except Exception as e:
243
+ logger.error(f"--- DEBUG: Agent invocation failed with exception: {e} ---", exc_info=True)
244
+ raise HTTPException(status_code=500, detail=f"Agent invocation failed: {e}")
app/routers/audio.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from app.schemas import TranscribeRequest, TranscribeResponse
3
+ from app.utils.security import get_current_user
4
+ from app.ai_services import transcribe_audio
5
+
6
+
7
+ router = APIRouter()
8
+
9
+
10
+ @router.post("/transcribe", response_model=TranscribeResponse)
11
+ def transcribe(req: TranscribeRequest, user=Depends(get_current_user)):
12
+ text = transcribe_audio(req.audio_url, req.language)
13
+ return TranscribeResponse(text=text)
14
+
15
+
16
+ async def handle_transcription_via_gateway(payload: dict, current_user):
17
+ if current_user is None:
18
+ raise HTTPException(status_code=401, detail="Authentification requise")
19
+ req = TranscribeRequest(**payload)
20
+ return transcribe(req, user=current_user) # type: ignore
21
+
app/routers/cameroon_data.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from pydantic import BaseModel
3
+ from typing import Any, Dict, List
4
+
5
+ from app.services.cameroon_data import get_cameroon_data
6
+
7
+
8
+ router = APIRouter()
9
+
10
+
11
+ class SimilarCasesRequest(BaseModel):
12
+ query_text: str
13
+ top_k: int = 10
14
+
15
+
16
+ @router.get("/stats/overview")
17
+ def stats_overview() -> Dict[str, Any]:
18
+ data = get_cameroon_data()
19
+ return data.stats_overview()
20
+
21
+
22
+ @router.get("/stats/diseases/{disease_name}")
23
+ def stats_disease(disease_name: str) -> Dict[str, Any]:
24
+ data = get_cameroon_data()
25
+ return data.stats_disease(disease_name)
26
+
27
+
28
+ @router.post("/search/similar-cases")
29
+ def search_similar_cases(req: SimilarCasesRequest) -> Dict[str, Any]:
30
+ data = get_cameroon_data()
31
+ results = data.search_similar_cases(req.query_text, top_k=req.top_k)
32
+ return {
33
+ "similar_cases": [
34
+ {
35
+ "summary_id": r.summary_id,
36
+ "diagnosis": r.diagnosis,
37
+ "age": r.age,
38
+ "gender": r.gender,
39
+ "summary_snippet": r.summary_snippet,
40
+ "similarity_score": r.similarity_score,
41
+ }
42
+ for r in results
43
+ ]
44
+ }
45
+
46
+
47
+ @router.get("/patterns/seasonal")
48
+ def patterns_seasonal() -> Dict[str, Any]:
49
+ data = get_cameroon_data()
50
+ return data.seasonal_patterns()
51
+
52
+
53
+ @router.get("/patterns/age-gender")
54
+ def patterns_age_gender() -> Dict[str, Any]:
55
+ data = get_cameroon_data()
56
+ return data.age_gender_distribution()
57
+
58
+
app/routers/chat.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from typing import List, Dict, Any
3
+ import os
4
+ import base64
5
+ import mimetypes
6
+ from app.schemas import ChatRequest, ChatResponse, TranslateRequest, TranslateResponse, UnifiedChatRequest, UnifiedChatResponse, UnifiedContext
7
+ from app.utils.security import get_current_user
8
+ from app.utils.helpers import medical_disclaimer, emergency_triage
9
+ from app.ai_services import chat_completion, translate_text, detect_language
10
+
11
+ from app.services.cameroon_data import get_cameroon_data
12
+
13
+
14
+ router = APIRouter()
15
+
16
+
17
+ @router.post("/chat", response_model=ChatResponse)
18
+ def chat(req: ChatRequest, user=Depends(get_current_user)):
19
+ try:
20
+ raise HTTPException(status_code=501, detail="Historique de conversation désactivé (pas de base de données)")
21
+
22
+ # Unreachable since DB disabled
23
+
24
+ # Build conversation history
25
+ history = [{"role": "user", "content": req.text}]
26
+
27
+ # Get AI response
28
+ answer = chat_completion(history, req.language)
29
+ answer = f"{answer}\n\n{medical_disclaimer(req.language)}"
30
+
31
+ return ChatResponse(reply=answer, conversation_id=0)
32
+
33
+ except HTTPException:
34
+ raise
35
+ except Exception as e:
36
+ raise HTTPException(status_code=500, detail=f"Erreur serveur: {str(e)}")
37
+
38
+
39
+ @router.post("/translate", response_model=TranslateResponse)
40
+ def translate(req: TranslateRequest):
41
+ out = translate_text(req.text, req.target_language)
42
+ return TranslateResponse(text=out)
43
+
44
+
45
+ # Gateway helpers
46
+ async def handle_chat_via_gateway(payload: dict, current_user):
47
+ req = ChatRequest(**payload)
48
+ if current_user is None:
49
+ raise HTTPException(status_code=401, detail="Authentification requise")
50
+ return chat(req, user=current_user) # type: ignore
51
+
52
+
53
+ async def handle_translate_via_gateway(payload: dict, current_user):
54
+ req = TranslateRequest(**payload)
55
+ return translate(req)
56
+
57
+
58
+ @router.post("/chat/unified", response_model=UnifiedChatResponse)
59
+ async def chat_unified(req: UnifiedChatRequest):
60
+ try:
61
+ # Step 1 - preprocess by type
62
+ processed_text = req.message
63
+ detected_lang = req.language or None
64
+ if req.message_type == "audio":
65
+ from app.ai_services import transcribe_audio
66
+ # Auto-detect after transcription if language not specified
67
+ processed_text = transcribe_audio(req.message, None)
68
+ if processed_text:
69
+ detected_lang = detect_language(processed_text)
70
+ # If transcription failed, stop here to avoid sending raw audio to GPT
71
+ if not processed_text or processed_text.strip() == "":
72
+ raise HTTPException(status_code=400, detail="Transcription audio non disponible. Veuillez fournir un audio plus clair.")
73
+ elif req.message_type == "image":
74
+ from app.ai_services import analyze_image
75
+ image_input = req.message
76
+ # Support local file paths by converting to data URL
77
+ try:
78
+ if isinstance(image_input, str) and os.path.exists(image_input):
79
+ mime, _ = mimetypes.guess_type(image_input)
80
+ mime = mime or "image/jpeg"
81
+ with open(image_input, "rb") as f:
82
+ b64 = base64.b64encode(f.read()).decode("ascii")
83
+ image_input = f"data:{mime};base64,{b64}"
84
+ except Exception:
85
+ # Fallback to original value if any error occurs
86
+ pass
87
+ processed_text = analyze_image(image_input, "Analyse l'image médicale et décris les signes cliniques pertinents.")
88
+ # Detect language from the analysis output if not provided
89
+ if not req.language:
90
+ detected_lang = detect_language(processed_text)
91
+
92
+ # Step 2 - Cameroon context analysis
93
+ data = get_cameroon_data()
94
+ similar = data.search_similar_cases(processed_text, top_k=10)
95
+ disease_counts: Dict[str, int] = {}
96
+ for r in similar:
97
+ if r.diagnosis:
98
+ disease_counts[r.diagnosis] = disease_counts.get(r.diagnosis, 0) + 1
99
+ most_probable = max(disease_counts, key=disease_counts.get) if disease_counts else None
100
+ if len(similar) >= 7:
101
+ confidence = "high"
102
+ elif len(similar) >= 3:
103
+ confidence = "medium"
104
+ else:
105
+ confidence = "low"
106
+
107
+ # Step 3 - General AI call
108
+ history_msgs = []
109
+ if req.history:
110
+ for m in req.history[-6:]:
111
+ history_msgs.append({"role": "user", "content": m})
112
+ history_msgs.append({"role": "user", "content": processed_text})
113
+ # If text input and language not provided, detect it
114
+ if req.message_type == "text" and (not req.language):
115
+ detected_lang = detect_language(processed_text)
116
+ language_to_use = detected_lang or "fr"
117
+ reply = chat_completion(history_msgs, language_to_use)
118
+
119
+ # Enrich with disclaimer
120
+ reply = f"{reply}\n\n{medical_disclaimer(language_to_use)}"
121
+
122
+ # Step 4 - Format response
123
+ ctx = UnifiedContext(
124
+ similar_cases_found=len(similar),
125
+ most_probable_diagnosis=most_probable,
126
+ confidence_level=confidence,
127
+ advice="Consultez un centre de santé si les symptômes persistent ou s'aggravent."
128
+ )
129
+ return UnifiedChatResponse(
130
+ response=reply,
131
+ context=ctx,
132
+ suggested_actions=["Rechercher centres de santé", "En savoir plus"],
133
+ language=language_to_use,
134
+ )
135
+ except HTTPException as he:
136
+ # Propagate intended HTTP errors (e.g., 400 for bad audio transcription)
137
+ raise he
138
+ except Exception as e:
139
+ raise HTTPException(status_code=500, detail=f"Erreur serveur: {str(e)}")
app/routers/images.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from app.schemas import AnalyzeImageRequest, AnalyzeImageResponse
3
+ from app.utils.security import get_current_user
4
+ from app.ai_services import analyze_image
5
+
6
+
7
+ router = APIRouter()
8
+
9
+
10
+ @router.post("/analyze-image", response_model=AnalyzeImageResponse)
11
+ def analyze(req: AnalyzeImageRequest, user=Depends(get_current_user)):
12
+ result = analyze_image(req.image_url, req.prompt)
13
+ return AnalyzeImageResponse(result=result)
14
+
15
+
16
+ async def handle_analyze_image_via_gateway(payload: dict, current_user):
17
+ if current_user is None:
18
+ raise HTTPException(status_code=401, detail="Authentification requise")
19
+ req = AnalyzeImageRequest(**payload)
20
+ return analyze(req, user=current_user) # type: ignore
21
+
22
+
23
+ async def handle_analyze_multimodal_via_gateway(payload: dict, current_user):
24
+ if current_user is None:
25
+ raise HTTPException(status_code=401, detail="Authentification requise")
26
+ image_url = payload.get("image_url")
27
+ text = payload.get("text")
28
+ if not image_url or not text:
29
+ raise HTTPException(status_code=400, detail="Champs 'image_url' et 'text' requis")
30
+ # Appelle la fonction d'analyse d'image et combine avec le texte
31
+ from app.ai_services import analyze_image, chat_completion
32
+ image_analysis = analyze_image(image_url, text)
33
+ # Combine l'analyse image et texte pour la réponse IA
34
+ prompt = f"Analyse l'image médicale suivante et le texte associé.\n\nImage: {image_url}\n\nTexte: {text}\n\nAnalyse IA de l'image: {image_analysis}\n\nDonne une synthèse clinique globale."
35
+ reply = chat_completion([{"role": "user", "content": prompt}], None)
36
+ return {"result": reply}
37
+
app/routers/users.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Depends, HTTPException
2
+ from fastapi.security import OAuth2PasswordRequestForm
3
+ from typing import Optional
4
+
5
+ from app.schemas import UserCreate, UserOut, Token
6
+ from app.utils.security import hash_password, verify_password, create_access_token
7
+ from app.utils.config import settings
8
+
9
+
10
+
11
+ router = APIRouter()
12
+
13
+
14
+ @router.post("/users/register", response_model=UserOut)
15
+ def register(user_in: UserCreate):
16
+ try:
17
+ raise HTTPException(status_code=501, detail="Inscription désactivée (pas de base de données)")
18
+
19
+ except HTTPException:
20
+ raise
21
+ except Exception as e:
22
+ raise HTTPException(status_code=500, detail=f"Erreur serveur: {str(e)}")
23
+
24
+
25
+ @router.post("/users/login", response_model=Token)
26
+ def login(form_data: OAuth2PasswordRequestForm = Depends()):
27
+ try:
28
+ raise HTTPException(status_code=501, detail="Connexion désactivée (pas de base de données)")
29
+
30
+ except HTTPException:
31
+ raise
32
+ except Exception as e:
33
+ raise HTTPException(status_code=500, detail=f"Erreur serveur: {str(e)}")
34
+
app/schemas.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, EmailStr, Field
2
+ from typing import Optional, Literal, List
3
+ from datetime import datetime
4
+
5
+
6
+ class UserCreate(BaseModel):
7
+ email: Optional[EmailStr] = None
8
+ password: Optional[str] = None
9
+ preferred_language: str = Field(default="fr")
10
+
11
+
12
+ class UserOut(BaseModel):
13
+ id: str # Supabase auth user UUID
14
+ preferred_language: str
15
+ email: Optional[EmailStr] = None
16
+ created_at: datetime
17
+
18
+ class Config:
19
+ from_attributes = True
20
+
21
+
22
+ class Token(BaseModel):
23
+ access_token: str
24
+ token_type: str = "bearer"
25
+
26
+
27
+ class ConversationCreate(BaseModel):
28
+ user_id: Optional[str] = None
29
+ context: Optional[str] = ""
30
+
31
+
32
+ class ConversationOut(BaseModel):
33
+ id: int
34
+ user_id: Optional[str]
35
+ started_at: datetime
36
+ context: str
37
+
38
+ class Config:
39
+ from_attributes = True
40
+
41
+
42
+ class MessageCreate(BaseModel):
43
+ conversation_id: int
44
+ message_type: Literal["text", "audio", "image"] = "text"
45
+ content: str
46
+ role: Literal["user", "assistant"] = "user"
47
+
48
+
49
+ class MessageOut(BaseModel):
50
+ id: int
51
+ conversation_id: int
52
+ message_type: str
53
+ content: str
54
+ role: str
55
+ timestamp: datetime
56
+
57
+ class Config:
58
+ from_attributes = True
59
+
60
+
61
+ class ChatRequest(BaseModel):
62
+ conversation_id: Optional[int] = None
63
+ text: str
64
+ language: str = "fr"
65
+
66
+
67
+ class ChatResponse(BaseModel):
68
+ reply: str
69
+ conversation_id: int
70
+
71
+
72
+ class TranscribeRequest(BaseModel):
73
+ audio_url: Optional[str] = None
74
+ language: Optional[str] = None
75
+
76
+
77
+ class TranscribeResponse(BaseModel):
78
+ text: str
79
+
80
+
81
+ class AnalyzeImageRequest(BaseModel):
82
+ image_url: str
83
+ prompt: Optional[str] = None
84
+
85
+
86
+ class AnalyzeImageResponse(BaseModel):
87
+ result: str
88
+
89
+
90
+ class TranslateRequest(BaseModel):
91
+ text: str
92
+ target_language: str
93
+
94
+
95
+ class TranslateResponse(BaseModel):
96
+ text: str
97
+
98
+
99
+ # Unified chat endpoint models
100
+ class UnifiedChatRequest(BaseModel):
101
+ message: str
102
+ message_type: Literal["text", "audio", "image"] = "text"
103
+ user_id: Optional[str] = None
104
+ language: Optional[str] = None
105
+ history: Optional[List[str]] = None
106
+
107
+
108
+ class UnifiedContext(BaseModel):
109
+ similar_cases_found: int
110
+ most_probable_diagnosis: Optional[str] = None
111
+ confidence_level: Literal["high", "medium", "low"]
112
+ advice: str
113
+
114
+
115
+ class UnifiedChatResponse(BaseModel):
116
+ response: str
117
+ context: UnifiedContext
118
+ suggested_actions: List[str]
119
+ language: Literal["fr", "en"]
120
+
app/services/cameroon_data.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import json
5
+ from dataclasses import dataclass
6
+ from typing import Any, Dict, List, Optional, Tuple
7
+
8
+ import numpy as np
9
+ import pandas as pd
10
+ from loguru import logger
11
+ from sklearn.neighbors import NearestNeighbors
12
+ from sentence_transformers import SentenceTransformer
13
+
14
+ from app.utils.config import settings
15
+ from app.utils.helpers import normalize_gender, clean_diagnosis
16
+
17
+
18
+ @dataclass
19
+ class SimilarCase:
20
+ summary_id: str
21
+ diagnosis: Optional[str]
22
+ age: Optional[float]
23
+ gender: Optional[str]
24
+ summary_snippet: str
25
+ similarity_score: float
26
+
27
+
28
+ class CameroonMedicalData:
29
+ """
30
+ Load, clean, analyze and search medical summaries specialized for the Cameroonian context.
31
+ Designed for ~45k rows. Caches embeddings and lightweight stats.
32
+ """
33
+
34
+ def __init__(self, csv_path: Optional[str] = None):
35
+ self.csv_path = csv_path or settings.CAMEROON_DATA_CSV
36
+ if not self.csv_path or not os.path.exists(self.csv_path):
37
+ logger.warning("CameroonMedicalData: CSV path missing or not found. Set CAMEROON_DATA_CSV in .env")
38
+ self.df = pd.DataFrame()
39
+ else:
40
+ self.df = self._load_csv(self.csv_path, settings.CAMEROON_MAX_ROWS)
41
+ self._cleaned: bool = False
42
+ self._model: Optional[SentenceTransformer] = None
43
+ self._embeddings: Optional[np.ndarray] = None
44
+ self._nn: Optional[NearestNeighbors] = None
45
+ self._cache_dir = settings.CAMEROON_CACHE_DIR
46
+ os.makedirs(self._cache_dir, exist_ok=True)
47
+
48
+ # ----------------------- Data Loading & Cleaning -----------------------
49
+ def _load_csv(self, path: str, limit: Optional[int]) -> pd.DataFrame:
50
+ df = pd.read_csv(path)
51
+ if limit and limit > 0:
52
+ df = df.head(limit)
53
+ return df
54
+
55
+ def clean(self) -> None:
56
+ if self.df.empty:
57
+ self._cleaned = True
58
+ return
59
+
60
+ df = self.df.copy()
61
+
62
+ # Standardize column names
63
+ expected_cols = [
64
+ "summary_id","patient_id","patient_age","patient_gender","diagnosis",
65
+ "body_temp_c","blood_pressure_systolic","heart_rate","summary_text","date_recorded"
66
+ ]
67
+ missing = [c for c in expected_cols if c not in df.columns]
68
+ if missing:
69
+ raise ValueError(f"Missing required columns: {missing}")
70
+
71
+ # Parse dates
72
+ df["date_recorded"] = pd.to_datetime(df["date_recorded"], errors="coerce")
73
+
74
+ # Handle missing values
75
+ df["patient_gender"] = df["patient_gender"].fillna("")
76
+ df["diagnosis"] = df["diagnosis"].fillna("")
77
+ df["summary_text"] = df["summary_text"].fillna("")
78
+
79
+ # Normalize gender and diagnosis
80
+ df["patient_gender_norm"] = df["patient_gender"].apply(lambda v: normalize_gender(str(v)))
81
+ df["diagnosis_norm"] = df["diagnosis"].apply(lambda v: clean_diagnosis(str(v)))
82
+
83
+ # Coerce numeric vitals
84
+ for col in ["patient_age","body_temp_c","blood_pressure_systolic","heart_rate"]:
85
+ df[col] = pd.to_numeric(df[col], errors="coerce")
86
+
87
+ # Drop rows with no summary text and no diagnosis
88
+ df = df[~((df["summary_text"].str.len() == 0) & (df["diagnosis_norm"].isna()))]
89
+
90
+ self.df = df.reset_index(drop=True)
91
+ self._cleaned = True
92
+
93
+ # ----------------------------- Statistics -----------------------------
94
+ def stats_overview(self) -> Dict[str, Any]:
95
+ if not self._cleaned:
96
+ self.clean()
97
+ if self.df.empty:
98
+ return {"total_rows": 0}
99
+
100
+ df = self.df
101
+ top_diagnoses = (
102
+ df["diagnosis_norm"].value_counts(dropna=True).head(20).dropna().to_dict()
103
+ )
104
+ age_desc = df["patient_age"].describe().fillna(0).to_dict()
105
+
106
+ return {
107
+ "total_rows": int(len(df)),
108
+ "top_diagnoses": top_diagnoses,
109
+ "age_stats": age_desc,
110
+ "gender_distribution": df["patient_gender_norm"].value_counts(dropna=True).to_dict(),
111
+ }
112
+
113
+ def stats_disease(self, disease_name: str) -> Dict[str, Any]:
114
+ if not self._cleaned:
115
+ self.clean()
116
+ if self.df.empty:
117
+ return {"disease": disease_name, "total_cases": 0}
118
+
119
+ df = self.df
120
+ mask = df["diagnosis_norm"] == disease_name.lower()
121
+ subset = df[mask]
122
+ total = int(len(subset))
123
+
124
+ # Age buckets
125
+ bins = [-1, 18, 35, 60, 200]
126
+ labels = ["0-18", "19-35", "36-60", "60+"]
127
+ ages = pd.cut(subset["patient_age"], bins=bins, labels=labels)
128
+ age_dist = ages.value_counts().reindex(labels, fill_value=0).to_dict()
129
+
130
+ gender_dist = subset["patient_gender_norm"].value_counts().to_dict()
131
+
132
+ # Common symptom terms (very simple proxy: frequent tokens in summary_text)
133
+ common_symptoms = self._extract_common_terms(subset["summary_text"].tolist(), top_k=15)
134
+
135
+ return {
136
+ "disease": disease_name,
137
+ "total_cases": total,
138
+ "age_distribution": age_dist,
139
+ "gender_distribution": gender_dist,
140
+ "common_symptoms": common_symptoms,
141
+ }
142
+
143
+ def seasonal_patterns(self) -> Dict[str, int]:
144
+ if not self._cleaned:
145
+ self.clean()
146
+ if self.df.empty:
147
+ return {}
148
+ df = self.df.dropna(subset=["date_recorded"]).copy()
149
+ df["month"] = df["date_recorded"].dt.month
150
+ counts = df["month"].value_counts().sort_index()
151
+ # map month numbers to english lowercase names for consistency
152
+ months = ["january","february","march","april","may","june","july","august","september","october","november","december"]
153
+ return {months[i-1]: int(counts.get(i, 0)) for i in range(1, 13)}
154
+
155
+ def age_gender_distribution(self) -> Dict[str, Any]:
156
+ if not self._cleaned:
157
+ self.clean()
158
+ if self.df.empty:
159
+ return {"age_buckets": {}, "gender_distribution": {}}
160
+
161
+ df = self.df
162
+ bins = [-1, 18, 35, 60, 200]
163
+ labels = ["0-18", "19-35", "36-60", "60+"]
164
+ ages = pd.cut(df["patient_age"], bins=bins, labels=labels)
165
+ age_dist = ages.value_counts().reindex(labels, fill_value=0).to_dict()
166
+ gender_dist = df["patient_gender_norm"].value_counts().to_dict()
167
+ return {"age_buckets": age_dist, "gender_distribution": gender_dist}
168
+
169
+ # --------------------------- Semantic Similarity ---------------------------
170
+ def _ensure_embeddings(self) -> None:
171
+ if self._embeddings is not None and self._nn is not None:
172
+ return
173
+ if not self._cleaned:
174
+ self.clean()
175
+ if self.df.empty:
176
+ self._embeddings = np.zeros((0, 384), dtype=np.float32)
177
+ self._nn = None
178
+ return
179
+
180
+ # Load model lazily
181
+ if self._model is None:
182
+ model_name = settings.CAMEROON_EMBEDDINGS_MODEL
183
+ logger.info(f"Loading sentence-transformers model: {model_name}")
184
+ self._model = SentenceTransformer(model_name)
185
+
186
+ cache_file = os.path.join(self._cache_dir, "embeddings.npy")
187
+ if os.path.exists(cache_file):
188
+ try:
189
+ self._embeddings = np.load(cache_file)
190
+ except Exception:
191
+ self._embeddings = None
192
+
193
+ if self._embeddings is None or len(self._embeddings) != len(self.df):
194
+ texts = self.df["summary_text"].astype(str).tolist()
195
+ self._embeddings = self._model.encode(texts, batch_size=64, show_progress_bar=False, normalize_embeddings=True)
196
+ np.save(cache_file, self._embeddings)
197
+
198
+ # Build NN index
199
+ self._nn = NearestNeighbors(n_neighbors=10, metric="cosine")
200
+ self._nn.fit(self._embeddings)
201
+
202
+ def search_similar_cases(self, query_text: str, top_k: int = 10) -> List[SimilarCase]:
203
+ if not query_text or query_text.strip() == "":
204
+ return []
205
+ self._ensure_embeddings()
206
+ if self._model is None or self._nn is None or self._embeddings is None or self.df.empty:
207
+ return []
208
+
209
+ q = self._model.encode([query_text], normalize_embeddings=True)
210
+ distances, indices = self._nn.kneighbors(q, n_neighbors=min(top_k, len(self.df)))
211
+ distances = distances[0]
212
+ indices = indices[0]
213
+
214
+ results: List[SimilarCase] = []
215
+ for dist, idx in zip(distances, indices):
216
+ row = self.df.iloc[int(idx)]
217
+ # similarity = 1 - cosine distance
218
+ sim = float(1.0 - dist)
219
+ snippet = str(row.get("summary_text", ""))[:140] + ("..." if len(str(row.get("summary_text", ""))) > 140 else "")
220
+ results.append(SimilarCase(
221
+ summary_id=str(row.get("summary_id", "")),
222
+ diagnosis=row.get("diagnosis_norm"),
223
+ age=float(row.get("patient_age")) if pd.notna(row.get("patient_age")) else None,
224
+ gender=row.get("patient_gender_norm"),
225
+ summary_snippet=snippet,
226
+ similarity_score=sim,
227
+ ))
228
+ return results
229
+
230
+ # ----------------------------- Utils -----------------------------
231
+ def _extract_common_terms(self, texts: List[str], top_k: int = 20) -> List[str]:
232
+ # Very naive bag-of-words; in production consider medical entity extraction.
233
+ from collections import Counter
234
+ tokens: List[str] = []
235
+ for t in texts:
236
+ for w in str(t).lower().replace(",", " ").replace(".", " ").split():
237
+ if len(w) >= 3 and w.isalpha():
238
+ tokens.append(w)
239
+ return [w for w, _ in Counter(tokens).most_common(top_k)]
240
+
241
+
242
+ # Singleton accessor
243
+ _singleton: Optional[CameroonMedicalData] = None
244
+
245
+
246
+ def get_cameroon_data() -> CameroonMedicalData:
247
+ global _singleton
248
+ if _singleton is None:
249
+ _singleton = CameroonMedicalData()
250
+ return _singleton
251
+
252
+
app/utils/config.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings
2
+ from pydantic import Field, field_validator, ConfigDict
3
+ from typing import List, Any, Optional, Dict
4
+ import os
5
+ import logging
6
+ import sys
7
+ from pathlib import Path
8
+ from dotenv import load_dotenv
9
+
10
+ # Load environment variables from .env file
11
+ load_dotenv()
12
+
13
+ def setup_logging(log_level: str = "INFO", log_file: str = None) -> None:
14
+ """Configure logging for the application."""
15
+ log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
16
+
17
+ # Configure root logger
18
+ logging.basicConfig(
19
+ level=log_level,
20
+ format=log_format,
21
+ handlers=[logging.StreamHandler(sys.stdout)]
22
+ )
23
+
24
+ # Add file handler if log file is specified
25
+ if log_file:
26
+ log_file = Path(log_file)
27
+ log_file.parent.mkdir(parents=True, exist_ok=True)
28
+ file_handler = logging.FileHandler(log_file)
29
+ file_handler.setFormatter(logging.Formatter(log_format))
30
+ logging.getLogger().addHandler(file_handler)
31
+
32
+ # Set log level for specific loggers
33
+ logging.getLogger("httpx").setLevel(logging.WARNING)
34
+ logging.getLogger("openai").setLevel(logging.WARNING)
35
+ logging.getLogger("urllib3").setLevel(logging.WARNING)
36
+
37
+
38
+ class Settings(BaseSettings):
39
+ model_config = ConfigDict(
40
+ env_file=".env",
41
+ env_file_encoding="utf-8",
42
+ extra="allow",
43
+ env_nested_delimiter="__"
44
+ )
45
+
46
+ # App configuration
47
+ APP_NAME: str = Field(default="Carehelp")
48
+ ENVIRONMENT: str = Field(default="development")
49
+
50
+ # Logging configuration
51
+ LOG_LEVEL: str = Field(default="INFO")
52
+ LOG_FILE: Optional[str] = Field(default=None)
53
+
54
+ def configure_logging(self) -> None:
55
+ """Configure logging based on settings."""
56
+ setup_logging(
57
+ log_level=self.LOG_LEVEL,
58
+ log_file=self.LOG_FILE
59
+ )
60
+ PORT: int = Field(default=8000)
61
+
62
+ # CORS configuration
63
+ CORS_ALLOW_ORIGINS: List[str] = Field(default_factory=lambda: ["*"])
64
+
65
+ # API Keys
66
+ GOOGLE_MAPS_API_KEY: str | None = None
67
+ SERPAPI_API_KEY: str | None = None
68
+
69
+ # Supabase configuration
70
+ SUPABASE_URL: str | None = None
71
+ SUPABASE_ANON_KEY: str | None = None
72
+ SUPABASE_SERVICE_ROLE_KEY: str | None = None
73
+ SUPABASE_DB_PASSWORD: str | None = None
74
+
75
+ # OpenAI configuration
76
+ OPENAI_API_KEY: str | None = None
77
+ OPENAI_MODEL: str = Field(default="gpt-4")
78
+ OPENAI_WHISPER_MODEL: str = Field(default="whisper-1")
79
+ OPENAI_WHISPER_FALLBACK_MODEL: str | None = Field(default="gpt-4-transcribe")
80
+ OPENAI_WHISPER_MAX_CHUNK_SECS: int = Field(default=120)
81
+
82
+ # JWT configuration
83
+ JWT_SECRET: str = Field(default="change_this_secret")
84
+ JWT_ALGORITHM: str = Field(default="HS256")
85
+ ACCESS_TOKEN_EXPIRE_MINUTES: int = Field(default=60 * 24 * 30)
86
+
87
+ # Hugging Face configuration
88
+ HF_API_TOKEN: str | None = None
89
+ HF_TRANSLATION_MODEL: str = Field(default="facebook/nllb-200-distilled-600M")
90
+
91
+ # Provider selection and models
92
+ # Options: 'hf' (Hugging Face Inference), 'ollama' (local), 'lmstudio' (local OpenAI-compatible)
93
+ AI_PROVIDER: str = Field(default="hf")
94
+
95
+ # Text generation (chat) models
96
+ HF_TEXT_MODEL: str = Field(default="meta-llama/Meta-Llama-3-8B-Instruct")
97
+ OLLAMA_MODEL: str = Field(default="llama3.1:8b")
98
+ OLLAMA_BASE_URL: str = Field(default="http://localhost:11434")
99
+ LMSTUDIO_MODEL: str | None = Field(default=None)
100
+ LMSTUDIO_BASE_URL: str = Field(default="http://localhost:1234/v1")
101
+
102
+ # Vision caption model (used to assist image analysis with HF)
103
+ HF_VISION_CAPTION_MODEL: str = Field(default="Salesforce/blip-image-captioning-large")
104
+ # Optional local vision model for Ollama (e.g., 'llava:latest')
105
+ OLLAMA_VISION_MODEL: str = Field(default="llava:latest")
106
+
107
+ # Automatic Speech Recognition (ASR)
108
+ HF_ASR_MODEL: str = Field(default="distil-whisper/distil-large-v3")
109
+
110
+ # Data paths
111
+ PATIENT_DATA_PATH: str = Field(default="../patient_records.json")
112
+
113
+ # Cameroon data configuration
114
+ CAMEROON_DATA_CSV: str = Field(default="../../clinical_summaries.csv")
115
+ CAMEROON_EMBEDDINGS_MODEL: str = Field(
116
+ default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
117
+ )
118
+ CAMEROON_CACHE_DIR: str = Field(default=".cache_cameroon")
119
+ CAMEROON_MAX_ROWS: int | None = None # limit for dev/testing
120
+
121
+ @field_validator("CORS_ALLOW_ORIGINS", mode="before")
122
+ @classmethod
123
+ def parse_cors_origins(cls, v: Any) -> Any:
124
+ # Accept JSON array, comma-separated string, or single "*"
125
+ if isinstance(v, list):
126
+ return v
127
+ if isinstance(v, str):
128
+ s = v.strip()
129
+ if s == "":
130
+ return ["*"]
131
+ if s == "*":
132
+ return ["*"]
133
+ # comma separated
134
+ return [x.strip() for x in s.split(",") if x.strip()]
135
+ return v
136
+
137
+
138
+ settings = Settings()
139
+
app/utils/helpers.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cachetools import TTLCache
2
+ from typing import Any, Optional
3
+
4
+
5
+ cache = TTLCache(maxsize=512, ttl=60 * 10)
6
+
7
+
8
+ def cache_get(key: str) -> Any | None:
9
+ return cache.get(key)
10
+
11
+
12
+ def cache_set(key: str, value: Any) -> None:
13
+ cache[key] = value
14
+
15
+
16
+ def medical_disclaimer(language: str) -> str:
17
+ if language.lower().startswith("fr"):
18
+ return (
19
+ "Avertissement: Je ne suis pas un médecin. Les conseils fournis par l'IA peuvent contenir des erreurs. "
20
+ "En cas de symptômes graves, consultez un professionnel de santé ou appelez les services d'urgence."
21
+ )
22
+ return (
23
+ "Disclaimer: I am not a medical professional. AI advice can be inaccurate. "
24
+ "For serious symptoms, consult a healthcare professional or emergency services."
25
+ )
26
+
27
+
28
+ def emergency_triage(text: str) -> bool:
29
+ signals = ["chest pain", "poitrine", "hemorrag", "fainting", "inconscient", "stroke", "AVC", "difficulty breathing", "respire"]
30
+ lower = text.lower()
31
+ return any(s in lower for s in signals)
32
+
33
+
34
+ def normalize_gender(value: str) -> Optional[str]:
35
+ if value is None:
36
+ return None
37
+ v = value.strip().lower()
38
+ mapping = {
39
+ "m": "male", "male": "male", "man": "male", "masculin": "male", "homme": "male",
40
+ "f": "female", "female": "female", "woman": "female", "feminin": "female", "femme": "female"
41
+ }
42
+ return mapping.get(v, None)
43
+
44
+
45
+ def clean_diagnosis(value: str) -> Optional[str]:
46
+ if not value:
47
+ return None
48
+ v = value.strip().nlower() if hasattr(value, 'nlower') else value.strip().lower()
49
+ return v
50
+
app/utils/security.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta, timezone
2
+ from typing import Optional
3
+
4
+ import jwt
5
+ from passlib.context import CryptContext
6
+ from fastapi import Depends, HTTPException, status
7
+ from fastapi.security import OAuth2PasswordBearer
8
+
9
+ from typing import Any
10
+
11
+
12
+ pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
13
+ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/users/login")
14
+
15
+
16
+ def hash_password(password: str) -> str:
17
+ return pwd_context.hash(password)
18
+
19
+
20
+ def verify_password(password: str, hashed: str) -> bool:
21
+ return pwd_context.verify(password, hashed)
22
+
23
+
24
+ def create_access_token(data: dict, secret: str, algorithm: str, expires_minutes: int) -> str:
25
+ to_encode = data.copy()
26
+ expire = datetime.now(timezone.utc) + timedelta(minutes=expires_minutes)
27
+ to_encode.update({"exp": expire})
28
+ return jwt.encode(to_encode, secret, algorithm=algorithm)
29
+
30
+
31
+ def decode_token(token: str, secret: str, algorithms: list[str]) -> dict:
32
+ return jwt.decode(token, secret, algorithms=algorithms)
33
+
34
+
35
+ async def get_current_user_optional(token: Optional[str] = Depends(oauth2_scheme)):
36
+ if not token:
37
+ return None
38
+ try:
39
+ from app.utils.config import settings
40
+ payload = decode_token(token, settings.JWT_SECRET, [settings.JWT_ALGORITHM])
41
+ # supabase auth uses UUID subject if you later switch to Supabase JWTs
42
+ return payload.get("sub")
43
+ except Exception:
44
+ return None
45
+
46
+
47
+ async def get_current_user(token: str = Depends(oauth2_scheme)):
48
+ from app.utils.config import settings
49
+ try:
50
+ payload = decode_token(token, settings.JWT_SECRET, [settings.JWT_ALGORITHM])
51
+ user_id = payload.get("sub") # string UUID or int
52
+ if user_id is None:
53
+ raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Token invalide")
54
+ return {"id": user_id} # Minimal user placeholder until Supabase integration
55
+ except jwt.ExpiredSignatureError:
56
+ raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Token expiré")
57
+ except Exception:
58
+ raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Token invalide")
59
+
clinical_summaries.csv ADDED
The diff for this file is too large to render. See raw diff
 
debug_main.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import asyncio
3
+ from fastapi import FastAPI, Request
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from fastapi.responses import JSONResponse
6
+ import uvicorn
7
+
8
+ # Configure logging
9
+ logging.basicConfig(
10
+ level=logging.INFO,
11
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
12
+ )
13
+ logger = logging.getLogger(__name__)
14
+
15
+ app = FastAPI()
16
+
17
+ # Add CORS middleware
18
+ app.add_middleware(
19
+ CORSMiddleware,
20
+ allow_origins=["*"],
21
+ allow_credentials=True,
22
+ allow_methods=["*"],
23
+ allow_headers=["*"],
24
+ )
25
+
26
+ # Simple health check endpoint
27
+ @app.get("/health")
28
+ async def healthcheck():
29
+ return {"status": "ok"}
30
+
31
+ # Test endpoint that doesn't use any external services
32
+ @app.get("/test")
33
+ async def test_endpoint():
34
+ return {"message": "Test endpoint working"}
35
+
36
+ # Test endpoint that simulates a delay
37
+ @app.get("/test-delay")
38
+ async def test_delay(seconds: float = 2.0):
39
+ await asyncio.sleep(seconds)
40
+ return {"message": f"Delayed response after {seconds} seconds"}
41
+
42
+ if __name__ == "__main__":
43
+ uvicorn.run(
44
+ "debug_main:app",
45
+ host="0.0.0.0",
46
+ port=8000,
47
+ reload=True,
48
+ log_level="info"
49
+ )
main.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from fastapi import FastAPI, Request, Depends
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from fastapi.responses import JSONResponse
5
+ from starlette import status
6
+
7
+ from app.utils.config import settings
8
+ from app.utils.security import get_current_user_optional
9
+ from app.routers import users, chat, audio, images
10
+ from app.routers import cameroon_data
11
+ from app.routers import ai
12
+
13
+
14
+ def create_app() -> FastAPI:
15
+ # Configure logging
16
+ settings.configure_logging()
17
+ logger = logging.getLogger(__name__)
18
+
19
+ app = FastAPI(
20
+ title="Carehelp API",
21
+ description="Backend IA médical pour le Cameroun (Carehelp)",
22
+ version="1.0.0",
23
+ contact={
24
+ "name": "Carehelp",
25
+ },
26
+ )
27
+
28
+ app.add_middleware(
29
+ CORSMiddleware,
30
+ allow_origins=settings.CORS_ALLOW_ORIGINS,
31
+ allow_credentials=True,
32
+ allow_methods=["*"]
33
+ ,
34
+ allow_headers=["*"]
35
+ ,
36
+ )
37
+
38
+ # Routers
39
+ app.include_router(users.router, prefix="/api", tags=["users"])
40
+ app.include_router(chat.router, prefix="/api", tags=["chat"])
41
+ app.include_router(audio.router, prefix="/api", tags=["audio"])
42
+ app.include_router(images.router, prefix="/api", tags=["images"])
43
+ app.include_router(cameroon_data.router, prefix="/api/cameroon-data", tags=["cameroon-data"])
44
+ app.include_router(ai.router, prefix="/api", tags=["ai"])
45
+
46
+ # Log all registered routes for debugging
47
+ try:
48
+ route_paths = sorted([getattr(r, 'path', str(r)) for r in app.routes])
49
+ logging.getLogger(__name__).info(f"Registered routes: {route_paths}")
50
+ except Exception as e:
51
+ logging.getLogger(__name__).error(f"Failed to list routes: {e}")
52
+
53
+ @app.get("/health")
54
+ def healthcheck():
55
+ return {"status": "ok"}
56
+
57
+ # Debug: return list of routes
58
+ @app.get("/routes")
59
+ def list_routes():
60
+ return {"routes": [
61
+ {
62
+ "path": getattr(r, 'path', str(r)),
63
+ "name": getattr(r, 'name', None),
64
+ "methods": list(getattr(r, 'methods', []) or [])
65
+ }
66
+ for r in app.routes
67
+ ]}
68
+
69
+ # No database initialization; Supabase will manage schema
70
+
71
+ @app.post("/gateway")
72
+ async def gateway(request: Request, current_user=Depends(get_current_user_optional)):
73
+ """
74
+ Endpoint unique pour le frontend.
75
+ - Mode actions: {"action": "chat|transcribe|analyze-image|analyze-multimodal|translate", "payload": {...}}
76
+ - Mode unifié (recommandé): {"payload": {text?, image?, images?, audio?, audios?, files?, file_names?, location?, want_stats?, agent_mode?}}
77
+ Si "action" est omis ou vaut l'un de {"auto", "unified", "ai"}, la requête est routée automatiquement selon les champs fournis.
78
+ """
79
+ body = await request.json()
80
+ action = body.get("action")
81
+ payload = body.get("payload", {})
82
+
83
+ # Mode unifié: pas d'action explicitement fournie ou action spéciale
84
+ if not action or str(action).lower() in {"auto", "unified", "ai"}:
85
+ try:
86
+ req = ai.AIRequest(
87
+ text=payload.get("text"),
88
+ image=payload.get("image"),
89
+ images=payload.get("images"),
90
+ audio=payload.get("audio"),
91
+ audios=payload.get("audios"),
92
+ want_stats=payload.get("want_stats", False),
93
+ location=payload.get("location"),
94
+ files=payload.get("files"),
95
+ file_names=payload.get("file_names"),
96
+ agent_mode=payload.get("agent_mode"),
97
+ )
98
+ except Exception as e:
99
+ return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content={"detail": f"Payload invalide pour le mode unifié: {e}"})
100
+ # Déléguer au point d'entrée IA unifié (gère texte, image(s), audio(s), fichiers)
101
+ return await ai.ai_endpoint(req)
102
+
103
+ # Proxifier vers les endpoints internes
104
+ if action == "chat":
105
+ return await chat.handle_chat_via_gateway(payload, current_user)
106
+ if action == "transcribe":
107
+ return await audio.handle_transcription_via_gateway(payload, current_user)
108
+ if action == "analyze-image":
109
+ return await images.handle_analyze_image_via_gateway(payload, current_user)
110
+ if action == "analyze-multimodal":
111
+ return await images.handle_analyze_multimodal_via_gateway(payload, current_user)
112
+ if action == "translate":
113
+ return await chat.handle_translate_via_gateway(payload, current_user)
114
+
115
+ return JSONResponse(
116
+ status_code=status.HTTP_400_BAD_REQUEST,
117
+ content={"detail": "Action inconnue"},
118
+ )
119
+
120
+ return app
121
+
122
+
123
+ app = create_app()
124
+
125
+ if __name__ == "__main__":
126
+ import uvicorn
127
+ uvicorn.run("main:app", host="0.0.0.0", port=int(settings.PORT), reload=True)
128
+
openapi.json ADDED
Binary file (2.81 kB). View file
 
patient_records.json ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "patient_id": "P001",
4
+ "diagnosis": {
5
+ "fr": "Diabète de type 2",
6
+ "en": "Type 2 Diabetes",
7
+ "dua": "Sɔŋgɔ a sukɛlɛ a bibale",
8
+ "bss": "Màtìtì ma sùgàl ma mbìs",
9
+ "ewo": "Mvɔɔ ɔsɔk"
10
+ },
11
+ "medications": [
12
+ {
13
+ "name": {
14
+ "fr": "Metformine 500mg",
15
+ "en": "Metformin 500mg"
16
+ },
17
+ "dosage": "2 fois par jour",
18
+ "side_effects": {
19
+ "fr": "Nausées, diarrhée",
20
+ "en": "Nausea, diarrhea"
21
+ }
22
+ }
23
+ ],
24
+ "care_instructions": {
25
+ "fr": "Contrôle glycémique quotidien. Éviter les sucres rapides et faire 30 minutes d'exercice par jour.",
26
+ "en": "Monitor blood sugar daily. Avoid sugary foods and exercise for 30 minutes daily."
27
+ }
28
+ },
29
+ {
30
+ "patient_id": "P002",
31
+ "diagnosis": {
32
+ "fr": "Paludisme",
33
+ "en": "Malaria",
34
+ "dua": "Sɔŋgɔ a misɔɔ",
35
+ "bss": "Ntɔɔ̀",
36
+ "ewo": "Mvɔɔ ɔsɔŋ"
37
+ },
38
+ "medications": [
39
+ {
40
+ "name": {
41
+ "fr": "Artésunate 100mg",
42
+ "en": "Artesunate 100mg"
43
+ },
44
+ "dosage": "1 fois par jour pendant 3 jours",
45
+ "side_effects": {
46
+ "fr": "Maux de tête, vertiges",
47
+ "en": "Headache, dizziness"
48
+ }
49
+ }
50
+ ],
51
+ "care_instructions": {
52
+ "fr": "Boire beaucoup d'eau, se reposer. Utiliser une moustiquaire imprégnée.",
53
+ "en": "Drink plenty of water, rest. Sleep under an insecticide-treated mosquito net."
54
+ }
55
+ },
56
+ {
57
+ "patient_id": "P006",
58
+ "diagnosis": {
59
+ "fr": "VIH (Stade 1)",
60
+ "en": "HIV (Stage 1)",
61
+ "dua": "VIH (Búnu 1)",
62
+ "bss": "VIH (Lilɔŋ 1)",
63
+ "ewo": "VIH (Awono 1)"
64
+ },
65
+ "medications": [
66
+ {
67
+ "name": {
68
+ "fr": "Ténofovir + Lamivudine + Dolutégravir (TLD)",
69
+ "en": "Tenofovir + Lamivudine + Dolutegravir (TLD)"
70
+ },
71
+ "dosage": "1 comprimé/jour à vie",
72
+ "side_effects": {
73
+ "fr": "Vertiges (premières semaines), rash cutané",
74
+ "en": "Dizziness (first weeks), skin rash"
75
+ }
76
+ }
77
+ ],
78
+ "care_instructions": {
79
+ "fr": "Prise strictement quotidienne. Utilisation obligatoire de préservatifs pour protéger vos partenaires.",
80
+ "en": "Strict daily intake. Mandatory condom use to protect partners."
81
+ },
82
+ "emergency_scenarios": [
83
+ {
84
+ "trigger": {
85
+ "fr": "Oubli de dose >24h",
86
+ "en": "Missed dose >24h"
87
+ },
88
+ "action_steps": {
89
+ "fr": "1. Prenez la dose oubliée immédiatement. 2. Ne doublez jamais la dose suivante. 3. Contactez votre centre ARV pour informer le personnel.",
90
+ "en": "1. Take the missed dose immediately. 2. Never double the next dose. 3. Contact your ARV center to inform the staff."
91
+ }
92
+ }
93
+ ]
94
+ },
95
+ {
96
+ "context_id": "EDU001",
97
+ "context_type": "prevention",
98
+ "topic": {
99
+ "fr": "Hygiène des mains",
100
+ "en": "Hand Hygiene",
101
+ "dua": "Sɔbɔlɔ a mɛ́nyɔ",
102
+ "bss": "Sùgùsɛ̀l mi mbɔk",
103
+ "ewo": "Mvɔɔ ɔbɔɔ"
104
+ },
105
+ "educational_content": {
106
+ "fr": "Le lavage des mains avec du savon réduit de 50% les diarrhées et infections respiratoires. Lavez-les avant de manger, après les toilettes, et en rentrant à la maison.",
107
+ "en": "Washing hands with soap reduces diarrhea and respiratory infections by 50%. Wash them before eating, after using the toilet, and when returning home."
108
+ },
109
+ "visual_cue": "🫧⏱️",
110
+ "target_group": "Tous publics"
111
+ },
112
+ {
113
+ "context_id": "EDU002",
114
+ "context_type": "prevention",
115
+ "topic": {
116
+ "fr": "Prévention du paludisme",
117
+ "en": "Malaria Prevention",
118
+ "dua": "Bìtɔ́ɔ́ bi misɔɔ",
119
+ "bss": "Bìlɔ̀gɔ̀bì bi ntɔɔ̀",
120
+ "ewo": "Atìŋ ɔmvɔɔ ɔsɔŋ"
121
+ },
122
+ "educational_content": {
123
+ "fr": "Actions clés : 1. Dormir sous moustiquaire imprégnée. 2. Éliminer les eaux stagnantes autour de la maison. 3. Porter des vêtements longs le soir.",
124
+ "en": "Key actions: 1. Sleep under an insecticide-treated net. 2. Eliminate standing water around the house. 3. Wear long clothing in the evening."
125
+ },
126
+ "visual_cue": "🦟🚫",
127
+ "target_group": "Zones endémiques"
128
+ },
129
+ {
130
+ "context_id": "EDU003",
131
+ "context_type": "nutrition",
132
+ "topic": {
133
+ "fr": "Alimentation équilibrée",
134
+ "en": "Balanced Diet",
135
+ "dua": "Bìdì bi wɔ́ɔ́s",
136
+ "bss": "Bìjɛk bìtìì",
137
+ "ewo": "Mvɔɔ ɔjɛ"
138
+ },
139
+ "educational_content": {
140
+ "fr": "Un régime équilibré renforce votre immunité. Mangez des fruits, des légumes et des protéines (poisson, haricots) chaque jour. Limitez le sucre, le sel et les aliments transformés.",
141
+ "en": "A balanced diet boosts your immunity. Eat fruits, vegetables, and proteins (fish, beans) daily. Limit sugar, salt, and processed foods."
142
+ },
143
+ "visual_cue": "🥗🍎",
144
+ "target_group": "Général"
145
+ },
146
+ {
147
+ "context_id": "EMER001",
148
+ "context_type": "emergency_education",
149
+ "scenario": {
150
+ "fr": "Fièvre chez l'enfant de moins de 5 ans",
151
+ "en": "Fever in a child under 5",
152
+ "dua": "Munyɛŋgɛ mwa mwana a ponda 5",
153
+ "bss": "Njòŋgò nì mùn à nnɔ̀k 5",
154
+ "ewo": "Mvɔɔ ɔwondo ɔman"
155
+ },
156
+ "action_steps": {
157
+ "fr": "1. Mesurer la température. 2. Si >38°C, déshabiller l'enfant et lui donner un bain tiède. 3. Donner du paracétamol selon le poids. 4. Si la fièvre persiste ou >40°C, consulter immédiatement un médecin.",
158
+ "en": "1. Measure the temperature. 2. If >38°C, undress the child and give a lukewarm bath. 3. Give paracetamol according to weight. 4. If fever persists or >40°C, see a doctor immediately."
159
+ },
160
+ "visual_cue": "🌡️👶",
161
+ "risk_level": "Modéré"
162
+ },
163
+ {
164
+ "context_id": "EMER002",
165
+ "context_type": "emergency_education",
166
+ "scenario": {
167
+ "fr": "Morsure de serpent",
168
+ "en": "Snake Bite"
169
+ },
170
+ "action_steps": {
171
+ "fr": "1. Restez calme et rassurez la victime. 2. Immobilisez le membre mordu (ne pas le bouger). 3. NE PAS faire de garrot, NE PAS inciser, NE PAS aspirer le venin. 4. Aller à l'hôpital le plus proche IMMÉDIATEMENT.",
172
+ "en": "1. Stay calm and reassure the victim. 2. Immobilize the bitten limb (do not move it). 3. DO NOT use a tourniquet, DO NOT cut, DO NOT suck the venom. 4. Go to the nearest hospital IMMEDIATELY."
173
+ },
174
+ "visual_cue": "🐍➡️🏥",
175
+ "risk_level": "Élevé"
176
+ },
177
+ {
178
+ "patient_id": "P011",
179
+ "diagnosis": {
180
+ "fr": "Tuberculose",
181
+ "en": "Tuberculosis (TB)",
182
+ "dua": "Sɔŋgɔ a Kɔsɛ",
183
+ "bss": "Ntɔɔ̀ nì Kɔ̀s",
184
+ "ewo": "Mvɔɔ ɔkɔs"
185
+ },
186
+ "medications": [
187
+ {
188
+ "name": {
189
+ "fr": "Rifampicine, Isoniazide, Pyrazinamide, Ethambutol",
190
+ "en": "Rifampicin, Isoniazid, Pyrazinamide, Ethambutol"
191
+ },
192
+ "dosage": "Prise quotidienne pendant 6 mois, selon protocole national",
193
+ "side_effects": {
194
+ "fr": "Coloration orange des urines, troubles digestifs, douleurs articulaires",
195
+ "en": "Orange discoloration of urine, digestive issues, joint pain"
196
+ }
197
+ }
198
+ ],
199
+ "care_instructions": {
200
+ "fr": "Suivre le traitement sans interruption est crucial. Couvrez-vous la bouche et le nez lorsque vous toussez. Aérez bien votre logement. Mangez équilibré.",
201
+ "en": "Following the treatment without interruption is crucial. Cover your mouth and nose when coughing. Ventilate your home well. Eat a balanced diet."
202
+ },
203
+ "common_questions": [
204
+ {
205
+ "question": {
206
+ "fr": "La tuberculose se guérit-elle ?",
207
+ "en": "Is tuberculosis curable?"
208
+ },
209
+ "answer": {
210
+ "fr": "Oui, la tuberculose se guérit complètement si le traitement de 6 mois est suivi correctement sans interruption.",
211
+ "en": "Yes, tuberculosis is completely curable if the 6-month treatment is followed correctly without interruption."
212
+ }
213
+ }
214
+ ]
215
+ },
216
+ {
217
+ "patient_id": "P012",
218
+ "diagnosis": {
219
+ "fr": "Gastrite",
220
+ "en": "Gastritis",
221
+ "dua": "Sɔŋgɔ a dibɔ́mɔ",
222
+ "bss": "Ntɔɔ̀ nì libùm",
223
+ "ewo": "Mvɔɔ ɔwom"
224
+ },
225
+ "medications": [
226
+ {
227
+ "name": {
228
+ "fr": "Oméprazole 20mg",
229
+ "en": "Omeprazole 20mg"
230
+ },
231
+ "dosage": "1 comprimé par jour avant le repas",
232
+ "side_effects": {
233
+ "fr": "Maux de tête, diarrhée",
234
+ "en": "Headache, diarrhea"
235
+ }
236
+ }
237
+ ],
238
+ "care_instructions": {
239
+ "fr": "Évitez les aliments épicés, acides, gras, ainsi que l'alcool et le café. Mangez de plus petits repas plus fréquemment. Ne vous allongez pas juste après avoir mangé.",
240
+ "en": "Avoid spicy, acidic, fatty foods, as well as alcohol and coffee. Eat smaller, more frequent meals. Do not lie down immediately after eating."
241
+ },
242
+ "common_questions": [
243
+ {
244
+ "question": {
245
+ "fr": "Qu'est-ce qui cause la gastrite ?",
246
+ "en": "What causes gastritis?"
247
+ },
248
+ "answer": {
249
+ "fr": "La gastrite peut être causée par une infection (Helicobacter pylori), le stress, l'alcool, ou la prise de certains médicaments anti-inflammatoires.",
250
+ "en": "Gastritis can be caused by an infection (Helicobacter pylori), stress, alcohol, or taking certain anti-inflammatory drugs."
251
+ }
252
+ }
253
+ ]
254
+ },
255
+ {
256
+ "patient_id": "P013",
257
+ "diagnosis": {
258
+ "fr": "Infection urinaire",
259
+ "en": "Urinary Tract Infection (UTI)",
260
+ "dua": "Sɔŋgɔ a misɔ́pí",
261
+ "bss": "Ntɔɔ̀ nì màsɔ̀p",
262
+ "ewo": "Mvɔɔ ɔsɔp"
263
+ },
264
+ "medications": [
265
+ {
266
+ "name": {
267
+ "fr": "Ciprofloxacine 500mg",
268
+ "en": "Ciprofloxacin 500mg"
269
+ },
270
+ "dosage": "1 comprimé 2 fois par jour pendant 7 jours",
271
+ "side_effects": {
272
+ "fr": "Nausées, sensibilité au soleil",
273
+ "en": "Nausea, sun sensitivity"
274
+ }
275
+ }
276
+ ],
277
+ "care_instructions": {
278
+ "fr": "Buvez beaucoup d'eau (au moins 2 litres par jour) pour aider à nettoyer les voies urinaires. Urinez fréquemment et ne vous retenez pas. Essuyez-vous d'avant en arrière après être allée aux toilettes.",
279
+ "en": "Drink plenty of water (at least 2 liters per day) to help flush the urinary tract. Urinate frequently and do not hold it in. Wipe from front to back after using the toilet."
280
+ },
281
+ "common_questions": [
282
+ {
283
+ "question": {
284
+ "fr": "Comment prévenir les infections urinaires ?",
285
+ "en": "How to prevent UTIs?"
286
+ },
287
+ "answer": {
288
+ "fr": "Pour prévenir les infections, buvez beaucoup d'eau, urinez après les rapports sexuels, et évitez les produits d'hygiène irritants.",
289
+ "en": "To prevent infections, drink plenty of water, urinate after intercourse, and avoid irritating hygiene products."
290
+ }
291
+ }
292
+ ]
293
+ }
294
+ ]
requirements.txt ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ python-multipart
4
+ pydantic
5
+ pydantic-settings
6
+ pydantic[email]
7
+ passlib[bcrypt]
8
+ PyJWT
9
+ PyPDF2
10
+ httpx
11
+ openai
12
+ python-dotenv
13
+ langchain-openai
14
+ langchain-huggingface
15
+ huggingface-hub
16
+ cachetools
17
+ loguru
18
+ numpy
19
+ pandas
20
+ scikit-learn
21
+ scipy
22
+ sentence-transformers
23
+ rapidfuzz
24
+ librosa
25
+ soundfile
26
+ noisereduce
27
+ langdetect
28
+ requests
29
+ serpapi
30
+ googlemaps
31
+ langchain
32
+ langchain-community
33
+ langchain-core
34
+ langsmith
scripts/local_media_tests.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import requests
5
+ from pathlib import Path
6
+
7
+ API_URL = os.environ.get("API_URL", "http://127.0.0.1:8000")
8
+
9
+ ROOT = Path(__file__).resolve().parents[1]
10
+ IMG1 = ROOT / "image-test1.jpg"
11
+ IMG2 = ROOT / "image-test2.jpg"
12
+ AUDIO1 = ROOT / "test-audio1.wav"
13
+
14
+
15
+ def wait_for_health(timeout=60):
16
+ url = f"{API_URL}/health"
17
+ start = time.time()
18
+ while time.time() - start < timeout:
19
+ try:
20
+ r = requests.get(url, timeout=3)
21
+ if r.status_code == 200:
22
+ return True
23
+ except Exception:
24
+ pass
25
+ time.sleep(1)
26
+ return False
27
+
28
+
29
+ def pretty(obj):
30
+ try:
31
+ return json.dumps(obj, indent=2, ensure_ascii=False)
32
+ except Exception:
33
+ return str(obj)
34
+
35
+
36
+ def post_json(path, payload):
37
+ url = f"{API_URL}{path}"
38
+ r = requests.post(url, json=payload, timeout=180)
39
+ try:
40
+ return r.status_code, r.json()
41
+ except Exception:
42
+ return r.status_code, {"text": r.text}
43
+
44
+
45
+ def post_form(path, data, files):
46
+ url = f"{API_URL}{path}"
47
+ r = requests.post(url, data=data, files=files, timeout=300)
48
+ try:
49
+ return r.status_code, r.json()
50
+ except Exception:
51
+ return r.status_code, {"text": r.text}
52
+
53
+
54
+ def test_unified_text():
55
+ payload = {
56
+ "message": "Patient with rash and fever for 2 days.",
57
+ "message_type": "text",
58
+ }
59
+ return post_json("/api/chat/unified", payload)
60
+
61
+
62
+ def test_unified_image(path: Path):
63
+ payload = {
64
+ "message": str(path), # local path supported by server (converted to data URL)
65
+ "message_type": "image",
66
+ }
67
+ return post_json("/api/chat/unified", payload)
68
+
69
+
70
+ def test_unified_audio(path: Path):
71
+ payload = {
72
+ "message": str(path), # local path supported by server
73
+ "message_type": "audio",
74
+ }
75
+ return post_json("/api/chat/unified", payload)
76
+
77
+
78
+ def test_form(text: str | None = None, images: list[Path] | None = None, audios: list[Path] | None = None):
79
+ data = {}
80
+ if text is not None:
81
+ data["text"] = text
82
+ data["want_stats"] = "true"
83
+ files = []
84
+ for img in images or []:
85
+ mime = "image/jpeg" if img.suffix.lower() in {".jpg", ".jpeg"} else "image/png"
86
+ files.append(("images", (img.name, open(img, "rb"), mime)))
87
+ for au in audios or []:
88
+ files.append(("audios", (au.name, open(au, "rb"), "audio/wav")))
89
+ try:
90
+ return post_form("/api/ai/form", data, files)
91
+ finally:
92
+ # Close any file handles we opened
93
+ for _, (name, fh, _mime) in files:
94
+ try:
95
+ fh.close()
96
+ except Exception:
97
+ pass
98
+
99
+
100
+ def main():
101
+ print(f"API_URL: {API_URL}")
102
+ # Validate media presence
103
+ missing = [p for p in [IMG1, IMG2, AUDIO1] if not p.exists()]
104
+ if missing:
105
+ print("Missing local files:", ", ".join(map(str, missing)))
106
+ return 2
107
+
108
+ print("Waiting for API /health ...")
109
+ if not wait_for_health(timeout=90):
110
+ print("Server not ready within timeout.")
111
+ return 3
112
+
113
+ results = []
114
+
115
+ print("\n=== Unified - TEXT only ===")
116
+ results.append(("unified_text",) + test_unified_text())
117
+
118
+ print("\n=== Unified - IMAGE only (image-test1.jpg) ===")
119
+ results.append(("unified_image_img1",) + test_unified_image(IMG1))
120
+
121
+ print("\n=== Unified - IMAGE only (image-test2.jpg) ===")
122
+ results.append(("unified_image_img2",) + test_unified_image(IMG2))
123
+
124
+ print("\n=== Unified - AUDIO only (test-audio1.wav) ===")
125
+ results.append(("unified_audio_audio1",) + test_unified_audio(AUDIO1))
126
+
127
+ print("\n=== Form - TEXT + IMAGE (img1) ===")
128
+ results.append(("form_text_img1",) + test_form(
129
+ text="Patient with rash and fever for 2 days.", images=[IMG1], audios=[]
130
+ ))
131
+
132
+ print("\n=== Form - TEXT + AUDIO (audio1) ===")
133
+ results.append(("form_text_audio1",) + test_form(
134
+ text="Patient with cough and sore throat.", images=[], audios=[AUDIO1]
135
+ ))
136
+
137
+ print("\n=== Form - TEXT + IMAGES (img1, img2) + AUDIO (audio1) ===")
138
+ results.append(("form_text_imgs_audio",) + test_form(
139
+ text="Patient with rash, fever and mild headache.", images=[IMG1, IMG2], audios=[AUDIO1]
140
+ ))
141
+
142
+ # Print a compact summary at the end
143
+ print("\n==== SUMMARY ====")
144
+ for label, status, payload in results:
145
+ status_str = "OK" if status == 200 else f"ERR({status})"
146
+ print(f"- {label}: {status_str}")
147
+
148
+ # Optionally dump detailed JSON for failures
149
+ failures = [(l, s, p) for (l, s, p) in results if s != 200]
150
+ if failures:
151
+ print("\n==== FAILURES (detailed) ====")
152
+ for label, status, payload in failures:
153
+ print(f"\n## {label} -> HTTP {status}")
154
+ print(pretty(payload))
155
+ else:
156
+ print("\nAll tests returned HTTP 200.")
157
+
158
+ return 0
159
+
160
+
161
+ if __name__ == "__main__":
162
+ raise SystemExit(main())
scripts/run_unified_tests.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import importlib.util
4
+ import subprocess
5
+ import pytest
6
+ from colorama import init, Fore, Style
7
+
8
+ # Initialize colorama for colored output
9
+ init()
10
+
11
+ # Ensure project root is on sys.path
12
+ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
13
+ if PROJECT_ROOT not in sys.path:
14
+ sys.path.insert(0, PROJECT_ROOT)
15
+
16
+ def check_dependencies():
17
+ """Check if all required dependencies are installed"""
18
+ required_packages = ['httpx', 'pytest', 'colorama', 'fastapi']
19
+ missing_packages = []
20
+
21
+ for package in required_packages:
22
+ if importlib.util.find_spec(package) is None:
23
+ missing_packages.append(package)
24
+
25
+ if missing_packages:
26
+ print(f"{Fore.RED}Missing required dependencies: {', '.join(missing_packages)}{Style.RESET_ALL}")
27
+ install = input(f"Would you like to install them now? (y/n): ").lower().strip() == 'y'
28
+
29
+ if install:
30
+ print(f"{Fore.YELLOW}Installing missing packages...{Style.RESET_ALL}")
31
+ subprocess.check_call([sys.executable, "-m", "pip", "install"] + missing_packages)
32
+ print(f"{Fore.GREEN}Dependencies installed successfully!{Style.RESET_ALL}")
33
+ return True
34
+ else:
35
+ print(f"{Fore.RED}Cannot run tests without required dependencies.{Style.RESET_ALL}")
36
+ return False
37
+ return True
38
+
39
+ def main():
40
+ # Check dependencies first
41
+ if not check_dependencies():
42
+ return 1
43
+
44
+ print(f"{Fore.CYAN}===== Running Unified API Tests ====={Style.RESET_ALL}")
45
+
46
+ # Define test patterns to run
47
+ test_patterns = [
48
+ "tests/test_api_endpoints.py::test_unified_chat_text",
49
+ "tests/test_live_ai.py::test_unified_text_live"
50
+ ]
51
+
52
+ # Run each test and collect results
53
+ results = {}
54
+ for pattern in test_patterns:
55
+ print(f"\n{Fore.YELLOW}Running: {pattern}{Style.RESET_ALL}")
56
+ exit_code = pytest.main([pattern, "-v"])
57
+ results[pattern] = exit_code == 0
58
+
59
+ # Print summary
60
+ print(f"\n{Fore.CYAN}===== Test Results Summary ====={Style.RESET_ALL}")
61
+ all_passed = True
62
+ for pattern, passed in results.items():
63
+ status = f"{Fore.GREEN}PASSED{Style.RESET_ALL}" if passed else f"{Fore.RED}FAILED{Style.RESET_ALL}"
64
+ print(f"{pattern}: {status}")
65
+ if not passed:
66
+ all_passed = False
67
+
68
+ # Final status
69
+ if all_passed:
70
+ print(f"\n{Fore.GREEN}All unified tests passed!{Style.RESET_ALL}")
71
+ else:
72
+ print(f"\n{Fore.RED}Some unified tests failed. Check the output above for details.{Style.RESET_ALL}")
73
+
74
+ return 0 if all_passed else 1
75
+
76
+ if __name__ == "__main__":
77
+ sys.exit(main())
scripts/test_ai_form.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import io
3
+
4
+ API_URL = "http://localhost:8000/api/ai/form"
5
+
6
+ text = "Patient with rash and fever for 2 days."
7
+
8
+ # Fetch a small sample image
9
+ img_url = "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png"
10
+ img_bytes = requests.get(img_url, timeout=15).content
11
+
12
+ files = [
13
+ ("images", ("sample.png", io.BytesIO(img_bytes), "image/png")),
14
+ ]
15
+
16
+ data = {
17
+ "text": text,
18
+ "want_stats": "true",
19
+ "location": "Douala, Cameroon",
20
+ }
21
+
22
+ resp = requests.post(API_URL, data=data, files=files)
23
+ print("Status:", resp.status_code)
24
+ try:
25
+ print(resp.json())
26
+ except Exception:
27
+ print(resp.text)
scripts/test_api_ai.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ API_URL = "http://localhost:8000/api/ai"
5
+
6
+ # Test 1 : Texte seul
7
+ payload_text = {
8
+ "text": "Patient présentant une éruption cutanée et de la fièvre depuis 2 jours.",
9
+ "want_stats": True
10
+ }
11
+
12
+ # Test 2 : Image seule
13
+ payload_image = {
14
+ "image": "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png",
15
+ "want_stats": True
16
+ }
17
+
18
+ # Test 3 : Texte + Image
19
+ payload_multimodal = {
20
+ "text": "Patient présentant une éruption cutanée et de la fièvre depuis 2 jours.",
21
+ "image": "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png",
22
+ "want_stats": True
23
+ }
24
+
25
+ for label, payload in [
26
+ ("Texte seul", payload_text),
27
+ ("Image seule", payload_image),
28
+ ("Texte + Image", payload_multimodal)
29
+ ]:
30
+ print(f"\n=== Test : {label} ===")
31
+ response = requests.post(API_URL, json=payload)
32
+ print("Status:", response.status_code)
33
+ try:
34
+ print(json.dumps(response.json(), indent=2, ensure_ascii=False))
35
+ except Exception:
36
+ print(response.text)
scripts/test_multimodal.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ API_URL = "http://localhost:8000/gateway"
5
+
6
+ payload = {
7
+ "action": "analyze-multimodal",
8
+ "payload": {
9
+ "image_url": "https://upload.wikimedia.org/wikipedia/commons/8/89/Portrait_Placeholder.png",
10
+ "text": "Patient présentant une éruption cutanée et de la fièvre depuis 2 jours."
11
+ }
12
+ }
13
+
14
+ response = requests.post(API_URL, json=payload)
15
+ print("Status:", response.status_code)
16
+ try:
17
+ print(json.dumps(response.json(), indent=2, ensure_ascii=False))
18
+ except Exception:
19
+ print(response.text)
scripts/try_unified.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import json
3
+ import os
4
+ import sys
5
+
6
+ import requests
7
+
8
+
9
+ def call_unified(api_url: str, message: str, message_type: str, language: str | None, history: list[str] | None = None) -> dict:
10
+ payload = {
11
+ "message": message,
12
+ "message_type": message_type,
13
+ }
14
+ if language:
15
+ payload["language"] = language
16
+ if history:
17
+ payload["history"] = history
18
+ r = requests.post(f"{api_url}/api/chat/unified", json=payload, timeout=120)
19
+ try:
20
+ data = r.json()
21
+ except Exception:
22
+ data = {"status_code": r.status_code, "text": r.text}
23
+ return {"status_code": r.status_code, "data": data}
24
+
25
+
26
+ def pretty(obj: dict) -> str:
27
+ return json.dumps(obj, indent=2, ensure_ascii=False)
28
+
29
+
30
+ def interactive_mode(api_url: str, language: str | None) -> int:
31
+ print("Interactive unified chat tester. Type 'quit' to exit.\n")
32
+ history: list[str] = []
33
+ while True:
34
+ t = input("Type (text/audio/image) [text]: ").strip().lower() or "text"
35
+ if t not in {"text", "audio", "image"}:
36
+ print("Invalid type. Use text/audio/image.")
37
+ continue
38
+ msg = input("Message (text or URL): ").strip()
39
+ if msg.lower() in {"quit", "exit"}:
40
+ return 0
41
+ res = call_unified(api_url, msg, t, language, history=history[-6:])
42
+ print(pretty(res))
43
+ if res.get("status_code") == 200 and isinstance(res.get("data"), dict):
44
+ # Append last user message to history for context
45
+ history.append(msg)
46
+
47
+
48
+ def main() -> int:
49
+ parser = argparse.ArgumentParser(description="Try the unified AI endpoint with text/image/audio")
50
+ parser.add_argument("--api", default=os.environ.get("API_URL", "http://127.0.0.1:8000"), help="Base API URL")
51
+ parser.add_argument("--message", help="Text or URL to test")
52
+ parser.add_argument("--type", dest="type_", default="text", choices=["text", "audio", "image"], help="Message type")
53
+ parser.add_argument("--language", help="Optional language for response (auto-detected if omitted)")
54
+ parser.add_argument("--history", nargs="*", help="Optional prior messages (space-separated)")
55
+ parser.add_argument("--interactive", action="store_true", help="Interactive prompt mode")
56
+ args = parser.parse_args()
57
+
58
+ if args.interactive:
59
+ return interactive_mode(args.api, args.language) or 0
60
+
61
+ if not args.message:
62
+ print("--message is required when not in --interactive mode", file=sys.stderr)
63
+ return 2
64
+
65
+ result = call_unified(args.api, args.message, args.type_, args.language, history=args.history)
66
+ print(pretty(result))
67
+ return 0
68
+
69
+
70
+ if __name__ == "__main__":
71
+ raise SystemExit(main())
72
+
73
+
tests/conftest.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ # Ensure project root is on sys.path so 'app' and 'main' can be imported
5
+ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
6
+ if PROJECT_ROOT not in sys.path:
7
+ sys.path.insert(0, PROJECT_ROOT)
8
+
9
+
tests/test_ai_services.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest.mock import patch, MagicMock
2
+ from app.ai_services import chat_completion, analyze_image, translate_text, transcribe_audio
3
+
4
+
5
+ @patch("app.ai_services.openai_client")
6
+ def test_chat_completion(mock_client):
7
+ mock = MagicMock()
8
+ mock.chat.completions.create.return_value = MagicMock(choices=[MagicMock(message=MagicMock(content="ok"))])
9
+ mock_client.return_value = mock
10
+ out = chat_completion([{"role": "user", "content": "hello"}], "en")
11
+ assert out == "ok"
12
+
13
+
14
+ @patch("app.ai_services.openai_client")
15
+ def test_analyze_image(mock_client):
16
+ mock = MagicMock()
17
+ mock.chat.completions.create.return_value = MagicMock(choices=[MagicMock(message=MagicMock(content="image ok"))])
18
+ mock_client.return_value = mock
19
+ out = analyze_image("./image-test2.jpg", None)
20
+ assert "image ok" in out
21
+
22
+
23
+ @patch("app.ai_services.openai_client")
24
+ def test_translate_text_fallback_openai(mock_client):
25
+ mock = MagicMock()
26
+ mock.chat.completions.create.return_value = MagicMock(choices=[MagicMock(message=MagicMock(content="bonjour"))])
27
+ mock_client.return_value = mock
28
+ out = translate_text("hello", "fr")
29
+ assert out == "bonjour"
30
+
31
+
32
+ @patch("app.ai_services.openai_client")
33
+ @patch("httpx.Client.get")
34
+ def test_transcribe_audio(mock_get, mock_client):
35
+ # Mock audio download
36
+ mock_response = MagicMock()
37
+ mock_response.content = b"fake-bytes"
38
+ mock_response.raise_for_status.return_value = None
39
+ mock_get.return_value = mock_response
40
+
41
+ # Mock whisper
42
+ mock_openai = MagicMock()
43
+ mock_openai.audio.transcriptions.create.return_value = MagicMock(text="bonjour")
44
+ mock_client.return_value = mock_openai
45
+
46
+ out = transcribe_audio("./test-audio1.wav", language="fr")
47
+ assert out == "bonjour"
48
+
49
+ from app.ai_services import build_system_prompt
50
+
51
+
52
+ def test_system_prompt_contains_context():
53
+ p = build_system_prompt("fr")
54
+ assert "Context" in p or "Contex" in p or len(p) > 0
55
+
tests/test_api.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.testclient import TestClient
2
+ from main import app
3
+
4
+
5
+ client = TestClient(app)
6
+
7
+
8
+ def test_health():
9
+ r = client.get("/health")
10
+ assert r.status_code == 200
11
+
12
+
13
+ def test_cameroon_data_endpoints_exist():
14
+ r1 = client.get("/api/cameroon-data/stats/overview")
15
+ assert r1.status_code in (200, 500) # 500 if CSV not configured
16
+
17
+ r2 = client.get("/api/cameroon-data/patterns/seasonal")
18
+ assert r2.status_code in (200, 500)
19
+
20
+ r3 = client.post("/api/cameroon-data/search/similar-cases", json={"query_text": "fever", "top_k": 3})
21
+ assert r3.status_code in (200, 500)
22
+
tests/test_api_endpoints.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.testclient import TestClient
2
+ from main import app
3
+
4
+
5
+ client = TestClient(app)
6
+
7
+
8
+ def test_unified_chat_text():
9
+ r = client.post("/api/chat/unified", json={
10
+ "message": "J'ai de la fièvre et des frissons",
11
+ "message_type": "text",
12
+ "language": "fr"
13
+ })
14
+ assert r.status_code in (200, 500)
15
+ if r.status_code == 200:
16
+ data = r.json()
17
+ assert "response" in data and "context" in data
18
+
19
+
20
+ def test_cameroon_overview():
21
+ r = client.get("/api/cameroon-data/stats/overview")
22
+ assert r.status_code in (200, 500)
23
+
24
+
tests/test_cameroon_data.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ from app.services.cameroon_data import CameroonMedicalData
4
+
5
+
6
+ def test_service_handles_missing_csv(tmp_path):
7
+ svc = CameroonMedicalData(csv_path=str(tmp_path / "missing.csv"))
8
+ svc.clean()
9
+ assert svc.stats_overview()["total_rows"] == 0
10
+
11
+
12
+ def test_basic_stats(tmp_path):
13
+ # Build a tiny CSV
14
+ df = pd.DataFrame([
15
+ {"summary_id": "1", "patient_id": "p1", "patient_age": 25, "patient_gender": "M", "diagnosis": "Paludisme",
16
+ "body_temp_c": 38.5, "blood_pressure_systolic": 120, "heart_rate": 90, "summary_text": "Fievre et frissons",
17
+ "date_recorded": "2024-01-10"},
18
+ {"summary_id": "2", "patient_id": "p2", "patient_age": 7, "patient_gender": "F", "diagnosis": "Typhoide",
19
+ "body_temp_c": 39.2, "blood_pressure_systolic": 110, "heart_rate": 95, "summary_text": "Fièvre, maux de ventre",
20
+ "date_recorded": "2024-02-15"}
21
+ ])
22
+ csv_path = tmp_path / "clinical_summaries.csv"
23
+ df.to_csv(csv_path, index=False)
24
+
25
+ svc = CameroonMedicalData(csv_path=str(csv_path))
26
+ svc.clean()
27
+ ov = svc.stats_overview()
28
+ assert ov["total_rows"] == 2
29
+ assert "paludisme" in ov["top_diagnoses"] or "typhoide" in ov["top_diagnoses"]
30
+
31
+ disease = svc.stats_disease("paludisme")
32
+ assert disease["disease"] == "paludisme"
33
+
34
+ seasonal = svc.seasonal_patterns()
35
+ assert isinstance(seasonal, dict)
36
+
37
+ age_gender = svc.age_gender_distribution()
38
+ assert "age_buckets" in age_gender and "gender_distribution" in age_gender
39
+
40
+
tests/test_helpers.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app.utils.helpers import normalize_gender, clean_diagnosis, emergency_triage
2
+
3
+
4
+ def test_normalize_gender():
5
+ assert normalize_gender("M") == "male"
6
+ assert normalize_gender("masculin") == "male"
7
+ assert normalize_gender("Femme") == "female"
8
+ assert normalize_gender("unknown") is None
9
+
10
+
11
+ def test_clean_diagnosis():
12
+ assert clean_diagnosis(" Paludisme ") == "paludisme"
13
+ assert clean_diagnosis("") is None
14
+
15
+
16
+ def test_emergency_triage():
17
+ assert emergency_triage("douleur poitrine") is True
18
+ assert emergency_triage("tout va bien") is False
19
+
20
+
tests/test_live_ai.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pytest
3
+ from fastapi.testclient import TestClient
4
+ from main import app
5
+
6
+
7
+ requires_live = pytest.mark.skipif(
8
+ not os.getenv("OPENAI_API_KEY"), reason="Skipping live AI tests: OPENAI_API_KEY not set"
9
+ )
10
+
11
+
12
+ @requires_live
13
+ def test_unified_text_live():
14
+ client = TestClient(app)
15
+ r = client.post("/api/chat/unified", json={
16
+ "message": "J'ai de la fièvre et des frissons",
17
+ "message_type": "text",
18
+ "language": "fr"
19
+ })
20
+ assert r.status_code == 200
21
+ data = r.json()
22
+ assert "response" in data and "context" in data
23
+
24
+
tests/test_users_endpoints.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.testclient import TestClient
2
+ from main import app
3
+
4
+
5
+ client = TestClient(app)
6
+
7
+
8
+ def test_users_register_stubbed():
9
+ r = client.post("/api/users/register", json={
10
+ "email": "test@example.com",
11
+ "password": "secret",
12
+ "preferred_language": "fr"
13
+ })
14
+ assert r.status_code == 501
15
+
16
+
17
+ def test_users_login_stubbed():
18
+ r = client.post("/api/users/login", data={
19
+ "username": "test@example.com",
20
+ "password": "secret"
21
+ })
22
+ assert r.status_code == 501
23
+
24
+