SmartHeal committed on
Commit a02c8c4 · verified · 1 Parent(s): c609645

Update src/ai_processor.py

Files changed (1)
  1. src/ai_processor.py +230 -73
src/ai_processor.py CHANGED
@@ -3,14 +3,12 @@
3
  # Turn on deep logging: export LOGLEVEL=DEBUG SMARTHEAL_DEBUG=1
4
 
5
  import os
6
- import time
7
  import logging
8
  from datetime import datetime
9
  from typing import Optional, Dict, List, Tuple
10
 
11
- # ---- Environment defaults ----
12
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
13
- os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
14
  LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
15
  SMARTHEAL_DEBUG = os.getenv("SMARTHEAL_DEBUG", "0") == "1"
16
 
@@ -28,22 +26,20 @@ logging.basicConfig(
28
  def _log_kv(prefix: str, kv: Dict):
29
  logging.debug(prefix + " | " + " | ".join(f"{k}={v}" for k, v in kv.items()))
30
 
31
- # --- Optional Spaces GPU stub (harmless) ---
32
- try:
33
- import spaces as _spaces
34
- @_spaces.GPU(enable_queue=False)
35
- def smartheal_gpu_stub(ping: int = 0) -> str:
36
- return "ready"
37
- logging.info("Registered @spaces.GPU stub (enable_queue=False).")
38
- except Exception:
39
- pass
40
 
41
  UPLOADS_DIR = "uploads"
42
  os.makedirs(UPLOADS_DIR, exist_ok=True)
43
 
44
  HF_TOKEN = os.getenv("HF_TOKEN", None)
45
  YOLO_MODEL_PATH = "src/best.pt"
46
- SEG_MODEL_PATH = "src/segmentation_model.h5" # optional
47
  GUIDELINE_PDFS = ["src/eHealth in Wound Care.pdf", "src/IWGDF Guideline.pdf", "src/evaluation.pdf"]
48
  DATASET_ID = "SmartHeal/wound-image-uploads"
49
  DEFAULT_PX_PER_CM = 38.0
@@ -57,17 +53,35 @@ SEG_THRESH = float(os.getenv("SEG_THRESH", "0.5"))
57
  models_cache: Dict[str, object] = {}
58
  knowledge_base_cache: Dict[str, object] = {}
59
 
60
- # ---------- Lazy imports ----------
61
  def _import_ultralytics():
62
- from ultralytics import YOLO
63
  return YOLO
64
 
65
  def _import_tf_loader():
66
  import tensorflow as tf
67
- try:
68
- tf.config.set_visible_devices([], "GPU") # keep TF on CPU
69
- except Exception:
70
- pass
71
  from tensorflow.keras.models import load_model
72
  return load_model
73
 
@@ -91,57 +105,207 @@ def _import_hf_hub():
91
  from huggingface_hub import HfApi, HfFolder
92
  return HfApi, HfFolder
93
 
94
- # ---------- VLM (disabled by default) ----------
95
- def generate_medgemma_report(
96
  patient_info: str,
97
  visual_results: Dict,
98
  guideline_context: str,
99
  image_pil: Image.Image,
100
  max_new_tokens: Optional[int] = None,
101
  ) -> str:
102
- if os.getenv("SMARTHEAL_ENABLE_VLM", "0") != "1":
103
  return "⚠️ VLM disabled"
104
- try:
105
- from transformers import pipeline
106
- pipe = pipeline(
107
- task="image-text-to-text",
108
- model="google/medgemma-4b-it",
109
- device_map=None,
110
- token=HF_TOKEN,
111
- trust_remote_code=True,
112
- model_kwargs={"low_cpu_mem_usage": True},
113
- )
114
- prompt = (
115
- "You are a medical AI assistant. Analyze this wound image and patient data.\n\n"
116
- f"Patient: {patient_info}\n"
117
- f"Wound: {visual_results.get('wound_type', 'Unknown')} - "
118
- f"{visual_results.get('length_cm', 0)}×{visual_results.get('breadth_cm', 0)} cm\n\n"
119
- "Provide a structured report with:\n"
120
- "1. Clinical Summary\n2. Treatment Recommendations\n3. Risk Assessment\n4. Monitoring Plan\n"
121
- )
122
- messages = [{"role": "user", "content": [
123
  {"type": "image", "image": image_pil},
124
- {"type": "text", "text": prompt},
125
- ]}]
126
- out = pipe(text=messages, max_new_tokens=max_new_tokens or 600, do_sample=False, temperature=0.7)
127
- if out and len(out) > 0:
128
- try:
129
- return out[0]["generated_text"][-1].get("content", "").strip() or "⚠️ Empty response"
130
- except Exception:
131
- return (out[0].get("generated_text", "") or "").strip() or "⚠️ Empty response"
132
- return "⚠️ No output generated"
133
  except Exception as e:
134
- logging.error(f" MedGemma generation error: {e}")
135
  return "⚠️ VLM error"
136
 
137
  # ---------- Initialize CPU models ----------
138
  def load_yolo_model():
139
  YOLO = _import_ultralytics()
140
- return YOLO(YOLO_MODEL_PATH)
141
 
142
- def load_segmentation_model():
143
- load_model = _import_tf_loader()
144
- return load_model(SEG_MODEL_PATH, compile=False)
145
 
146
  def load_classification_pipeline():
147
  pipe = _import_hf_cls()
@@ -163,18 +327,18 @@ def initialize_cpu_models() -> None:
163
  if "det" not in models_cache:
164
  try:
165
  models_cache["det"] = load_yolo_model()
166
- logging.info("✅ YOLO loaded (CPU)")
167
  except Exception as e:
168
  logging.error(f"YOLO load failed: {e}")
169
 
170
  if "seg" not in models_cache:
171
  try:
172
  if os.path.exists(SEG_MODEL_PATH):
173
- models_cache["seg"] = load_segmentation_model()
174
- m = models_cache["seg"]
175
- ishape = getattr(m, "input_shape", None)
176
  oshape = getattr(m, "output_shape", None)
177
- logging.info(f"✅ Segmentation model loaded (CPU) | input_shape={ishape} output_shape={oshape}")
178
  else:
179
  models_cache["seg"] = None
180
  logging.warning("Segmentation model file missing; skipping.")
@@ -343,7 +507,7 @@ def _grabcut_refine(bgr: np.ndarray, seed01: np.ndarray, iters: int = 3) -> np.n
343
  seed_dil = cv2.dilate(seed01, k, iterations=1)
344
  gc[seed01.astype(bool)] = cv2.GC_PR_FGD
345
  gc[seed_dil.astype(bool)] = cv2.GC_FGD
346
- gc[0, :], gc[-1, :], gc[:, 0], gc[:, -1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
347
  bgdModel = np.zeros((1, 65), np.float64)
348
  fgdModel = np.zeros((1, 65), np.float64)
349
  cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
@@ -392,11 +556,7 @@ def segment_wound(image_bgr: np.ndarray, ts: str, out_dir: str) -> Tuple[np.ndar
392
  # --- Model path ---
393
  if seg_model is not None:
394
  try:
395
- ishape = getattr(seg_model, "input_shape", None)
396
- if not ishape or len(ishape) < 4:
397
- raise ValueError(f"Bad seg input_shape: {ishape}")
398
- th, tw = int(ishape[1]), int(ishape[2])
399
-
400
  x = _preprocess_for_seg(image_bgr, (th, tw))
401
  roi_seen_path = None
402
  if SMARTHEAL_DEBUG:
@@ -600,6 +760,7 @@ class AIProcessor:
600
  det_model = self.models_cache.get("det")
601
  if det_model is None:
602
  raise RuntimeError("YOLO model not loaded")
 
603
  results = det_model.predict(image_cv, verbose=False, device="cpu")
604
  if (not results) or (not getattr(results[0], "boxes", None)) or (len(results[0].boxes) == 0):
605
  try:
@@ -744,12 +905,8 @@ class AIProcessor:
744
  vs = self.knowledge_base_cache.get("vector_store")
745
  if not vs:
746
  return "Knowledge base is not available."
747
- try:
748
- retriever = vs.as_retriever(search_kwargs={"k": 5})
749
- docs = retriever.get_relevant_documents(query)
750
- except Exception:
751
- retriever = vs.as_retriever(search_kwargs={"k": 5})
752
- docs = retriever.invoke(query)
753
  lines: List[str] = []
754
  for d in docs:
755
  src = (d.metadata or {}).get("source", "N/A")
@@ -803,7 +960,7 @@ Automated analysis provides quantitative measurements; verify via clinical exami
803
  )
804
  if report and report.strip() and not report.startswith(("⚠️", "❌")):
805
  return report
806
- logging.warning("MedGemma unavailable/invalid; using fallback.")
807
  return self._generate_fallback_report(patient_info, visual_results, guideline_context)
808
  except Exception as e:
809
  logging.error(f"Report generation failed: {e}")
 
3
  # Turn on deep logging: export LOGLEVEL=DEBUG SMARTHEAL_DEBUG=1
4
 
5
  import os
 
6
  import logging
7
  from datetime import datetime
8
  from typing import Optional, Dict, List, Tuple
9
 
10
+ # ---- Environment defaults (do NOT globally hint CUDA here) ----
11
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 
12
  LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
13
  SMARTHEAL_DEBUG = os.getenv("SMARTHEAL_DEBUG", "0") == "1"
14
 
 
26
  def _log_kv(prefix: str, kv: Dict):
27
  logging.debug(prefix + " | " + " | ".join(f"{k}={v}" for k, v in kv.items()))
28
 
29
+ # --- Spaces GPU decorator (REQUIRED) ---
30
+ from spaces import GPU as _SPACES_GPU
31
 
32
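+ # A @spaces.GPU-decorated function must stay registered so the Space exposes GPU capability (hence "REQUIRED" above).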
+ @_SPACES_GPU(enable_queue=True)
33
+ def smartheal_gpu_stub(ping: int = 0) -> str:
34
+ return "ready"
35
+
36
+ # ---- Paths / constants ----
37
  UPLOADS_DIR = "uploads"
38
  os.makedirs(UPLOADS_DIR, exist_ok=True)
39
 
40
  HF_TOKEN = os.getenv("HF_TOKEN", None)
41
  YOLO_MODEL_PATH = "src/best.pt"
42
+ SEG_MODEL_PATH = "src/segmentation_model.h5" # optional; legacy .h5 supported
43
  GUIDELINE_PDFS = ["src/eHealth in Wound Care.pdf", "src/IWGDF Guideline.pdf", "src/evaluation.pdf"]
44
  DATASET_ID = "SmartHeal/wound-image-uploads"
45
  DEFAULT_PX_PER_CM = 38.0
 
53
  models_cache: Dict[str, object] = {}
54
  knowledge_base_cache: Dict[str, object] = {}
55
 
56
+ # ---------- Utilities to prevent CUDA in main process ----------
57
+ from contextlib import contextmanager
58
+
59
+ @contextmanager
60
+ def _no_cuda_env():
61
+ """
62
+ Mask GPUs so any library imported/constructed in the main process
63
+ cannot see CUDA (required for Spaces Stateless GPU).
64
+ """
65
+ prev = os.environ.get("CUDA_VISIBLE_DEVICES")
66
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
67
+ try:
68
+ yield
69
+ finally:
70
+ if prev is None:
71
+ os.environ.pop("CUDA_VISIBLE_DEVICES", None)
72
+ else:
73
+ os.environ["CUDA_VISIBLE_DEVICES"] = prev
74
+
75
+ # ---------- Lazy imports (wrapped where needed) ----------
76
  def _import_ultralytics():
77
+ # Prevent Ultralytics from probing CUDA on import
78
+ with _no_cuda_env():
79
+ from ultralytics import YOLO
80
  return YOLO
81
 
82
  def _import_tf_loader():
83
  import tensorflow as tf
84
+ tf.config.set_visible_devices([], "GPU")
85
  from tensorflow.keras.models import load_model
86
  return load_model
87
 
 
105
  from huggingface_hub import HfApi, HfFolder
106
  return HfApi, HfFolder
107
 
108
+ # ---------- SmartHeal prompts (system + user prefix) ----------
109
+ SMARTHEAL_SYSTEM_PROMPT = """\
110
+ You are SmartHeal Clinical Assistant, a wound-care decision-support system.
111
+ You analyze wound photographs and brief patient context to produce careful,
112
+ specific, guideline-informed recommendations WITHOUT diagnosing. You always:
113
+ - Use the measurements calculated by the vision pipeline as ground truth.
114
+ - Prefer concise, actionable steps tailored to exudate level, infection risk, and pain.
115
+ - Flag uncertainties and red flags that need escalation to a clinician.
116
+ - Avoid contraindicated advice; do not infer unseen comorbidities.
117
+ - Keep under 300 words and use the requested headings exactly.
118
+ - Tone: professional, clear, and conservative; no definitive medical claims.
119
+ - Safety: remind the user to seek clinician review for changes or red flags.
120
+ """
121
+
122
+ SMARTHEAL_USER_PREFIX = """\
123
+ Patient: {patient_info}
124
+ Visual findings: type={wound_type}, size={length_cm}x{breadth_cm} cm, area={area_cm2} cm^2,
125
+ detection_conf={det_conf:.2f}, calibration={px_per_cm} px/cm.
126
+ Guideline context (snippets you can draw principles from; do not quote at length):
127
+ {guideline_context}
128
+ Write a structured answer with these headings exactly:
129
+ 1. Clinical Summary (max 4 bullet points)
130
+ 2. Likely Stage/Type (if uncertain, say 'uncertain')
131
+ 3. Treatment Plan (specific dressing choices and frequency based on exudate/infection risk)
132
+ 4. Red Flags (what to escalate and when)
133
+ 5. Follow-up Cadence (days)
134
+ 6. Notes (assumptions/uncertainties)
135
+ Keep to 220–300 words. Do NOT provide diagnosis. Avoid contraindicated advice.
136
+ """
137
+
138
+ # ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
139
+ @_SPACES_GPU(enable_queue=True)
140
+ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
141
+ """
142
+ Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
143
+ """
144
+ from transformers import pipeline
145
+ import torch
146
+ pipe = pipeline(
147
+ task="image-text-to-text",
148
+ model=model_id,
149
+ torch_dtype=torch.bfloat16,
150
+ device_map="auto",
151
+ token=token,
152
+ trust_remote_code=True,
153
+ model_kwargs={"low_cpu_mem_usage": True},
154
+ )
155
+ out = pipe(text=messages, max_new_tokens=max_new_tokens, do_sample=False, temperature=0.2)
156
+ try:
157
+ txt = out[0]["generated_text"][-1].get("content", "")
158
+ except Exception:
159
+ txt = out[0].get("generated_text", "")
160
+ return (txt or "").strip() or "⚠️ Empty response"
161
+
162
+ def generate_medgemma_report( # kept name so callers don't change
163
  patient_info: str,
164
  visual_results: Dict,
165
  guideline_context: str,
166
  image_pil: Image.Image,
167
  max_new_tokens: Optional[int] = None,
168
  ) -> str:
169
+ """
170
+ MedGemma replacement using Qwen/Qwen2-VL-2B-Instruct via image-text-to-text.
171
+ Loads & runs ONLY inside a GPU worker to satisfy Stateless GPU constraints.
172
+ """
173
+ if os.getenv("SMARTHEAL_ENABLE_VLM", "1") != "1":
174
  return "⚠️ VLM disabled"
175
+
176
+ model_id = os.getenv("SMARTHEAL_VLM_MODEL", "Qwen/Qwen2-VL-2B-Instruct")
177
+ max_new_tokens = max_new_tokens or int(os.getenv("SMARTHEAL_VLM_MAX_TOKENS", "600"))
178
+
179
+ uprompt = SMARTHEAL_USER_PREFIX.format(
180
+ patient_info=patient_info,
181
+ wound_type=visual_results.get("wound_type", "Unknown"),
182
+ length_cm=visual_results.get("length_cm", 0),
183
+ breadth_cm=visual_results.get("breadth_cm", 0),
184
+ area_cm2=visual_results.get("surface_area_cm2", 0),
185
+ det_conf=float(visual_results.get("detection_confidence", 0.0)),
186
+ px_per_cm=visual_results.get("px_per_cm", "?"),
187
+ guideline_context=(guideline_context or "")[:900],
188
+ )
189
+
190
+ messages = [
191
+ {"role": "system", "content": [{"type": "text", "text": SMARTHEAL_SYSTEM_PROMPT}]},
192
+ {"role": "user", "content": [
 
193
  {"type": "image", "image": image_pil},
194
+ {"type": "text", "text": uprompt},
195
+ ]},
196
+ ]
197
+
198
+ try:
199
+ return _vlm_infer_gpu(messages, model_id, max_new_tokens, HF_TOKEN)
200
  except Exception as e:
201
+ logging.error(f"VLM call failed: {e}")
202
  return "⚠️ VLM error"
203
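A usage sketch, not part of the commit: the sample image path and patient values are hypothetical, but the dictionary keys match the ones generate_medgemma_report() reads above.

    from PIL import Image

    img = Image.open("uploads/sample_wound.jpg")          # hypothetical upload
    visual = {
        "wound_type": "Diabetic foot ulcer",
        "length_cm": 2.4, "breadth_cm": 1.1, "surface_area_cm2": 2.1,
        "detection_confidence": 0.87, "px_per_cm": 38.0,
    }
    report = generate_medgemma_report(
        patient_info="67-year-old, type 2 diabetes",
        visual_results=visual,
        guideline_context="Offloading and moist wound healing are first-line ...",
        image_pil=img,
    )
    print(report)   # returns "⚠️ VLM error" if the GPU worker raises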
 
204
+ # ---------- Input-shape helpers (avoid `.as_list()` on strings) ----------
205
+ def _shape_to_hw(shape) -> Tuple[Optional[int], Optional[int]]:
206
+ try:
207
+ if hasattr(shape, "as_list"):
208
+ shape = shape.as_list()
209
+ except Exception:
210
+ pass
211
+ if isinstance(shape, (tuple, list)):
212
+ if len(shape) == 4: # (None, H, W, C)
213
+ H, W = shape[1], shape[2]
214
+ elif len(shape) == 3: # (H, W, C)
215
+ H, W = shape[0], shape[1]
216
+ else:
217
+ return (None, None)
218
+ try: H = int(H) if (H is not None and str(H).lower() != "none") else None
219
+ except Exception: H = None
220
+ try: W = int(W) if (W is not None and str(W).lower() != "none") else None
221
+ except Exception: W = None
222
+ return (H, W)
223
+ return (None, None)
224
+
225
+ def _get_model_input_hw(model, default_hw: Tuple[int, int] = (224, 224)) -> Tuple[int, int]:
226
+ H, W = _shape_to_hw(getattr(model, "input_shape", None))
227
+ if H and W:
228
+ return H, W
229
+ try:
230
+ inputs = getattr(model, "inputs", None)
231
+ if inputs:
232
+ H, W = _shape_to_hw(inputs[0].shape)
233
+ if H and W:
234
+ return H, W
235
+ except Exception:
236
+ pass
237
+ try:
238
+ cfg = model.get_config() if hasattr(model, "get_config") else None
239
+ if isinstance(cfg, dict):
240
+ for layer in cfg.get("layers", []):
241
+ conf = (layer or {}).get("config", {})
242
+ cand = conf.get("batch_input_shape") or conf.get("batch_shape")
243
+ H, W = _shape_to_hw(cand)
244
+ if H and W:
245
+ return H, W
246
+ except Exception:
247
+ pass
248
+ logging.warning(f"Could not resolve model input shape; using default {default_hw}.")
249
+ return default_hw
250
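An illustrative check, not part of the commit, of the two helpers above on a toy Keras model; the last call shows the default kicking in when no shape information can be resolved.

    import tensorflow as tf

    inp = tf.keras.Input(shape=(224, 224, 3))
    toy = tf.keras.Model(inp, tf.keras.layers.Conv2D(1, 3)(inp))
    print(_get_model_input_hw(toy))                    # -> (224, 224)
    print(_shape_to_hw((None, 128, 160, 3)))           # -> (128, 160)
    print(_get_model_input_hw(object(), (96, 96)))     # no shape info -> (96, 96) default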
+
251
  # ---------- Initialize CPU models ----------
252
  def load_yolo_model():
253
  YOLO = _import_ultralytics()
254
+ with _no_cuda_env():
255
+ model = YOLO(YOLO_MODEL_PATH)
256
+ return model
257
 
258
+ def load_segmentation_model(path: Optional[str] = None):
259
+ """
260
+ Robust loader for legacy .h5 models across TF/Keras versions.
261
+ Uses global SEG_MODEL_PATH by default.
262
+ """
263
+ import ast
264
+ import tensorflow as tf
265
+ tf.config.set_visible_devices([], "GPU")
266
+ model_path = path or SEG_MODEL_PATH
267
+
268
+ # Attempt 1: tf.keras with safe_mode=False
269
+ try:
270
+ m = tf.keras.models.load_model(model_path, compile=False, safe_mode=False)
271
+ logging.info("✅ Segmentation model loaded (tf.keras, safe_mode=False).")
272
+ return m
273
+ except Exception as e1:
274
+ logging.warning(f"tf.keras load (safe_mode=False) failed: {e1}")
275
+
276
+ # Attempt 2: patched InputLayer (drop legacy args; coerce string shapes)
277
+ try:
278
+ from tensorflow.keras.layers import InputLayer as _KInputLayer
279
+ def _InputLayerPatched(*args, **kwargs):
280
+ kwargs.pop("batch_shape", None)
281
+ kwargs.pop("batch_input_shape", None)
282
+ if "shape" in kwargs and isinstance(kwargs["shape"], str):
283
+ try:
284
+ kwargs["shape"] = tuple(ast.literal_eval(kwargs["shape"]))
285
+ except Exception:
286
+ kwargs.pop("shape", None)
287
+ return _KInputLayer(**kwargs)
288
+ m = tf.keras.models.load_model(
289
+ model_path,
290
+ compile=False,
291
+ custom_objects={"InputLayer": _InputLayerPatched},
292
+ safe_mode=False,
293
+ )
294
+ logging.info("✅ Segmentation model loaded (patched InputLayer).")
295
+ return m
296
+ except Exception as e2:
297
+ logging.warning(f"Patched InputLayer load failed: {e2}")
298
+
299
+ # Attempt 3: keras 2 shim (tf_keras) if present
300
+ try:
301
+ import tf_keras
302
+ m = tf_keras.models.load_model(model_path, compile=False)
303
+ logging.info("✅ Segmentation model loaded (tf_keras compat).")
304
+ return m
305
+ except Exception as e3:
306
+ logging.warning(f"tf_keras load failed or not installed: {e3}")
307
+
308
+ raise RuntimeError("Segmentation model could not be loaded; please convert/resave the model.")
309
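If all three attempts fail, one option (an assumption, not something this commit does) is a one-off resave in an environment where the legacy .h5 still loads, so later runs skip the fallbacks entirely:

    import tensorflow as tf

    m = tf.keras.models.load_model("src/segmentation_model.h5", compile=False)
    m.save("src/segmentation_model.keras")   # native Keras format; point SEG_MODEL_PATH at the new file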
 
310
  def load_classification_pipeline():
311
  pipe = _import_hf_cls()
 
327
  if "det" not in models_cache:
328
  try:
329
  models_cache["det"] = load_yolo_model()
330
+ logging.info("✅ YOLO loaded (CPU; CUDA masked in main)")
331
  except Exception as e:
332
  logging.error(f"YOLO load failed: {e}")
333
 
334
  if "seg" not in models_cache:
335
  try:
336
  if os.path.exists(SEG_MODEL_PATH):
337
+ m = load_segmentation_model() # uses global path by default
338
+ models_cache["seg"] = m
339
+ th, tw = _get_model_input_hw(m, default_hw=(224, 224))
340
  oshape = getattr(m, "output_shape", None)
341
+ logging.info(f"✅ Segmentation model loaded (CPU) | input_hw=({th},{tw}) output_shape={oshape}")
342
  else:
343
  models_cache["seg"] = None
344
  logging.warning("Segmentation model file missing; skipping.")
 
507
  seed_dil = cv2.dilate(seed01, k, iterations=1)
508
  gc[seed01.astype(bool)] = cv2.GC_PR_FGD
509
  gc[seed_dil.astype(bool)] = cv2.GC_FGD
510
+ gc[0, :], gc[-1, :], gc[:, 0], gc[:, -1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
511
  bgdModel = np.zeros((1, 65), np.float64)
512
  fgdModel = np.zeros((1, 65), np.float64)
513
  cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
 
556
  # --- Model path ---
557
  if seg_model is not None:
558
  try:
559
+ th, tw = _get_model_input_hw(seg_model, default_hw=(224, 224))
560
  x = _preprocess_for_seg(image_bgr, (th, tw))
561
  roi_seen_path = None
562
  if SMARTHEAL_DEBUG:
 
760
  det_model = self.models_cache.get("det")
761
  if det_model is None:
762
  raise RuntimeError("YOLO model not loaded")
763
+ # Force CPU inference and avoid CUDA touch
764
  results = det_model.predict(image_cv, verbose=False, device="cpu")
765
  if (not results) or (not getattr(results[0], "boxes", None)) or (len(results[0].boxes) == 0):
766
  try:
 
905
  vs = self.knowledge_base_cache.get("vector_store")
906
  if not vs:
907
  return "Knowledge base is not available."
908
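+ # invoke() is the current LangChain retriever API; get_relevant_documents() is deprecated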
+ retriever = vs.as_retriever(search_kwargs={"k": 5})
909
+ docs = retriever.invoke(query)
910
  lines: List[str] = []
911
  for d in docs:
912
  src = (d.metadata or {}).get("source", "N/A")
 
960
  )
961
  if report and report.strip() and not report.startswith(("⚠️", "❌")):
962
  return report
963
+ logging.warning("VLM unavailable/invalid; using fallback.")
964
  return self._generate_fallback_report(patient_info, visual_results, guideline_context)
965
  except Exception as e:
966
  logging.error(f"Report generation failed: {e}")