SmartHeal committed on
Commit a02c8c4 · verified · 1 Parent(s): c609645

Update src/ai_processor.py

Files changed (1)
  1. src/ai_processor.py +230 -73
src/ai_processor.py CHANGED
@@ -3,14 +3,12 @@
3
  # Turn on deep logging: export LOGLEVEL=DEBUG SMARTHEAL_DEBUG=1
4
 
5
  import os
6
- import time
7
  import logging
8
  from datetime import datetime
9
  from typing import Optional, Dict, List, Tuple
10
 
11
- # ---- Environment defaults ----
12
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
13
- os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
14
  LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
15
  SMARTHEAL_DEBUG = os.getenv("SMARTHEAL_DEBUG", "0") == "1"
16
 
@@ -28,22 +26,20 @@ logging.basicConfig(
28
  def _log_kv(prefix: str, kv: Dict):
29
  logging.debug(prefix + " | " + " | ".join(f"{k}={v}" for k, v in kv.items()))
30
 
31
- # --- Optional Spaces GPU stub (harmless) ---
32
- try:
33
- import spaces as _spaces
34
- @_spaces.GPU(enable_queue=False)
35
- def smartheal_gpu_stub(ping: int = 0) -> str:
36
- return "ready"
37
- logging.info("Registered @spaces.GPU stub (enable_queue=False).")
38
- except Exception:
39
- pass
40
 
41
  UPLOADS_DIR = "uploads"
42
  os.makedirs(UPLOADS_DIR, exist_ok=True)
43
 
44
  HF_TOKEN = os.getenv("HF_TOKEN", None)
45
  YOLO_MODEL_PATH = "src/best.pt"
46
- SEG_MODEL_PATH = "src/segmentation_model.h5" # optional
47
  GUIDELINE_PDFS = ["src/eHealth in Wound Care.pdf", "src/IWGDF Guideline.pdf", "src/evaluation.pdf"]
48
  DATASET_ID = "SmartHeal/wound-image-uploads"
49
  DEFAULT_PX_PER_CM = 38.0
@@ -57,17 +53,35 @@ SEG_THRESH = float(os.getenv("SEG_THRESH", "0.5"))
57
  models_cache: Dict[str, object] = {}
58
  knowledge_base_cache: Dict[str, object] = {}
59
 
60
- # ---------- Lazy imports ----------
61
  def _import_ultralytics():
62
- from ultralytics import YOLO
63
  return YOLO
64
 
65
  def _import_tf_loader():
66
  import tensorflow as tf
67
- try:
68
- tf.config.set_visible_devices([], "GPU") # keep TF on CPU
69
- except Exception:
70
- pass
71
  from tensorflow.keras.models import load_model
72
  return load_model
73
 
@@ -91,57 +105,207 @@ def _import_hf_hub():
91
  from huggingface_hub import HfApi, HfFolder
92
  return HfApi, HfFolder
93
 
94
- # ---------- VLM (disabled by default) ----------
95
- def generate_medgemma_report(
96
  patient_info: str,
97
  visual_results: Dict,
98
  guideline_context: str,
99
  image_pil: Image.Image,
100
  max_new_tokens: Optional[int] = None,
101
  ) -> str:
102
- if os.getenv("SMARTHEAL_ENABLE_VLM", "0") != "1":
103
  return "⚠️ VLM disabled"
104
- try:
105
- from transformers import pipeline
106
- pipe = pipeline(
107
- task="image-text-to-text",
108
- model="google/medgemma-4b-it",
109
- device_map=None,
110
- token=HF_TOKEN,
111
- trust_remote_code=True,
112
- model_kwargs={"low_cpu_mem_usage": True},
113
- )
114
- prompt = (
115
- "You are a medical AI assistant. Analyze this wound image and patient data.\n\n"
116
- f"Patient: {patient_info}\n"
117
- f"Wound: {visual_results.get('wound_type', 'Unknown')} - "
118
- f"{visual_results.get('length_cm', 0)}×{visual_results.get('breadth_cm', 0)} cm\n\n"
119
- "Provide a structured report with:\n"
120
- "1. Clinical Summary\n2. Treatment Recommendations\n3. Risk Assessment\n4. Monitoring Plan\n"
121
- )
122
- messages = [{"role": "user", "content": [
123
  {"type": "image", "image": image_pil},
124
- {"type": "text", "text": prompt},
125
- ]}]
126
- out = pipe(text=messages, max_new_tokens=max_new_tokens or 600, do_sample=False, temperature=0.7)
127
- if out and len(out) > 0:
128
- try:
129
- return out[0]["generated_text"][-1].get("content", "").strip() or "⚠️ Empty response"
130
- except Exception:
131
- return (out[0].get("generated_text", "") or "").strip() or "⚠️ Empty response"
132
- return "⚠️ No output generated"
133
  except Exception as e:
134
- logging.error(f" MedGemma generation error: {e}")
135
  return "⚠️ VLM error"
136
 
137
  # ---------- Initialize CPU models ----------
138
  def load_yolo_model():
139
  YOLO = _import_ultralytics()
140
- return YOLO(YOLO_MODEL_PATH)
141
 
142
- def load_segmentation_model():
143
- load_model = _import_tf_loader()
144
- return load_model(SEG_MODEL_PATH, compile=False)
145
 
146
  def load_classification_pipeline():
147
  pipe = _import_hf_cls()
@@ -163,18 +327,18 @@ def initialize_cpu_models() -> None:
163
  if "det" not in models_cache:
164
  try:
165
  models_cache["det"] = load_yolo_model()
166
- logging.info("✅ YOLO loaded (CPU)")
167
  except Exception as e:
168
  logging.error(f"YOLO load failed: {e}")
169
 
170
  if "seg" not in models_cache:
171
  try:
172
  if os.path.exists(SEG_MODEL_PATH):
173
- models_cache["seg"] = load_segmentation_model()
174
- m = models_cache["seg"]
175
- ishape = getattr(m, "input_shape", None)
176
  oshape = getattr(m, "output_shape", None)
177
- logging.info(f"✅ Segmentation model loaded (CPU) | input_shape={ishape} output_shape={oshape}")
178
  else:
179
  models_cache["seg"] = None
180
  logging.warning("Segmentation model file missing; skipping.")
@@ -343,7 +507,7 @@ def _grabcut_refine(bgr: np.ndarray, seed01: np.ndarray, iters: int = 3) -> np.n
343
  seed_dil = cv2.dilate(seed01, k, iterations=1)
344
  gc[seed01.astype(bool)] = cv2.GC_PR_FGD
345
  gc[seed_dil.astype(bool)] = cv2.GC_FGD
346
- gc[0, :], gc[-1, :], gc[:, 0], gc[:, -1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
347
  bgdModel = np.zeros((1, 65), np.float64)
348
  fgdModel = np.zeros((1, 65), np.float64)
349
  cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
@@ -392,11 +556,7 @@ def segment_wound(image_bgr: np.ndarray, ts: str, out_dir: str) -> Tuple[np.ndar
392
  # --- Model path ---
393
  if seg_model is not None:
394
  try:
395
- ishape = getattr(seg_model, "input_shape", None)
396
- if not ishape or len(ishape) < 4:
397
- raise ValueError(f"Bad seg input_shape: {ishape}")
398
- th, tw = int(ishape[1]), int(ishape[2])
399
-
400
  x = _preprocess_for_seg(image_bgr, (th, tw))
401
  roi_seen_path = None
402
  if SMARTHEAL_DEBUG:
@@ -600,6 +760,7 @@ class AIProcessor:
600
  det_model = self.models_cache.get("det")
601
  if det_model is None:
602
  raise RuntimeError("YOLO model not loaded")
 
603
  results = det_model.predict(image_cv, verbose=False, device="cpu")
604
  if (not results) or (not getattr(results[0], "boxes", None)) or (len(results[0].boxes) == 0):
605
  try:
@@ -744,12 +905,8 @@ class AIProcessor:
744
  vs = self.knowledge_base_cache.get("vector_store")
745
  if not vs:
746
  return "Knowledge base is not available."
747
- try:
748
- retriever = vs.as_retriever(search_kwargs={"k": 5})
749
- docs = retriever.get_relevant_documents(query)
750
- except Exception:
751
- retriever = vs.as_retriever(search_kwargs={"k": 5})
752
- docs = retriever.invoke(query)
753
  lines: List[str] = []
754
  for d in docs:
755
  src = (d.metadata or {}).get("source", "N/A")
@@ -803,7 +960,7 @@ Automated analysis provides quantitative measurements; verify via clinical exami
803
  )
804
  if report and report.strip() and not report.startswith(("⚠️", "❌")):
805
  return report
806
- logging.warning("MedGemma unavailable/invalid; using fallback.")
807
  return self._generate_fallback_report(patient_info, visual_results, guideline_context)
808
  except Exception as e:
809
  logging.error(f"Report generation failed: {e}")
 
3
  # Turn on deep logging: export LOGLEVEL=DEBUG SMARTHEAL_DEBUG=1
4
 
5
  import os
 
6
  import logging
7
  from datetime import datetime
8
  from typing import Optional, Dict, List, Tuple
9
 
10
+ # ---- Environment defaults (do NOT globally hint CUDA here) ----
11
  os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
 
12
  LOGLEVEL = os.getenv("LOGLEVEL", "INFO").upper()
13
  SMARTHEAL_DEBUG = os.getenv("SMARTHEAL_DEBUG", "0") == "1"
14
 
 
26
  def _log_kv(prefix: str, kv: Dict):
27
  logging.debug(prefix + " | " + " | ".join(f"{k}={v}" for k, v in kv.items()))
28
 
29
+ # --- Spaces GPU decorator (REQUIRED) ---
30
+ from spaces import GPU as _SPACES_GPU
31
 
32
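+ # A @spaces.GPU-decorated function must stay registered so the Space exposes GPU capability (hence "REQUIRED" above).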
+ @_SPACES_GPU(enable_queue=True)
33
+ def smartheal_gpu_stub(ping: int = 0) -> str:
34
+ return "ready"
35
+
36
+ # ---- Paths / constants ----
37
  UPLOADS_DIR = "uploads"
38
  os.makedirs(UPLOADS_DIR, exist_ok=True)
39
 
40
  HF_TOKEN = os.getenv("HF_TOKEN", None)
41
  YOLO_MODEL_PATH = "src/best.pt"
42
+ SEG_MODEL_PATH = "src/segmentation_model.h5" # optional; legacy .h5 supported
43
  GUIDELINE_PDFS = ["src/eHealth in Wound Care.pdf", "src/IWGDF Guideline.pdf", "src/evaluation.pdf"]
44
  DATASET_ID = "SmartHeal/wound-image-uploads"
45
  DEFAULT_PX_PER_CM = 38.0
 
53
  models_cache: Dict[str, object] = {}
54
  knowledge_base_cache: Dict[str, object] = {}
55
 
56
+ # ---------- Utilities to prevent CUDA in main process ----------
57
+ from contextlib import contextmanager
58
+
59
+ @contextmanager
60
+ def _no_cuda_env():
61
+ """
62
+ Mask GPUs so any library imported/constructed in the main process
63
+ cannot see CUDA (required for Spaces Stateless GPU).
64
+ """
65
+ prev = os.environ.get("CUDA_VISIBLE_DEVICES")
66
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
67
+ try:
68
+ yield
69
+ finally:
70
+ if prev is None:
71
+ os.environ.pop("CUDA_VISIBLE_DEVICES", None)
72
+ else:
73
+ os.environ["CUDA_VISIBLE_DEVICES"] = prev
74
+
75
+ # ---------- Lazy imports (wrapped where needed) ----------
76
  def _import_ultralytics():
77
+ # Prevent Ultralytics from probing CUDA on import
78
+ with _no_cuda_env():
79
+ from ultralytics import YOLO
80
  return YOLO
81
 
82
  def _import_tf_loader():
83
  import tensorflow as tf
84
+ tf.config.set_visible_devices([], "GPU")
85
  from tensorflow.keras.models import load_model
86
  return load_model
87
 
 
105
  from huggingface_hub import HfApi, HfFolder
106
  return HfApi, HfFolder
107
 
108
+ # ---------- SmartHeal prompts (system + user prefix) ----------
109
+ SMARTHEAL_SYSTEM_PROMPT = """\
110
+ You are SmartHeal Clinical Assistant, a wound-care decision-support system.
111
+ You analyze wound photographs and brief patient context to produce careful,
112
+ specific, guideline-informed recommendations WITHOUT diagnosing. You always:
113
+ - Use the measurements calculated by the vision pipeline as ground truth.
114
+ - Prefer concise, actionable steps tailored to exudate level, infection risk, and pain.
115
+ - Flag uncertainties and red flags that need escalation to a clinician.
116
+ - Avoid contraindicated advice; do not infer unseen comorbidities.
117
+ - Keep under 300 words and use the requested headings exactly.
118
+ - Tone: professional, clear, and conservative; no definitive medical claims.
119
+ - Safety: remind the user to seek clinician review for changes or red flags.
120
+ """
121
+
122
+ SMARTHEAL_USER_PREFIX = """\
123
+ Patient: {patient_info}
124
+ Visual findings: type={wound_type}, size={length_cm}x{breadth_cm} cm, area={area_cm2} cm^2,
125
+ detection_conf={det_conf:.2f}, calibration={px_per_cm} px/cm.
126
+ Guideline context (snippets you can draw principles from; do not quote at length):
127
+ {guideline_context}
128
+ Write a structured answer with these headings exactly:
129
+ 1. Clinical Summary (max 4 bullet points)
130
+ 2. Likely Stage/Type (if uncertain, say 'uncertain')
131
+ 3. Treatment Plan (specific dressing choices and frequency based on exudate/infection risk)
132
+ 4. Red Flags (what to escalate and when)
133
+ 5. Follow-up Cadence (days)
134
+ 6. Notes (assumptions/uncertainties)
135
+ Keep to 220–300 words. Do NOT provide diagnosis. Avoid contraindicated advice.
136
+ """
137
+
138
+ # ---------- VLM (MedGemma replaced with Qwen2-VL) ----------
139
+ @_SPACES_GPU(enable_queue=True)
140
+ def _vlm_infer_gpu(messages, model_id: str, max_new_tokens: int, token: Optional[str]):
141
+ """
142
+ Runs entirely inside a Spaces GPU worker. It's the ONLY place we allow CUDA init.
143
+ """
144
+ from transformers import pipeline
145
+ import torch
146
+ pipe = pipeline(
147
+ task="image-text-to-text",
148
+ model=model_id,
149
+ torch_dtype=torch.bfloat16,
150
+ device_map="auto",
151
+ token=token,
152
+ trust_remote_code=True,
153
+ model_kwargs={"low_cpu_mem_usage": True},
154
+ )
155
+ out = pipe(text=messages, max_new_tokens=max_new_tokens, do_sample=False, temperature=0.2)
156
+ try:
157
+ txt = out[0]["generated_text"][-1].get("content", "")
158
+ except Exception:
159
+ txt = out[0].get("generated_text", "")
160
+ return (txt or "").strip() or "⚠️ Empty response"
161
+
162
+ def generate_medgemma_report( # kept name so callers don't change
163
  patient_info: str,
164
  visual_results: Dict,
165
  guideline_context: str,
166
  image_pil: Image.Image,
167
  max_new_tokens: Optional[int] = None,
168
  ) -> str:
169
+ """
170
+ MedGemma replacement using Qwen/Qwen2-VL-2B-Instruct via image-text-to-text.
171
+ Loads & runs ONLY inside a GPU worker to satisfy Stateless GPU constraints.
172
+ """
173
+ if os.getenv("SMARTHEAL_ENABLE_VLM", "1") != "1":
174
  return "⚠️ VLM disabled"
175
+
176
+ model_id = os.getenv("SMARTHEAL_VLM_MODEL", "Qwen/Qwen2-VL-2B-Instruct")
177
+ max_new_tokens = max_new_tokens or int(os.getenv("SMARTHEAL_VLM_MAX_TOKENS", "600"))
178
+
179
+ uprompt = SMARTHEAL_USER_PREFIX.format(
180
+ patient_info=patient_info,
181
+ wound_type=visual_results.get("wound_type", "Unknown"),
182
+ length_cm=visual_results.get("length_cm", 0),
183
+ breadth_cm=visual_results.get("breadth_cm", 0),
184
+ area_cm2=visual_results.get("surface_area_cm2", 0),
185
+ det_conf=float(visual_results.get("detection_confidence", 0.0)),
186
+ px_per_cm=visual_results.get("px_per_cm", "?"),
187
+ guideline_context=(guideline_context or "")[:900],
188
+ )
189
+
190
+ messages = [
191
+ {"role": "system", "content": [{"type": "text", "text": SMARTHEAL_SYSTEM_PROMPT}]},
192
+ {"role": "user", "content": [
 
193
  {"type": "image", "image": image_pil},
194
+ {"type": "text", "text": uprompt},
195
+ ]},
196
+ ]
197
+
198
+ try:
199
+ return _vlm_infer_gpu(messages, model_id, max_new_tokens, HF_TOKEN)
200
  except Exception as e:
201
+ logging.error(f"VLM call failed: {e}")
202
  return "⚠️ VLM error"
203
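A usage sketch, not part of the commit: the sample image path and patient values are hypothetical, but the dictionary keys match the ones generate_medgemma_report() reads above.

    from PIL import Image

    img = Image.open("uploads/sample_wound.jpg")          # hypothetical upload
    visual = {
        "wound_type": "Diabetic foot ulcer",
        "length_cm": 2.4, "breadth_cm": 1.1, "surface_area_cm2": 2.1,
        "detection_confidence": 0.87, "px_per_cm": 38.0,
    }
    report = generate_medgemma_report(
        patient_info="67-year-old, type 2 diabetes",
        visual_results=visual,
        guideline_context="Offloading and moist wound healing are first-line ...",
        image_pil=img,
    )
    print(report)   # returns "⚠️ VLM error" if the GPU worker raises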
 
204
+ # ---------- Input-shape helpers (avoid `.as_list()` on strings) ----------
205
+ def _shape_to_hw(shape) -> Tuple[Optional[int], Optional[int]]:
206
+ try:
207
+ if hasattr(shape, "as_list"):
208
+ shape = shape.as_list()
209
+ except Exception:
210
+ pass
211
+ if isinstance(shape, (tuple, list)):
212
+ if len(shape) == 4: # (None, H, W, C)
213
+ H, W = shape[1], shape[2]
214
+ elif len(shape) == 3: # (H, W, C)
215
+ H, W = shape[0], shape[1]
216
+ else:
217
+ return (None, None)
218
+ try: H = int(H) if (H is not None and str(H).lower() != "none") else None
219
+ except Exception: H = None
220
+ try: W = int(W) if (W is not None and str(W).lower() != "none") else None
221
+ except Exception: W = None
222
+ return (H, W)
223
+ return (None, None)
224
+
225
+ def _get_model_input_hw(model, default_hw: Tuple[int, int] = (224, 224)) -> Tuple[int, int]:
226
+ H, W = _shape_to_hw(getattr(model, "input_shape", None))
227
+ if H and W:
228
+ return H, W
229
+ try:
230
+ inputs = getattr(model, "inputs", None)
231
+ if inputs:
232
+ H, W = _shape_to_hw(inputs[0].shape)
233
+ if H and W:
234
+ return H, W
235
+ except Exception:
236
+ pass
237
+ try:
238
+ cfg = model.get_config() if hasattr(model, "get_config") else None
239
+ if isinstance(cfg, dict):
240
+ for layer in cfg.get("layers", []):
241
+ conf = (layer or {}).get("config", {})
242
+ cand = conf.get("batch_input_shape") or conf.get("batch_shape")
243
+ H, W = _shape_to_hw(cand)
244
+ if H and W:
245
+ return H, W
246
+ except Exception:
247
+ pass
248
+ logging.warning(f"Could not resolve model input shape; using default {default_hw}.")
249
+ return default_hw
250
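An illustrative check, not part of the commit, of the two helpers above on a toy Keras model; the last call shows the default kicking in when no shape information can be resolved.

    import tensorflow as tf

    inp = tf.keras.Input(shape=(224, 224, 3))
    toy = tf.keras.Model(inp, tf.keras.layers.Conv2D(1, 3)(inp))
    print(_get_model_input_hw(toy))                    # -> (224, 224)
    print(_shape_to_hw((None, 128, 160, 3)))           # -> (128, 160)
    print(_get_model_input_hw(object(), (96, 96)))     # no shape info -> (96, 96) default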
+
251
  # ---------- Initialize CPU models ----------
252
  def load_yolo_model():
253
  YOLO = _import_ultralytics()
254
+ with _no_cuda_env():
255
+ model = YOLO(YOLO_MODEL_PATH)
256
+ return model
257
 
258
+ def load_segmentation_model(path: Optional[str] = None):
259
+ """
260
+ Robust loader for legacy .h5 models across TF/Keras versions.
261
+ Uses global SEG_MODEL_PATH by default.
262
+ """
263
+ import ast
264
+ import tensorflow as tf
265
+ tf.config.set_visible_devices([], "GPU")
266
+ model_path = path or SEG_MODEL_PATH
267
+
268
+ # Attempt 1: tf.keras with safe_mode=False
269
+ try:
270
+ m = tf.keras.models.load_model(model_path, compile=False, safe_mode=False)
271
+ logging.info("✅ Segmentation model loaded (tf.keras, safe_mode=False).")
272
+ return m
273
+ except Exception as e1:
274
+ logging.warning(f"tf.keras load (safe_mode=False) failed: {e1}")
275
+
276
+ # Attempt 2: patched InputLayer (drop legacy args; coerce string shapes)
277
+ try:
278
+ from tensorflow.keras.layers import InputLayer as _KInputLayer
279
+ def _InputLayerPatched(*args, **kwargs):
280
+ kwargs.pop("batch_shape", None)
281
+ kwargs.pop("batch_input_shape", None)
282
+ if "shape" in kwargs and isinstance(kwargs["shape"], str):
283
+ try:
284
+ kwargs["shape"] = tuple(ast.literal_eval(kwargs["shape"]))
285
+ except Exception:
286
+ kwargs.pop("shape", None)
287
+ return _KInputLayer(**kwargs)
288
+ m = tf.keras.models.load_model(
289
+ model_path,
290
+ compile=False,
291
+ custom_objects={"InputLayer": _InputLayerPatched},
292
+ safe_mode=False,
293
+ )
294
+ logging.info("✅ Segmentation model loaded (patched InputLayer).")
295
+ return m
296
+ except Exception as e2:
297
+ logging.warning(f"Patched InputLayer load failed: {e2}")
298
+
299
+ # Attempt 3: keras 2 shim (tf_keras) if present
300
+ try:
301
+ import tf_keras
302
+ m = tf_keras.models.load_model(model_path, compile=False)
303
+ logging.info("✅ Segmentation model loaded (tf_keras compat).")
304
+ return m
305
+ except Exception as e3:
306
+ logging.warning(f"tf_keras load failed or not installed: {e3}")
307
+
308
+ raise RuntimeError("Segmentation model could not be loaded; please convert/resave the model.")
309
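If all three attempts fail, one option (an assumption, not something this commit does) is a one-off resave in an environment where the legacy .h5 still loads, so later runs skip the fallbacks entirely:

    import tensorflow as tf

    m = tf.keras.models.load_model("src/segmentation_model.h5", compile=False)
    m.save("src/segmentation_model.keras")   # native Keras format; point SEG_MODEL_PATH at the new file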
 
310
  def load_classification_pipeline():
311
  pipe = _import_hf_cls()
 
327
  if "det" not in models_cache:
328
  try:
329
  models_cache["det"] = load_yolo_model()
330
+ logging.info("✅ YOLO loaded (CPU; CUDA masked in main)")
331
  except Exception as e:
332
  logging.error(f"YOLO load failed: {e}")
333
 
334
  if "seg" not in models_cache:
335
  try:
336
  if os.path.exists(SEG_MODEL_PATH):
337
+ m = load_segmentation_model() # uses global path by default
338
+ models_cache["seg"] = m
339
+ th, tw = _get_model_input_hw(m, default_hw=(224, 224))
340
  oshape = getattr(m, "output_shape", None)
341
+ logging.info(f"✅ Segmentation model loaded (CPU) | input_hw=({th},{tw}) output_shape={oshape}")
342
  else:
343
  models_cache["seg"] = None
344
  logging.warning("Segmentation model file missing; skipping.")
 
507
  seed_dil = cv2.dilate(seed01, k, iterations=1)
508
  gc[seed01.astype(bool)] = cv2.GC_PR_FGD
509
  gc[seed_dil.astype(bool)] = cv2.GC_FGD
510
+ gc[0, :], gc[-1, :], gc[:, 0], gc[:, -1] = cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD, cv2.GC_BGD
511
  bgdModel = np.zeros((1, 65), np.float64)
512
  fgdModel = np.zeros((1, 65), np.float64)
513
  cv2.grabCut(bgr, gc, None, bgdModel, fgdModel, iters, cv2.GC_INIT_WITH_MASK)
 
556
  # --- Model path ---
557
  if seg_model is not None:
558
  try:
559
+ th, tw = _get_model_input_hw(seg_model, default_hw=(224, 224))
560
  x = _preprocess_for_seg(image_bgr, (th, tw))
561
  roi_seen_path = None
562
  if SMARTHEAL_DEBUG:
 
760
  det_model = self.models_cache.get("det")
761
  if det_model is None:
762
  raise RuntimeError("YOLO model not loaded")
763
+ # Force CPU inference and avoid CUDA touch
764
  results = det_model.predict(image_cv, verbose=False, device="cpu")
765
  if (not results) or (not getattr(results[0], "boxes", None)) or (len(results[0].boxes) == 0):
766
  try:
 
905
  vs = self.knowledge_base_cache.get("vector_store")
906
  if not vs:
907
  return "Knowledge base is not available."
908
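+ # invoke() is the current LangChain retriever API; get_relevant_documents() is deprecated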
+ retriever = vs.as_retriever(search_kwargs={"k": 5})
909
+ docs = retriever.invoke(query)
910
  lines: List[str] = []
911
  for d in docs:
912
  src = (d.metadata or {}).get("source", "N/A")
 
960
  )
961
  if report and report.strip() and not report.startswith(("⚠️", "❌")):
962
  return report
963
+ logging.warning("VLM unavailable/invalid; using fallback.")
964
  return self._generate_fallback_report(patient_info, visual_results, guideline_context)
965
  except Exception as e:
966
  logging.error(f"Report generation failed: {e}")