Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 27

Commit

04ca462

1 Parent(s): ee9591a

Update utils/cv_processing.py

Browse files

Files changed (1) hide show

utils/cv_processing.py +117 -147

utils/cv_processing.py CHANGED Viewed

@@ -8,14 +8,7 @@
   - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
   - replace_background_hq(frame, mask, background, fallback_enabled=True)
   - create_professional_background(key_or_cfg, width, height)
-  - create_gradient_background(spec, width, height)
   - validate_video_file(video_path) -> (bool, reason)
-Design:
-  * NO imports from other utils.* modules → avoids circular imports.
-  * Torch is imported lazily inside functions.
-  * All masks are single-channel float32 in [0..1] at stage boundaries.
-  * MatAnyOne gets (N,C,H,W) — no 5D tensors.
 """
 from __future__ import annotations
@@ -40,33 +33,30 @@
     "white":    {"color": (255, 255, 255), "gradient": False},
     "black":    {"color": (0, 0, 0),       "gradient": False},
 }
-# Optional alias if callers import by this name
-PROFESSIONAL_BACKGROUNDS = PROFESSIONAL_BACKGROUNDS_LOCAL
 # ----------------------------------------------------------------------------
 # Helpers
 # ----------------------------------------------------------------------------
 def _ensure_rgb(img: np.ndarray) -> np.ndarray:
-    """Convert BGR→RGB if it looks like BGR (OpenCV convention)."""
     if img is None:
         return img
     if img.ndim == 3 and img.shape[2] == 3:
         return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     return img
 def _to_mask01(m: np.ndarray) -> np.ndarray:
-    """Return single-channel float32 in [0..1]."""
     if m is None:
         return None
-    if m.ndim == 3:
         m = m[..., 0]
-    m = m.astype(np.float32, copy=False)
     if m.max() > 1.0:
         m = m / 255.0
     return np.clip(m, 0.0, 1.0)
 def _feather(mask01: np.ndarray, k: int = 2) -> np.ndarray:
-    """Tiny Gaussian feather for smoother edges."""
     if mask01.ndim == 3:
         mask01 = mask01[..., 0]
     k = max(1, int(k) * 2 + 1)
@@ -83,13 +73,6 @@ def _vertical_gradient(top: Tuple[int,int,int], bottom: Tuple[int,int,int], widt
         bg[y, :] = (r, g, b)
     return bg
-def _rotate_image(img: np.ndarray, angle_deg: float) -> np.ndarray:
-    if float(angle_deg) % 360 == 0:
-        return img
-    h, w = img.shape[:2]
-    M = cv2.getRotationMatrix2D((w/2, h/2), float(angle_deg), 1.0)
-    return cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
 def _looks_like_mask(x: Any) -> bool:
     return (
         isinstance(x, np.ndarray)
@@ -102,12 +85,6 @@ def _looks_like_mask(x: Any) -> bool:
 # Background creation (RGB)
 # ----------------------------------------------------------------------------
 def create_professional_background(key_or_cfg: Any, width: int, height: int) -> np.ndarray:
-    """
-    Accepts:
-      - key: str in preset dict
-      - cfg: {"color": (r,g,b), "gradient": bool}
-    Returns RGB uint8 image (H,W,3).
-    """
     if isinstance(key_or_cfg, str):
         cfg = PROFESSIONAL_BACKGROUNDS_LOCAL.get(key_or_cfg, PROFESSIONAL_BACKGROUNDS_LOCAL["office"])
     elif isinstance(key_or_cfg, dict):
@@ -124,41 +101,10 @@ def create_professional_background(key_or_cfg: Any, width: int, height: int) ->
     dark = (int(color[0]*0.7), int(color[1]*0.7), int(color[2]*0.7))
     return _vertical_gradient(dark, color, width, height)
-def create_gradient_background(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
-    """
-    spec: {
-      "type": "linear" | "radial",
-      "start": (r,g,b),
-      "end":   (r,g,b),
-      "angle_deg": float   # for linear only
-    }
-    Returns RGB uint8 (H,W,3).
-    """
-    gtype = str(spec.get("type", "linear")).lower()
-    start = tuple(int(c) for c in spec.get("start", (34,34,34)))
-    end   = tuple(int(c) for c in spec.get("end",   (200,200,200)))
-    if gtype == "radial":
-        yy, xx = np.mgrid[0:height, 0:width]
-        cx, cy = width / 2.0, height / 2.0
-        dist = np.sqrt((xx - cx) ** 2 + (yy - cy) ** 2)
-        dist = dist / (dist.max() + 1e-6)
-        dist = np.clip(dist, 0.0, 1.0).astype(np.float32)
-        bg = np.zeros((height, width, 3), dtype=np.uint8)
-        for i, (s, e) in enumerate(zip(start, end)):
-            channel = (s * (1.0 - dist) + e * dist).astype(np.float32)
-            bg[..., i] = np.clip(channel, 0, 255).astype(np.uint8)
-        return bg
-    else:
-        # linear: vertical interpolate then rotate to angle
-        angle = float(spec.get("angle_deg", 0.0))
-        bg = _vertical_gradient(start, end, width, height)
-        return _rotate_image(bg, angle)
 # ----------------------------------------------------------------------------
 # Segmentation
 # ----------------------------------------------------------------------------
 def _simple_person_segmentation(frame_bgr: np.ndarray) -> np.ndarray:
-    """Very simple fallback segmentation by suppressing green/white backgrounds."""
     hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)
     lower_green = np.array([40, 40, 40], dtype=np.uint8)
@@ -186,10 +132,6 @@ def segment_person_hq(
     use_sam2: Optional[bool] = None,
     **_compat_kwargs,
 ) -> np.ndarray:
-    """
-    Try SAM2 predictor if available; return single-channel float32 mask in [0..1].
-    Backward-compat: accepts use_sam2 (if False → force fallback).
-    """
     try:
         if use_sam2 is False:
             return _simple_person_segmentation(frame)
@@ -200,26 +142,17 @@ def segment_person_hq(
             h, w = rgb.shape[:2]
             center = np.array([[w // 2, h // 2]])
             labels = np.array([1])
-            res = predictor.predict(
                 point_coords=center,
                 point_labels=labels,
                 multimask_output=True
             )
-            # SAM2 predictors often return (masks, scores, logits)
-            if isinstance(res, tuple) and len(res) >= 1:
-                masks, scores = res[0], (res[1] if len(res) > 1 else None)
-            else:
-                masks, scores = res, None
             m = np.array(masks)
-            if m.ndim == 3:                 # (N,H,W)
                 idx = int(np.argmax(scores)) if scores is not None else 0
                 m = m[idx]
-            elif m.ndim != 2:               # not (H,W)
                 raise RuntimeError(f"Unexpected SAM2 mask shape: {m.shape}")
             return _to_mask01(m)
     except Exception as e:
@@ -227,11 +160,10 @@ def segment_person_hq(
     return _simple_person_segmentation(frame) if fallback_enabled else np.ones(frame.shape[:2], dtype=np.float32)
-# Back-compat alias
-segment_person_hq_original = segment_person_hq
 # ----------------------------------------------------------------------------
-# Refinement (MatAnyOne)
 # ----------------------------------------------------------------------------
 def _to_tensor_chw(img_uint8_bgr: np.ndarray) -> "torch.Tensor":
     import torch
@@ -250,12 +182,93 @@ def _tensor_to_mask01(t: "torch.Tensor") -> np.ndarray:
         t = t[0]
     return np.clip(t.detach().float().cpu().numpy(), 0.0, 1.0)
-def _simple_mask_refinement(mask01: np.ndarray) -> np.ndarray:
     m = (mask01 * 255.0).astype(np.uint8)
-    m = cv2.GaussianBlur(m, (5, 5), 0)
-    m = cv2.bilateralFilter(m, 9, 75, 75)
     return (m.astype(np.float32) / 255.0)
 def refine_mask_hq(
     frame: np.ndarray,
     mask: np.ndarray,
@@ -270,63 +283,32 @@ def refine_mask_hq(
     Backward-compat:
       - accepts use_matanyone (False → skip model)
       - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
-      - accepts mat_core=<processor> in kwargs
     """
     # tolerate legacy order: refine_mask_hq(mask, frame, ...)
-    if _looks_like_mask(frame) and isinstance(mask, np.ndarray) and mask.ndim == 3 and mask.shape[2] == 3:
-        frame, mask = mask, frame
-    # prefer explicitly passed matanyone, else legacy kw
-    if matanyone is None and "mat_core" in _compat_kwargs:
-        matanyone = _compat_kwargs.get("mat_core")
     mask01 = _to_mask01(mask)
-    try:
-        if use_matanyone is False:
-            return _simple_mask_refinement(mask01)
-        if matanyone is not None:
-            import torch
-            img_t  = _to_tensor_chw(frame).unsqueeze(0)  # (1,3,H,W)
-            mask_t = _mask_to_tensor01(mask01)           # (1,1,H,W)
-            device = "cuda" if torch.cuda.is_available() else "cpu"
-            img_t  = img_t.to(device)
-            mask_t = mask_t.to(device)
-            # Preferred path
-            if hasattr(matanyone, "step"):
-                try:
-                    with torch.inference_mode():
-                        out = matanyone.step(
-                            image_tensor=img_t,
-                            mask_tensor=mask_t,
-                            objects=None,
-                            first_frame_pred=True
-                        )
-                    if hasattr(matanyone, "output_prob_to_mask"):
-                        out = matanyone.output_prob_to_mask(out)
-                    return _tensor_to_mask01(out)
-                except Exception as e:
-                    logger.warning("MatAnyOne .step path failed: %s ; trying .process fallback if available", e)
-            # Generic fallback
-            if hasattr(matanyone, "process"):
-                try:
-                    refined = matanyone.process(frame, mask01)  # accepts numpy/PIL in many builds
-                    refined = np.asarray(refined).astype(np.float32)
-                    return _to_mask01(refined)
-                except Exception as e:
-                    logger.warning("MatAnyOne .process path also failed: %s", e)
-            logger.warning("MatAnyOne provided but neither 'step' nor 'process' usable.")
-    except Exception as e:
-        logger.warning("MatAnyOne refinement failed: %s", e)
-    return _simple_mask_refinement(mask01) if fallback_enabled else mask01
 # ----------------------------------------------------------------------------
 # Compositing
@@ -338,21 +320,14 @@ def replace_background_hq(
     fallback_enabled: bool = True,
     **_compat,
 ) -> np.ndarray:
-    """
-    Composite frame over background using feathered mask.
-    Inputs:
-      - frame:      (H,W,3) uint8 (BGR or RGB, linear blend anyway)
-      - mask01:     (H,W) or (H,W,1) float32 in [0..1]
-      - background: (H,W,3) uint8
-    Returns:
-      - composited frame (H,W,3) uint8
-    """
     try:
         H, W = frame.shape[:2]
         if background.shape[:2] != (H, W):
             background = cv2.resize(background, (W, H), interpolation=cv2.INTER_LANCZOS4)
-        m = _feather(_to_mask01(mask01), k=2)
         m3 = np.repeat(m[:, :, None], 3, axis=2)
         comp = frame.astype(np.float32) * m3 + background.astype(np.float32) * (1.0 - m3)
@@ -367,10 +342,6 @@ def replace_background_hq(
 # Video validation
 # ----------------------------------------------------------------------------
 def validate_video_file(video_path: str) -> Tuple[bool, str]:
-    """
-    Quick sanity-check before passing a file to OpenCV / FFmpeg.
-    Returns (ok, human_readable_reason)
-    """
     if not video_path or not Path(video_path).exists():
         return False, "Video file not found"
@@ -417,7 +388,6 @@ def validate_video_file(video_path: str) -> Tuple[bool, str]:
     "refine_mask_hq",
     "replace_background_hq",
     "create_professional_background",
-    "create_gradient_background",
     "validate_video_file",
     "PROFESSIONAL_BACKGROUNDS",
 ]

   - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
   - replace_background_hq(frame, mask, background, fallback_enabled=True)
   - create_professional_background(key_or_cfg, width, height)
   - validate_video_file(video_path) -> (bool, reason)
 """
 from __future__ import annotations
     "white":    {"color": (255, 255, 255), "gradient": False},
     "black":    {"color": (0, 0, 0),       "gradient": False},
 }
+PROFESSIONAL_BACKGROUNDS = PROFESSIONAL_BACKGROUNDS_LOCAL  # alias for callers
 # ----------------------------------------------------------------------------
 # Helpers
 # ----------------------------------------------------------------------------
 def _ensure_rgb(img: np.ndarray) -> np.ndarray:
     """Swap channel order BGR→RGB for 3-channel images; pass anything else through.
     No channel-order detection is performed: every 3-D array with 3 channels is
     run through cv2.COLOR_BGR2RGB, so an input that is already RGB comes back
     with its channels swapped. None, and arrays that are not 3-D with exactly
     3 channels, are returned unchanged.
     """
     if img is None:
         return img
     if img.ndim == 3 and img.shape[2] == 3:
+        # Assume OpenCV BGR → convert to RGB
         return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
     return img
 def _to_mask01(m: np.ndarray) -> np.ndarray:
     if m is None:
         return None
+    if m.ndim == 3 and m.shape[2] in (1, 3):
         m = m[..., 0]
+    m = m.astype(np.float32)
     if m.max() > 1.0:
         m = m / 255.0
     return np.clip(m, 0.0, 1.0)
 def _feather(mask01: np.ndarray, k: int = 2) -> np.ndarray:
     if mask01.ndim == 3:
         mask01 = mask01[..., 0]
     k = max(1, int(k) * 2 + 1)
         bg[y, :] = (r, g, b)
     return bg
 def _looks_like_mask(x: Any) -> bool:
     return (
         isinstance(x, np.ndarray)
 # Background creation (RGB)
 # ----------------------------------------------------------------------------
 def create_professional_background(key_or_cfg: Any, width: int, height: int) -> np.ndarray:
     if isinstance(key_or_cfg, str):
         cfg = PROFESSIONAL_BACKGROUNDS_LOCAL.get(key_or_cfg, PROFESSIONAL_BACKGROUNDS_LOCAL["office"])
     elif isinstance(key_or_cfg, dict):
     dark = (int(color[0]*0.7), int(color[1]*0.7), int(color[2]*0.7))
     return _vertical_gradient(dark, color, width, height)
 # ----------------------------------------------------------------------------
 # Segmentation
 # ----------------------------------------------------------------------------
 def _simple_person_segmentation(frame_bgr: np.ndarray) -> np.ndarray:
     hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)
     lower_green = np.array([40, 40, 40], dtype=np.uint8)
     use_sam2: Optional[bool] = None,
     **_compat_kwargs,
 ) -> np.ndarray:
     try:
         if use_sam2 is False:
             return _simple_person_segmentation(frame)
             h, w = rgb.shape[:2]
             center = np.array([[w // 2, h // 2]])
             labels = np.array([1])
+            masks, scores, _ = predictor.predict(
                 point_coords=center,
                 point_labels=labels,
                 multimask_output=True
             )
             m = np.array(masks)
+            if m.ndim == 3:
                 idx = int(np.argmax(scores)) if scores is not None else 0
                 m = m[idx]
+            elif m.ndim != 2:
                 raise RuntimeError(f"Unexpected SAM2 mask shape: {m.shape}")
             return _to_mask01(m)
     except Exception as e:
     return _simple_person_segmentation(frame) if fallback_enabled else np.ones(frame.shape[:2], dtype=np.float32)
+segment_person_hq_original = segment_person_hq  # back-compat alias
 # ----------------------------------------------------------------------------
+# MatAnyOne helpers
 # ----------------------------------------------------------------------------
 def _to_tensor_chw(img_uint8_bgr: np.ndarray) -> "torch.Tensor":
     import torch
         t = t[0]
     return np.clip(t.detach().float().cpu().numpy(), 0.0, 1.0)
+def _remap_harden(mask01: np.ndarray, inside: float = 0.70, outside: float = 0.35) -> np.ndarray:
+    """
+    Pull the mask toward {0,1} to avoid 'ghost' translucency.
+    Values <= outside -> 0; >= inside -> 1; linear in between.
+    Args:
+      mask01:  mask array; cast to float32 before remapping.
+      inside:  threshold at or above which the output is 1.
+      outside: threshold at or below which the output is 0.
+    Returns a float32 array clipped to [0..1]. If inside <= outside the
+    float32 cast of the input is returned as-is (not remapped, not clipped).
+    """
+    m = mask01.astype(np.float32)
+    if inside <= outside:
+        # Degenerate thresholds: skip the remap instead of dividing by a span <= 0.
+        return m
+    # max(1e-6, ...) keeps the divisor away from zero for near-equal thresholds.
+    m = (m - outside) / max(1e-6, (inside - outside))
+    return np.clip(m, 0.0, 1.0)
+def _pad_and_smooth_edges(mask01: np.ndarray, dilate_px: int = 6, edge_blur_px: int = 2) -> np.ndarray:
+    """
+    Grow the mask with a dilation, then soften its edge with a Gaussian blur.
+    Args:
+      mask01:       mask to process; scaled by 255 and cast to uint8 first
+                    (assumes values are already in [0..1] — confirm at call sites).
+      dilate_px:    side length of the elliptical structuring element, i.e. the
+                    kernel is (dilate_px x dilate_px); dilation is skipped when <= 0.
+      edge_blur_px: Gaussian kernel side is edge_blur_px * 2 + 1; blur is
+                    skipped when <= 0.
+    Returns the processed mask as float32, rescaled by 1/255.
+    """
+    # Work on an 8-bit copy; the result is therefore quantized to 1/255 steps.
     m = (mask01 * 255.0).astype(np.uint8)
+    if dilate_px > 0:
+        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_px, dilate_px))
+        m = cv2.dilate(m, k, iterations=1)
+    if edge_blur_px > 0:
+        # 2*n + 1 is always odd, as a Gaussian kernel size must be.
+        ksize = edge_blur_px * 2 + 1
+        m = cv2.GaussianBlur(m, (ksize, ksize), 0)
     return (m.astype(np.float32) / 255.0)
+def _try_matanyone_refine(
+    matanyone: Any,
+    frame_bgr: np.ndarray,
+    mask01: np.ndarray
+) -> Optional[np.ndarray]:
+    """
+    Try several MatAnyOne interfaces:
+      1) InferenceCore.infer(PIL_image, PIL_mask)
+      2) .step(image_tensor=NCHW, mask_tensor=NCHW)
+      3) .process(image_np, mask_np)
+      4) callable(image_tensor, mask_tensor) → tensor
+    Returns refined mask01 (np.ndarray) or None if not usable.
+    """
+    try:
+        # --- (1) PIL infer path ------------------------------------------------
+        if hasattr(matanyone, "infer"):
+            try:
+                from PIL import Image
+                img_pil  = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
+                m_pil    = Image.fromarray((mask01 * 255.0).astype(np.uint8))
+                out_pil  = matanyone.infer(img_pil, m_pil)
+                out_np   = np.asarray(out_pil).astype(np.float32)
+                return _to_mask01(out_np)
+            except Exception as e:
+                logger.debug("MatAnyOne.infer path failed: %s", e)
+        # --- (2) tensor .step path --------------------------------------------
+        if hasattr(matanyone, "step"):
+            import torch
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            img_t  = _to_tensor_chw(frame_bgr).unsqueeze(0).to(device)   # (1,3,H,W)
+            mask_t = _mask_to_tensor01(mask01).to(device)                # (1,1,H,W)
+            with torch.inference_mode():
+                out = matanyone.step(
+                    image_tensor=img_t,
+                    mask_tensor=mask_t,
+                    objects=None,
+                    first_frame_pred=True
+                )
+            if hasattr(matanyone, "output_prob_to_mask"):
+                out = matanyone.output_prob_to_mask(out)
+            return _tensor_to_mask01(out)
+        # --- (3) numpy .process path ------------------------------------------
+        if hasattr(matanyone, "process"):
+            out = matanyone.process(frame_bgr, mask01)
+            return _to_mask01(np.asarray(out))
+        # --- (4) callable / nn.Module path ------------------------------------
+        if callable(matanyone):
+            import torch
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            img_t  = _to_tensor_chw(frame_bgr).unsqueeze(0).to(device)
+            mask_t = _mask_to_tensor01(mask01).to(device)
+            with torch.inference_mode():
+                out = matanyone(img_t, mask_t)
+            return _tensor_to_mask01(out)
+    except Exception as e:
+        logger.warning("MatAnyOne refine error: %s", e)
+    return None
+# ----------------------------------------------------------------------------
+# Refinement (MatAnyOne)
+# ----------------------------------------------------------------------------
 def refine_mask_hq(
     frame: np.ndarray,
     mask: np.ndarray,
     Backward-compat:
       - accepts use_matanyone (False → skip model)
       - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
     """
     # tolerate legacy order: refine_mask_hq(mask, frame, ...)
+    if _looks_like_mask(frame) and _looks_like_mask(mask) and mask.ndim == 3 and mask.shape[2] == 3:
+        frame, mask = mask, frame  # swap
     mask01 = _to_mask01(mask)
+    # Use MatAnyOne when possible
+    if use_matanyone is not False and matanyone is not None:
+        refined = _try_matanyone_refine(matanyone, frame, mask01)
+        if refined is not None:
+            # Hardening + edge handling to avoid translucent body/halo
+            refined = _remap_harden(refined, inside=0.70, outside=0.35)
+            refined = _pad_and_smooth_edges(refined, dilate_px=4, edge_blur_px=1)
+            return refined
+        else:
+            logger.warning("MatAnyOne provided but no usable interface found; falling back.")
+    # Simple refinement fallback
+    m = (mask01 * 255.0).astype(np.uint8)
+    m = cv2.GaussianBlur(m, (5, 5), 0)
+    m = cv2.bilateralFilter(m, 9, 75, 75)
+    m = (m.astype(np.float32) / 255.0)
+    m = _remap_harden(m, inside=0.68, outside=0.40)
+    m = _pad_and_smooth_edges(m, dilate_px=3, edge_blur_px=1)
+    return m if fallback_enabled else mask01
 # ----------------------------------------------------------------------------
 # Compositing
     fallback_enabled: bool = True,
     **_compat,
 ) -> np.ndarray:
     try:
         H, W = frame.shape[:2]
         if background.shape[:2] != (H, W):
             background = cv2.resize(background, (W, H), interpolation=cv2.INTER_LANCZOS4)
+        m = _to_mask01(mask01)
+        # Very light feather to hide stair-steps; most shaping already done
+        m = _feather(m, k=1)
         m3 = np.repeat(m[:, :, None], 3, axis=2)
         comp = frame.astype(np.float32) * m3 + background.astype(np.float32) * (1.0 - m3)
 # Video validation
 # ----------------------------------------------------------------------------
 def validate_video_file(video_path: str) -> Tuple[bool, str]:
     if not video_path or not Path(video_path).exists():
         return False, "Video file not found"
     "refine_mask_hq",
     "replace_background_hq",
     "create_professional_background",
     "validate_video_file",
     "PROFESSIONAL_BACKGROUNDS",
 ]