Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Community

MogensR committed on Aug 27

Commit

04848bf

1 Parent(s): a647170

Update utils/cv_processing.py

Browse files

Files changed (1) hide show

utils/cv_processing.py +76 -16

utils/cv_processing.py CHANGED Viewed

@@ -8,6 +8,7 @@
   - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
   - replace_background_hq(frame, mask, background, fallback_enabled=True)
   - create_professional_background(key_or_cfg, width, height)
   - validate_video_file(video_path) -> (bool, reason)
 Design:
@@ -59,7 +60,7 @@ def _to_mask01(m: np.ndarray) -> np.ndarray:
         return None
     if m.ndim == 3:
         m = m[..., 0]
-    m = m.astype(np.float32)
     if m.max() > 1.0:
         m = m / 255.0
     return np.clip(m, 0.0, 1.0)
@@ -82,6 +83,13 @@ def _vertical_gradient(top: Tuple[int,int,int], bottom: Tuple[int,int,int], widt
         bg[y, :] = (r, g, b)
     return bg
 def _looks_like_mask(x: Any) -> bool:
     return (
         isinstance(x, np.ndarray)
@@ -116,6 +124,36 @@ def create_professional_background(key_or_cfg: Any, width: int, height: int) ->
     dark = (int(color[0]*0.7), int(color[1]*0.7), int(color[2]*0.7))
     return _vertical_gradient(dark, color, width, height)
 # ----------------------------------------------------------------------------
 # Segmentation
 # ----------------------------------------------------------------------------
@@ -162,12 +200,19 @@ def segment_person_hq(
             h, w = rgb.shape[:2]
             center = np.array([[w // 2, h // 2]])
             labels = np.array([1])
-            masks, scores, _ = predictor.predict(
                 point_coords=center,
                 point_labels=labels,
                 multimask_output=True
             )
             m = np.array(masks)
             if m.ndim == 3:                 # (N,H,W)
                 idx = int(np.argmax(scores)) if scores is not None else 0
@@ -225,11 +270,16 @@ def refine_mask_hq(
     Backward-compat:
       - accepts use_matanyone (False → skip model)
       - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
     """
     # tolerate legacy order: refine_mask_hq(mask, frame, ...)
     if _looks_like_mask(frame) and isinstance(mask, np.ndarray) and mask.ndim == 3 and mask.shape[2] == 3:
         frame, mask = mask, frame
     mask01 = _to_mask01(mask)
     try:
@@ -246,23 +296,32 @@ def refine_mask_hq(
             img_t  = img_t.to(device)
             mask_t = mask_t.to(device)
             if hasattr(matanyone, "step"):
-                with torch.inference_mode():
-                    out = matanyone.step(
-                        image_tensor=img_t,
-                        mask_tensor=mask_t,
-                        objects=None,
-                        first_frame_pred=True
-                    )
-                if hasattr(matanyone, "output_prob_to_mask"):
-                    out = matanyone.output_prob_to_mask(out)
-                return _tensor_to_mask01(out)
             if hasattr(matanyone, "process"):
-                refined = matanyone.process(frame, mask01)
-                return _to_mask01(np.asarray(refined))
-            logger.warning("MatAnyOne provided but neither 'step' nor 'process' found.")
     except Exception as e:
         logger.warning("MatAnyOne refinement failed: %s", e)
@@ -358,6 +417,7 @@ def validate_video_file(video_path: str) -> Tuple[bool, str]:
     "refine_mask_hq",
     "replace_background_hq",
     "create_professional_background",
     "validate_video_file",
     "PROFESSIONAL_BACKGROUNDS",
 ]

   - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
   - replace_background_hq(frame, mask, background, fallback_enabled=True)
   - create_professional_background(key_or_cfg, width, height)
+  - create_gradient_background(spec, width, height)
   - validate_video_file(video_path) -> (bool, reason)
 Design:
         return None
     if m.ndim == 3:
         m = m[..., 0]
+    m = m.astype(np.float32, copy=False)
     if m.max() > 1.0:
         m = m / 255.0
     return np.clip(m, 0.0, 1.0)
         bg[y, :] = (r, g, b)
     return bg
+def _rotate_image(img: np.ndarray, angle_deg: float) -> np.ndarray:
+    if float(angle_deg) % 360 == 0:
+        return img
+    h, w = img.shape[:2]
+    M = cv2.getRotationMatrix2D((w/2, h/2), float(angle_deg), 1.0)
+    return cv2.warpAffine(img, M, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
 def _looks_like_mask(x: Any) -> bool:
     return (
         isinstance(x, np.ndarray)
     dark = (int(color[0]*0.7), int(color[1]*0.7), int(color[2]*0.7))
     return _vertical_gradient(dark, color, width, height)
+def create_gradient_background(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
+    """
+    spec: {
+      "type": "linear" | "radial",
+      "start": (r,g,b),
+      "end":   (r,g,b),
+      "angle_deg": float   # for linear only
+    }
+    Returns RGB uint8 (H,W,3).
+    """
+    gtype = str(spec.get("type", "linear")).lower()
+    start = tuple(int(c) for c in spec.get("start", (34,34,34)))
+    end   = tuple(int(c) for c in spec.get("end",   (200,200,200)))
+    if gtype == "radial":
+        yy, xx = np.mgrid[0:height, 0:width]
+        cx, cy = width / 2.0, height / 2.0
+        dist = np.sqrt((xx - cx) ** 2 + (yy - cy) ** 2)
+        dist = dist / (dist.max() + 1e-6)
+        dist = np.clip(dist, 0.0, 1.0).astype(np.float32)
+        bg = np.zeros((height, width, 3), dtype=np.uint8)
+        for i, (s, e) in enumerate(zip(start, end)):
+            channel = (s * (1.0 - dist) + e * dist).astype(np.float32)
+            bg[..., i] = np.clip(channel, 0, 255).astype(np.uint8)
+        return bg
+    else:
+        # linear: vertical interpolate then rotate to angle
+        angle = float(spec.get("angle_deg", 0.0))
+        bg = _vertical_gradient(start, end, width, height)
+        return _rotate_image(bg, angle)
 # ----------------------------------------------------------------------------
 # Segmentation
 # ----------------------------------------------------------------------------
             h, w = rgb.shape[:2]
             center = np.array([[w // 2, h // 2]])
             labels = np.array([1])
+            res = predictor.predict(
                 point_coords=center,
                 point_labels=labels,
                 multimask_output=True
             )
+            # SAM2 predictors often return (masks, scores, logits)
+            if isinstance(res, tuple) and len(res) >= 1:
+                masks, scores = res[0], (res[1] if len(res) > 1 else None)
+            else:
+                masks, scores = res, None
             m = np.array(masks)
             if m.ndim == 3:                 # (N,H,W)
                 idx = int(np.argmax(scores)) if scores is not None else 0
     Backward-compat:
       - accepts use_matanyone (False → skip model)
       - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
+      - accepts mat_core=<processor> in kwargs
     """
     # tolerate legacy order: refine_mask_hq(mask, frame, ...)
     if _looks_like_mask(frame) and isinstance(mask, np.ndarray) and mask.ndim == 3 and mask.shape[2] == 3:
         frame, mask = mask, frame
+    # prefer explicitly passed matanyone, else legacy kw
+    if matanyone is None and "mat_core" in _compat_kwargs:
+        matanyone = _compat_kwargs.get("mat_core")
     mask01 = _to_mask01(mask)
     try:
             img_t  = img_t.to(device)
             mask_t = mask_t.to(device)
+            # Preferred path
             if hasattr(matanyone, "step"):
+                try:
+                    with torch.inference_mode():
+                        out = matanyone.step(
+                            image_tensor=img_t,
+                            mask_tensor=mask_t,
+                            objects=None,
+                            first_frame_pred=True
+                        )
+                    if hasattr(matanyone, "output_prob_to_mask"):
+                        out = matanyone.output_prob_to_mask(out)
+                    return _tensor_to_mask01(out)
+                except Exception as e:
+                    logger.warning("MatAnyOne .step path failed: %s ; trying .process fallback if available", e)
+            # Generic fallback
             if hasattr(matanyone, "process"):
+                try:
+                    refined = matanyone.process(frame, mask01)  # accepts numpy/PIL in many builds
+                    refined = np.asarray(refined).astype(np.float32)
+                    return _to_mask01(refined)
+                except Exception as e:
+                    logger.warning("MatAnyOne .process path also failed: %s", e)
+            logger.warning("MatAnyOne provided but neither 'step' nor 'process' usable.")
     except Exception as e:
         logger.warning("MatAnyOne refinement failed: %s", e)
     "refine_mask_hq",
     "replace_background_hq",
     "create_professional_background",
+    "create_gradient_background",
     "validate_video_file",
     "PROFESSIONAL_BACKGROUNDS",
 ]