Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 26

Commit

69083e6

1 Parent(s): 1109131

Update utils/refinement.py

Browse files

Files changed (1) hide show

utils/refinement.py +97 -15

utils/refinement.py CHANGED Viewed

@@ -10,6 +10,7 @@
 import cv2
 import numpy as np
 log = logging.getLogger(__name__)
@@ -84,21 +85,102 @@ def _refine_with_matanyone(
     model: Any
 ) -> np.ndarray:
     """Use MatAnyone model for mask refinement."""
-    # Check if model has expected interface
-    if hasattr(model, 'process'):
-        result = model.process(image, mask)
-    elif hasattr(model, 'refine'):
-        result = model.refine(image, mask)
-    elif callable(model):
-        result = model(image, mask)
-    else:
-        raise MaskRefinementError("MatAnyone model doesn't have expected interface")
-    # Convert result to binary mask
-    if result is None:
-        raise MaskRefinementError("MatAnyone returned None")
-    return _process_mask(result)
 # ============================================================================
 # CLASSICAL REFINEMENT

 import cv2
 import numpy as np
+import torch
 log = logging.getLogger(__name__)
     model: Any
 ) -> np.ndarray:
     """Use MatAnyone model for mask refinement."""
+    try:
+        # MatAnyone's InferenceCore expects torch tensors
+        # Convert BGR to RGB and normalize
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        h, w = image_rgb.shape[:2]
+        # Convert to torch tensor format (C, H, W) and normalize to [0, 1]
+        image_tensor = torch.from_numpy(image_rgb).permute(2, 0, 1).float() / 255.0
+        image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension (1, C, H, W)
+        # Ensure mask is binary uint8
+        if mask.dtype != np.uint8:
+            mask = (mask * 255).astype(np.uint8) if mask.max() <= 1 else mask.astype(np.uint8)
+        if mask.ndim == 3:
+            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+        # Convert mask to tensor
+        mask_tensor = torch.from_numpy(mask).float() / 255.0
+        mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0)  # (1, 1, H, W)
+        # MatAnyone InferenceCore workflow for single frame
+        # The model should have been initialized as InferenceCore(matanyone_model)
+        result = None
+        if hasattr(model, 'process_frame'):
+            # Single frame processing method
+            with torch.no_grad():
+                result = model.process_frame(image_tensor, mask_tensor)
+        elif hasattr(model, 'step'):
+            # Step method for iterative processing
+            with torch.no_grad():
+                # Initialize memory with first frame
+                model.reset()
+                # Process frame with mask
+                result = model.step(image_tensor, mask_tensor)
+        elif hasattr(model, 'forward'):
+            # Direct forward pass
+            with torch.no_grad():
+                result = model.forward(image_tensor, mask_tensor)
+        elif hasattr(model, 'predict'):
+            # Predict method
+            with torch.no_grad():
+                result = model.predict(image_tensor, mask_tensor)
+        elif hasattr(model, '__call__'):
+            # Callable model
+            with torch.no_grad():
+                result = model(image_tensor, mask_tensor)
+        else:
+            # Try to find any method that might work
+            methods = [m for m in dir(model) if not m.startswith('_')]
+            processing_methods = [m for m in methods if any(keyword in m.lower()
+                                 for keyword in ['process', 'refine', 'matte', 'alpha', 'predict'])]
+            if processing_methods:
+                method = getattr(model, processing_methods[0])
+                with torch.no_grad():
+                    result = method(image_tensor, mask_tensor)
+            else:
+                raise MaskRefinementError(f"MatAnyone model has no recognized processing method. Available methods: {methods}")
+        if result is None:
+            raise MaskRefinementError("MatAnyone returned None")
+        # Handle different return types
+        if isinstance(result, tuple) or isinstance(result, list):
+            # Extract alpha matte from tuple/list result
+            alpha = result[0] if len(result) > 0 else None
+        elif isinstance(result, dict):
+            # Extract from dictionary result
+            alpha = result.get('alpha', result.get('matte', result.get('mask', None)))
+        else:
+            alpha = result
+        if alpha is None:
+            raise MaskRefinementError("Could not extract alpha matte from MatAnyone result")
+        # Convert back to numpy
+        if isinstance(alpha, torch.Tensor):
+            alpha = alpha.squeeze().cpu().numpy()  # Remove batch dimensions
+        # Ensure proper shape
+        if alpha.ndim == 3:
+            alpha = alpha[0] if alpha.shape[0] == 1 else alpha.mean(axis=0)
+        # Convert to uint8
+        if alpha.dtype != np.uint8:
+            alpha = (alpha * 255).clip(0, 255).astype(np.uint8)
+        # Resize if needed
+        if alpha.shape != (h, w):
+            alpha = cv2.resize(alpha, (w, h), interpolation=cv2.INTER_LINEAR)
+        return _process_mask(alpha)
+    except Exception as e:
+        log.error(f"MatAnyone processing error: {str(e)}")
+        raise MaskRefinementError(f"MatAnyone processing failed: {str(e)}")
 # ============================================================================
 # CLASSICAL REFINEMENT