Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 28

Commit

0f94e43

1 Parent(s): 345218c

Update models/loaders/matanyone_loader.py

Browse files

Files changed (1) hide show

models/loaders/matanyone_loader.py +96 -44

models/loaders/matanyone_loader.py CHANGED Viewed

@@ -87,20 +87,52 @@ def _patch_processor(self, processor):
         """
         Patch the MatAnyone processor to handle device placement and tensor formats correctly
         """
-        original_step = None
-        original_process = None
-        if hasattr(processor, 'step'):
-            original_step = processor.step
-        if hasattr(processor, 'process'):
-            original_process = processor.process
         device = self.device
-        def safe_step(image, mask, idx_mask=False, **kwargs):
-            """Wrapped step function with proper device handling"""
             try:
-                # Ensure inputs are tensors on the correct device
                 if isinstance(image, np.ndarray):
                     image = torch.from_numpy(image).to(device)
                 elif isinstance(image, torch.Tensor):
@@ -111,61 +143,81 @@ def safe_step(image, mask, idx_mask=False, **kwargs):
                 elif isinstance(mask, torch.Tensor):
                     mask = mask.to(device)
-                # Handle image format (ensure CHW or NCHW)
-                if image.dim() == 3:
-                    # HWC to CHW if needed
-                    if image.shape[-1] in [1, 3, 4]:
-                        image = image.permute(2, 0, 1)
-                    # Add batch dimension if needed
-                    if image.dim() == 3:
-                        image = image.unsqueeze(0)
-                # Handle mask format
                 if mask.dim() == 2:
-                    mask = mask.unsqueeze(0)  # Add channel dimension
-                # Ensure float tensors
                 if image.dtype != torch.float32:
                     image = image.float()
                 if not idx_mask and mask.dtype != torch.float32:
                     mask = mask.float()
-                # Normalize if needed
                 if image.max() > 1.0:
                     image = image / 255.0
                 if not idx_mask and mask.max() > 1.0:
                     mask = mask / 255.0
-                # Call original method
                 if original_step:
-                    return original_step(image, mask, idx_mask=idx_mask, **kwargs)
-                else:
-                    # Fallback if no original method
-                    return mask
-            except Exception as e:
-                logger.error(f"MatAnyone step failed: {e}")
-                logger.debug(traceback.format_exc())
-                # Return input mask as fallback
                 return mask
-        def safe_process(image, mask, **kwargs):
-            """Wrapped process function with proper device handling"""
-            try:
-                # Use safe_step for processing
-                return safe_step(image, mask, idx_mask=False, **kwargs)
             except Exception as e:
-                logger.error(f"MatAnyone process failed: {e}")
-                return mask
-        # Apply patches
         if hasattr(processor, 'step'):
-            processor.step = safe_step
-            logger.info("Patched MatAnyone step method for device safety")
         if hasattr(processor, 'process'):
-            processor.process = safe_process
-            logger.info("Patched MatAnyone process method for device safety")
     def _load_fallback(self) -> Optional[Any]:
         """Create fallback processor for testing"""

         """
         Patch the MatAnyone processor to handle device placement and tensor formats correctly
         """
+        original_step = getattr(processor, 'step', None)
+        original_process = getattr(processor, 'process', None)
         device = self.device
+        def safe_wrapper(*args, **kwargs):
+            """Universal wrapper that handles both step and process calls"""
             try:
+                # Handle different calling patterns
+                # Pattern 1: step(image, mask, idx_mask=False)
+                # Pattern 2: process(image, mask)
+                # Pattern 3: Called with just args
+                # Pattern 4: Called with kwargs
+                image = None
+                mask = None
+                idx_mask = kwargs.get('idx_mask', False)
+                # Extract image and mask
+                if 'image' in kwargs and 'mask' in kwargs:
+                    image = kwargs['image']
+                    mask = kwargs['mask']
+                elif len(args) >= 2:
+                    image = args[0]
+                    mask = args[1]
+                    if len(args) > 2:
+                        idx_mask = args[2]
+                elif len(args) == 1:
+                    # Might be called with just mask for refinement
+                    mask = args[0]
+                    # Create dummy image if needed
+                    if isinstance(mask, np.ndarray):
+                        h, w = mask.shape[:2] if mask.ndim >= 2 else (512, 512)
+                        image = np.zeros((h, w, 3), dtype=np.uint8)
+                    elif isinstance(mask, torch.Tensor):
+                        h, w = mask.shape[-2:] if mask.dim() >= 2 else (512, 512)
+                        image = torch.zeros((h, w, 3), dtype=torch.uint8)
+                if image is None or mask is None:
+                    logger.error(f"MatAnyone called with invalid args: {len(args)} args, kwargs: {kwargs.keys()}")
+                    # Return something safe
+                    if mask is not None:
+                        return mask
+                    return np.ones((512, 512), dtype=np.float32) * 0.5
+                # Convert to tensors on correct device
                 if isinstance(image, np.ndarray):
                     image = torch.from_numpy(image).to(device)
                 elif isinstance(image, torch.Tensor):
                 elif isinstance(mask, torch.Tensor):
                     mask = mask.to(device)
+                # Fix image format (ensure CHW or NCHW)
+                if image.dim() == 2:  # Grayscale HW
+                    image = image.unsqueeze(0)  # CHW
+                elif image.dim() == 3:
+                    # Check if HWC or CHW
+                    if image.shape[-1] in [1, 3, 4]:  # HWC
+                        image = image.permute(2, 0, 1)  # CHW
+                    # Add batch if needed
+                    if image.shape[0] in [1, 3, 4]:  # CHW
+                        image = image.unsqueeze(0)  # NCHW
+                elif image.dim() == 4:
+                    # Already NCHW, ensure correct channel position
+                    if image.shape[-1] in [1, 3, 4]:  # NHWC
+                        image = image.permute(0, 3, 1, 2)  # NCHW
+                # Fix mask format
                 if mask.dim() == 2:
+                    mask = mask.unsqueeze(0)  # Add channel: CHW
+                elif mask.dim() == 3:
+                    if mask.shape[0] > 4:  # Likely HWC
+                        mask = mask.permute(2, 0, 1)  # CHW
+                # Ensure float and normalized
                 if image.dtype != torch.float32:
                     image = image.float()
                 if not idx_mask and mask.dtype != torch.float32:
                     mask = mask.float()
                 if image.max() > 1.0:
                     image = image / 255.0
                 if not idx_mask and mask.max() > 1.0:
                     mask = mask / 255.0
+                # Call original method if it exists
                 if original_step:
+                    try:
+                        result = original_step(image, mask, idx_mask=idx_mask)
+                        # Convert result back to numpy if needed
+                        if isinstance(result, torch.Tensor):
+                            result = result.cpu().numpy()
+                        return result
+                    except Exception as e:
+                        logger.error(f"MatAnyone original step failed: {e}")
+                # Fallback: return slightly processed mask
+                if isinstance(mask, torch.Tensor):
+                    # Apply slight smoothing
+                    import torch.nn.functional as F
+                    mask = F.avg_pool2d(mask.unsqueeze(0), 3, stride=1, padding=1)
+                    mask = mask.squeeze(0).cpu().numpy()
                 return mask
             except Exception as e:
+                logger.error(f"MatAnyone safe_wrapper failed: {e}")
+                import traceback
+                logger.debug(traceback.format_exc())
+                # Return safe fallback
+                if 'mask' in locals() and mask is not None:
+                    if isinstance(mask, torch.Tensor):
+                        return mask.cpu().numpy()
+                    return mask
+                return np.ones((512, 512), dtype=np.float32) * 0.5
+        # Apply patches to both methods
         if hasattr(processor, 'step'):
+            processor.step = safe_wrapper
+            logger.info("Patched MatAnyone step method")
         if hasattr(processor, 'process'):
+            processor.process = safe_wrapper
+            logger.info("Patched MatAnyone process method")
+        # Also add a direct call method
+        processor.__call__ = safe_wrapper
     def _load_fallback(self) -> Optional[Any]:
         """Create fallback processor for testing"""