Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 28

Commit

1aea709

1 Parent(s): a099dfd

Create models/loaders/matanyone_loader.py

Browse files

Files changed (1) hide show

models/loaders/matanyone_loader.py +215 -0

models/loaders/matanyone_loader.py ADDED Viewed

	@@ -0,0 +1,215 @@

+#!/usr/bin/env python3
+"""
+MatAnyone Model Loader
+Handles MatAnyone loading with proper device initialization
+"""
+import os
+import time
+import logging
+import traceback
+from pathlib import Path
+from typing import Optional, Dict, Any
+import torch
+import numpy as np
+logger = logging.getLogger(__name__)
class MatAnyoneLoader:
    """Dedicated loader for MatAnyone models.

    ``load()`` first tries the official ``matanyone.InferenceCore`` and, if
    that raises or yields nothing, falls back to a lightweight stand-in that
    only smooths the incoming mask. The official processor has its ``step``
    and ``process`` attributes replaced by wrappers that coerce inputs to
    float tensors on ``self.device`` before delegating.
    """

    class FallbackMatAnyone:
        """Stand-in processor used when the real MatAnyone cannot be loaded.

        It performs no matting: NumPy masks receive a light Gaussian blur
        (when OpenCV is importable) and any other input is returned as-is.
        """

        def __init__(self, device):
            self.device = device
            # Resolve OpenCV once. A missing install must not make the
            # last-resort processor raise on every call to ``step``.
            try:
                import cv2
            except ImportError:
                cv2 = None
            self._cv2 = cv2

        def step(self, image, mask, idx_mask=False, **kwargs):
            """Pass through ``mask`` with minor smoothing; ``image`` is unused.

            Returns:
                A blurred copy for 2-D / 3-D NumPy masks when OpenCV is
                available, otherwise ``mask`` itself.
            """
            cv2 = self._cv2
            if cv2 is None or not isinstance(mask, np.ndarray):
                return mask
            if mask.ndim == 2:
                # Slight Gaussian blur for edge smoothing.
                return cv2.GaussianBlur(mask, (5, 5), 1.0)
            if mask.ndim == 3:
                # Blur each slice along the first axis independently.
                smoothed = np.zeros_like(mask)
                for index, plane in enumerate(mask):
                    smoothed[index] = cv2.GaussianBlur(plane, (5, 5), 1.0)
                return smoothed
            return mask

        def process(self, image, mask, **kwargs):
            """Alias for :meth:`step`."""
            return self.step(image, mask, **kwargs)

    def __init__(self, device: str = "cuda", cache_dir: str = "./checkpoints/matanyone_cache"):
        """Record configuration and make sure the cache directory exists.

        Args:
            device: Device string that the model and wrapper inputs are
                moved to via ``.to(device)``.
            cache_dir: Directory created on construction (parents included).
                NOTE(review): nothing else in this class reads it.
        """
        self.device = device
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)
        self.model = None
        self.model_id = "PeiqingYang/MatAnyone"
        self.load_time = 0.0

    def load(self) -> Optional[Any]:
        """Load a MatAnyone processor, trying each strategy in order.

        On success the processor is stored in ``self.model`` and the time
        the winning strategy took is stored in ``self.load_time``.

        Returns:
            The loaded processor, or ``None`` if every strategy failed.
        """
        logger.info(f"Loading MatAnyone model: {self.model_id}")
        strategies = (
            ("official", self._load_official),
            ("fallback", self._load_fallback),
        )
        for strategy_name, strategy_func in strategies:
            logger.info(f"Trying MatAnyone loading strategy: {strategy_name}")
            start_time = time.perf_counter()
            try:
                model = strategy_func()
            except Exception as e:
                # Best-effort boundary: any failure moves on to the next strategy.
                logger.error(f"MatAnyone {strategy_name} strategy failed: {e}")
                logger.debug(traceback.format_exc())
                continue
            # Identity check, not truthiness: a valid model object may
            # legitimately evaluate as falsy (e.g. it defines ``__len__``).
            if model is not None:
                self.load_time = time.perf_counter() - start_time
                self.model = model
                logger.info(f"MatAnyone loaded successfully via {strategy_name} in {self.load_time:.2f}s")
                return model
        logger.error("All MatAnyone loading strategies failed")
        return None

    def _load_official(self) -> Optional[Any]:
        """Load using the official MatAnyone API.

        Raises:
            Whatever the ``matanyone`` import or ``InferenceCore``
            construction raises; ``load()`` catches it.
        """
        # Imported lazily so this module still imports without matanyone.
        from matanyone import InferenceCore

        # Create processor - pass model ID as positional argument.
        processor = InferenceCore(self.model_id)
        # Point the processor at the requested device if it exposes one.
        if hasattr(processor, "device"):
            processor.device = self.device
        # Move the underlying model to the device and switch it to eval mode.
        if hasattr(processor, "model") and hasattr(processor.model, "to"):
            processor.model = processor.model.to(self.device)
            processor.model.eval()
        # Wrap step/process so inputs are normalised before use.
        self._patch_processor(processor)
        return processor

    @staticmethod
    def _as_device_tensor(value, device):
        """Return ``value`` as a tensor on ``device``; non-array values pass through."""
        if isinstance(value, np.ndarray):
            return torch.from_numpy(value).to(device)
        if isinstance(value, torch.Tensor):
            return value.to(device)
        return value

    def _make_safe_step(self, original_step):
        """Build the wrapper installed in place of the processor's ``step``.

        Args:
            original_step: The processor's original bound ``step``, or
                ``None`` when the processor has none.

        Returns:
            ``safe_step(image, mask=None, idx_mask=False, **kwargs)``. It
            converts inputs to tensors on the device, reshapes 3-D images to
            4-D and 2-D masks to 3-D, casts to float32, rescales values above
            1.0 by 255, then calls ``original_step``. Any exception is logged
            and the (possibly converted) mask is returned instead.
        """
        device = self.device
        to_tensor = self._as_device_tensor

        def safe_step(image, mask=None, idx_mask=False, **kwargs):
            """Wrapped step function with proper device handling."""
            try:
                image = to_tensor(image, device)
                # ``mask`` is optional: forward ``None`` rather than failing
                # on ``None.dim()`` and silently returning ``None``.
                mask = to_tensor(mask, device)

                # Image layout: treat a trailing axis of size 1/3/4 as
                # channels-last, then add a leading batch axis.
                if image.dim() == 3:
                    if image.shape[-1] in (1, 3, 4):
                        image = image.permute(2, 0, 1)
                    image = image.unsqueeze(0)
                if image.dtype != torch.float32:
                    image = image.float()
                if image.max() > 1.0:
                    image = image / 255.0

                if mask is not None:
                    if mask.dim() == 2:
                        mask = mask.unsqueeze(0)  # Add channel dimension
                    # Index masks hold labels, so they are neither cast nor scaled.
                    if not idx_mask:
                        if mask.dtype != torch.float32:
                            mask = mask.float()
                        if mask.max() > 1.0:
                            mask = mask / 255.0

                if original_step is not None:
                    return original_step(image, mask, idx_mask=idx_mask, **kwargs)
                # No original method to delegate to.
                return mask
            except Exception as e:
                logger.error(f"MatAnyone step failed: {e}")
                logger.debug(traceback.format_exc())
                # Return input mask as fallback
                return mask

        return safe_step

    def _patch_processor(self, processor):
        """Patch the processor's ``step`` / ``process`` for device and format safety.

        Only attributes the processor already has are replaced. The patched
        ``process`` routes through the patched ``step`` with
        ``idx_mask=False``; the processor's original ``process`` is not
        called.
        """
        has_step = hasattr(processor, "step")
        safe_step = self._make_safe_step(processor.step if has_step else None)

        def safe_process(image, mask=None, **kwargs):
            """Wrapped process function with proper device handling."""
            try:
                return safe_step(image, mask, idx_mask=False, **kwargs)
            except Exception as e:
                logger.error(f"MatAnyone process failed: {e}")
                return mask

        if has_step:
            processor.step = safe_step
            logger.info("Patched MatAnyone step method for device safety")
        if hasattr(processor, "process"):
            processor.process = safe_process
            logger.info("Patched MatAnyone process method for device safety")

    def _load_fallback(self) -> Optional[Any]:
        """Create the fallback processor (mask smoothing only)."""
        fallback = self.FallbackMatAnyone(self.device)
        logger.warning("Using fallback MatAnyone (limited refinement)")
        if fallback._cv2 is None:
            logger.warning("OpenCV (cv2) is not available; fallback MatAnyone returns masks unsmoothed")
        return fallback

    def cleanup(self):
        """Drop the loader's model reference and release cached CUDA memory."""
        self.model = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def get_info(self) -> Dict[str, Any]:
        """Return a summary of the loader state.

        Returns:
            Dict with ``loaded``, ``model_id``, ``device``, ``load_time`` and
            ``model_type`` (class name of the model, or ``None``).
        """
        model = self.model
        return {
            "loaded": model is not None,
            "model_id": self.model_id,
            "device": self.device,
            "load_time": self.load_time,
            # Same ``is not None`` test as "loaded" so the two never disagree.
            "model_type": type(model).__name__ if model is not None else None,
        }