Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR commited on Aug 24

Commit

e5e6fe5

1 Parent(s): 94a9c1b

Create processing/matting.py

Browse files

Files changed (1) hide show

processing/matting.py +450 -0

processing/matting.py ADDED Viewed

	@@ -0,0 +1,450 @@

+"""
+Advanced matting algorithms for BackgroundFX Pro.
+Implements multiple matting techniques with automatic fallback.
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import cv2
+from typing import Dict, Tuple, Optional, List
+from dataclasses import dataclass
+import logging
+from ..utils.logger import setup_logger
+from ..utils.device import DeviceManager
+from ..utils.config import ConfigManager
+from ..core.models import ModelFactory, ModelType
+from ..core.quality import QualityAnalyzer
+from ..core.edge import EdgeRefinement
+logger = setup_logger(__name__)
+@dataclass
+class MattingConfig:
+    """Configuration for matting operations."""
+    alpha_threshold: float = 0.5
+    erode_iterations: int = 2
+    dilate_iterations: int = 2
+    blur_radius: int = 3
+    trimap_size: int = 30
+    confidence_threshold: float = 0.7
+    use_guided_filter: bool = True
+    guided_filter_radius: int = 8
+    guided_filter_eps: float = 1e-6
+    use_temporal_smoothing: bool = False
+    temporal_window: int = 5
+class AlphaMatting:
+    """Advanced alpha matting using multiple techniques."""
+    def __init__(self, config: Optional[MattingConfig] = None):
+        self.config = config or MattingConfig()
+        self.device_manager = DeviceManager()
+        self.quality_analyzer = QualityAnalyzer()
+        self.edge_refinement = EdgeRefinement()
+    def create_trimap(self, mask: np.ndarray,
+                     dilation_size: int = None) -> np.ndarray:
+        """
+        Create trimap from binary mask.
+        Args:
+            mask: Binary mask (H, W)
+            dilation_size: Size of uncertain region
+        Returns:
+            Trimap with 0 (background), 128 (unknown), 255 (foreground)
+        """
+        dilation_size = dilation_size or self.config.trimap_size
+        # Ensure binary mask
+        if mask.dtype != np.uint8:
+            mask = (mask * 255).astype(np.uint8)
+        # Create trimap
+        trimap = np.copy(mask)
+        kernel = cv2.getStructuringElement(
+            cv2.MORPH_ELLIPSE,
+            (dilation_size, dilation_size)
+        )
+        # Dilate and erode to create unknown region
+        dilated = cv2.dilate(mask, kernel, iterations=1)
+        eroded = cv2.erode(mask, kernel, iterations=1)
+        # Set unknown region
+        trimap[dilated == 255] = 128
+        trimap[eroded == 255] = 255
+        return trimap
+    def guided_filter(self, image: np.ndarray,
+                     guide: np.ndarray,
+                     radius: int = None,
+                     eps: float = None) -> np.ndarray:
+        """
+        Apply guided filter for edge-preserving smoothing.
+        Args:
+            image: Input image to filter
+            guide: Guide image (usually RGB image)
+            radius: Filter radius
+            eps: Regularization parameter
+        Returns:
+            Filtered image
+        """
+        radius = radius or self.config.guided_filter_radius
+        eps = eps or self.config.guided_filter_eps
+        if len(guide.shape) == 3:
+            guide = cv2.cvtColor(guide, cv2.COLOR_BGR2GRAY)
+        # Convert to float32
+        guide = guide.astype(np.float32) / 255.0
+        image = image.astype(np.float32) / 255.0
+        # Box filter helper
+        def box_filter(img, r):
+            return cv2.boxFilter(img, -1, (r, r))
+        # Guided filter implementation
+        mean_I = box_filter(guide, radius)
+        mean_p = box_filter(image, radius)
+        mean_Ip = box_filter(guide * image, radius)
+        cov_Ip = mean_Ip - mean_I * mean_p
+        mean_II = box_filter(guide * guide, radius)
+        var_I = mean_II - mean_I * mean_I
+        a = cov_Ip / (var_I + eps)
+        b = mean_p - a * mean_I
+        mean_a = box_filter(a, radius)
+        mean_b = box_filter(b, radius)
+        output = mean_a * guide + mean_b
+        return np.clip(output * 255, 0, 255).astype(np.uint8)
+    def closed_form_matting(self, image: np.ndarray,
+                          trimap: np.ndarray) -> np.ndarray:
+        """
+        Closed-form matting using Laplacian matrix.
+        Simplified version for real-time processing.
+        Args:
+            image: RGB image
+            trimap: Trimap with known regions
+        Returns:
+            Alpha matte
+        """
+        h, w = trimap.shape
+        # Initialize alpha with trimap
+        alpha = np.copy(trimap).astype(np.float32) / 255.0
+        # Known regions
+        is_fg = trimap == 255
+        is_bg = trimap == 0
+        is_unknown = trimap == 128
+        if not np.any(is_unknown):
+            return alpha
+        # Simple propagation from known to unknown regions
+        # Using distance transform for efficiency
+        dist_fg = cv2.distanceTransform(
+            is_fg.astype(np.uint8),
+            cv2.DIST_L2, 5
+        )
+        dist_bg = cv2.distanceTransform(
+            is_bg.astype(np.uint8),
+            cv2.DIST_L2, 5
+        )
+        # Normalize distances
+        total_dist = dist_fg + dist_bg + 1e-10
+        alpha_unknown = dist_fg / total_dist
+        # Apply only to unknown regions
+        alpha[is_unknown] = alpha_unknown[is_unknown]
+        # Apply guided filter for smoothing
+        if self.config.use_guided_filter:
+            alpha = self.guided_filter(
+                (alpha * 255).astype(np.uint8),
+                image
+            ) / 255.0
+        return np.clip(alpha, 0, 1)
+    def deep_matting(self, image: np.ndarray,
+                    mask: np.ndarray,
+                    model: Optional[nn.Module] = None) -> np.ndarray:
+        """
+        Apply deep learning-based matting refinement.
+        Args:
+            image: RGB image
+            mask: Initial mask
+            model: Optional pre-trained model
+        Returns:
+            Refined alpha matte
+        """
+        device = self.device_manager.get_device()
+        # Prepare input
+        h, w = image.shape[:2]
+        # Resize for model input
+        input_size = (512, 512)
+        image_resized = cv2.resize(image, input_size)
+        mask_resized = cv2.resize(mask, input_size)
+        # Convert to tensor
+        image_tensor = torch.from_numpy(
+            image_resized.transpose(2, 0, 1)
+        ).float().unsqueeze(0) / 255.0
+        mask_tensor = torch.from_numpy(mask_resized).float().unsqueeze(0).unsqueeze(0) / 255.0
+        # Move to device
+        image_tensor = image_tensor.to(device)
+        mask_tensor = mask_tensor.to(device)
+        # If no model provided, use simple refinement
+        if model is None:
+            # Simple CNN-based refinement
+            with torch.no_grad():
+                # Concatenate image and mask
+                x = torch.cat([image_tensor, mask_tensor], dim=1)
+                # Simple refinement network simulation
+                refined = self._simple_refine_network(x)
+                # Convert back to numpy
+                alpha = refined.squeeze().cpu().numpy()
+        else:
+            with torch.no_grad():
+                alpha = model(image_tensor, mask_tensor)
+                alpha = alpha.squeeze().cpu().numpy()
+        # Resize back to original size
+        alpha = cv2.resize(alpha, (w, h))
+        return np.clip(alpha, 0, 1)
+    def _simple_refine_network(self, x: torch.Tensor) -> torch.Tensor:
+        """Simple refinement network for demonstration."""
+        # Extract mask channel
+        mask = x[:, 3:4, :, :]
+        # Apply series of filters
+        refined = mask
+        # Edge-aware smoothing
+        for _ in range(3):
+            refined = F.avg_pool2d(refined, 3, stride=1, padding=1)
+            refined = torch.sigmoid((refined - 0.5) * 10)
+        return refined
+    def morphological_refinement(self, alpha: np.ndarray) -> np.ndarray:
+        """
+        Apply morphological operations for refinement.
+        Args:
+            alpha: Alpha matte
+        Returns:
+            Refined alpha matte
+        """
+        # Convert to uint8
+        alpha_uint8 = (alpha * 255).astype(np.uint8)
+        # Morphological operations
+        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+        # Remove small holes
+        alpha_uint8 = cv2.morphologyEx(
+            alpha_uint8, cv2.MORPH_CLOSE, kernel,
+            iterations=self.config.erode_iterations
+        )
+        # Remove small components
+        alpha_uint8 = cv2.morphologyEx(
+            alpha_uint8, cv2.MORPH_OPEN, kernel,
+            iterations=self.config.dilate_iterations
+        )
+        # Smooth boundaries
+        if self.config.blur_radius > 0:
+            alpha_uint8 = cv2.GaussianBlur(
+                alpha_uint8,
+                (self.config.blur_radius * 2 + 1, self.config.blur_radius * 2 + 1),
+                0
+            )
+        return alpha_uint8.astype(np.float32) / 255.0
+    def process(self, image: np.ndarray,
+               mask: np.ndarray,
+               method: str = 'auto') -> Dict[str, np.ndarray]:
+        """
+        Process image with selected matting method.
+        Args:
+            image: RGB image
+            mask: Initial segmentation mask
+            method: Matting method ('auto', 'trimap', 'deep', 'guided')
+        Returns:
+            Dictionary with alpha matte and confidence
+        """
+        try:
+            # Analyze quality
+            quality_metrics = self.quality_analyzer.analyze_frame(image)
+            # Select method based on quality
+            if method == 'auto':
+                if quality_metrics['blur_score'] > 50:
+                    method = 'guided'
+                elif quality_metrics['edge_clarity'] > 0.7:
+                    method = 'trimap'
+                else:
+                    method = 'deep'
+            logger.info(f"Using matting method: {method}")
+            # Apply selected method
+            if method == 'trimap':
+                trimap = self.create_trimap(mask)
+                alpha = self.closed_form_matting(image, trimap)
+            elif method == 'deep':
+                alpha = self.deep_matting(image, mask)
+            elif method == 'guided':
+                alpha = mask.astype(np.float32) / 255.0
+                if self.config.use_guided_filter:
+                    alpha = self.guided_filter(
+                        (alpha * 255).astype(np.uint8),
+                        image
+                    ) / 255.0
+            else:
+                # Default to simple refinement
+                alpha = mask.astype(np.float32) / 255.0
+            # Apply morphological refinement
+            alpha = self.morphological_refinement(alpha)
+            # Edge refinement
+            alpha = self.edge_refinement.refine_edges(
+                image,
+                (alpha * 255).astype(np.uint8)
+            ) / 255.0
+            # Calculate confidence
+            confidence = self._calculate_confidence(alpha, quality_metrics)
+            return {
+                'alpha': alpha,
+                'confidence': confidence,
+                'method_used': method,
+                'quality_metrics': quality_metrics
+            }
+        except Exception as e:
+            logger.error(f"Matting processing failed: {e}")
+            # Return original mask as fallback
+            return {
+                'alpha': mask.astype(np.float32) / 255.0,
+                'confidence': 0.0,
+                'method_used': 'fallback',
+                'error': str(e)
+            }
+    def _calculate_confidence(self, alpha: np.ndarray,
+                            quality_metrics: Dict) -> float:
+        """Calculate confidence score for the matting result."""
+        # Base confidence from quality metrics
+        confidence = quality_metrics.get('overall_quality', 0.5)
+        # Adjust based on alpha distribution
+        alpha_mean = np.mean(alpha)
+        alpha_std = np.std(alpha)
+        # Good matting should have clear separation
+        if 0.3 < alpha_mean < 0.7 and alpha_std > 0.3:
+            confidence *= 1.2
+        # Check for edge clarity
+        edges = cv2.Canny((alpha * 255).astype(np.uint8), 50, 150)
+        edge_ratio = np.sum(edges > 0) / edges.size
+        if edge_ratio < 0.1:  # Clear boundaries
+            confidence *= 1.1
+        return np.clip(confidence, 0.0, 1.0)
+class CompositingEngine:
+    """Handle alpha compositing and blending."""
+    def __init__(self):
+        self.logger = setup_logger(f"{__name__}.CompositingEngine")
+    def composite(self, foreground: np.ndarray,
+                 background: np.ndarray,
+                 alpha: np.ndarray) -> np.ndarray:
+        """
+        Composite foreground over background using alpha.
+        Args:
+            foreground: Foreground image (H, W, 3)
+            background: Background image (H, W, 3)
+            alpha: Alpha matte (H, W) or (H, W, 1)
+        Returns:
+            Composited image
+        """
+        # Ensure alpha is 3-channel
+        if len(alpha.shape) == 2:
+            alpha = np.expand_dims(alpha, axis=2)
+        if alpha.shape[2] == 1:
+            alpha = np.repeat(alpha, 3, axis=2)
+        # Ensure float32
+        fg = foreground.astype(np.float32) / 255.0
+        bg = background.astype(np.float32) / 255.0
+        a = alpha.astype(np.float32)
+        if a.max() > 1.0:
+            a = a / 255.0
+        # Alpha blending
+        result = fg * a + bg * (1 - a)
+        # Convert back to uint8
+        result = np.clip(result * 255, 0, 255).astype(np.uint8)
+        return result
+    def premultiply_alpha(self, image: np.ndarray,
+                         alpha: np.ndarray) -> np.ndarray:
+        """Premultiply image by alpha channel."""
+        if len(alpha.shape) == 2:
+            alpha = np.expand_dims(alpha, axis=2)
+        result = image.astype(np.float32) * alpha.astype(np.float32)
+        if alpha.max() > 1.0:
+            result = result / 255.0
+        return np.clip(result, 0, 255).astype(np.uint8)