Update utils/refinement.py
utils/refinement.py (+21 -13)
```diff
--- a/utils/refinement.py
+++ b/utils/refinement.py
@@ -130,13 +130,16 @@ def _refine_with_matanyone(
 ) -> np.ndarray:
     """Use MatAnyone model for mask refinement."""
     try:
+        # Set device to GPU (Tesla T4 on cuda:0)
+        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
         # Convert BGR to RGB and normalize
         image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
         h, w = image_rgb.shape[:2]
 
         # Convert to torch tensor format (C, H, W) and normalize to [0, 1]
         image_tensor = torch.from_numpy(image_rgb).permute(2, 0, 1).float() / 255.0
-        image_tensor = image_tensor.unsqueeze(0)  # Add batch dimension
+        image_tensor = image_tensor.unsqueeze(0).to(device)  # Add batch dimension and move to GPU
 
         # Ensure mask is binary uint8
         if mask.dtype != np.uint8:
@@ -144,9 +147,9 @@ def _refine_with_matanyone(
         if mask.ndim == 3:
             mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
 
-        # Convert mask to tensor
+        # Convert mask to tensor and move to GPU
         mask_tensor = torch.from_numpy(mask).float() / 255.0
-        mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0)  # (1, 1, H, W)
+        mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0).to(device)  # (1, 1, H, W) on GPU
 
         # Try different methods on InferenceCore
         result = None
@@ -157,7 +160,7 @@ def _refine_with_matanyone(
 
         with torch.no_grad():
             if hasattr(model, 'step'):
-                # Step method for iterative processing
+                # Step method for iterative processing
                 result = model.step(image_tensor, mask_tensor)
             elif hasattr(model, 'process_frame'):
                 result = model.process_frame(image_tensor, mask_tensor)
@@ -203,18 +206,21 @@ def _refine_batch_with_matanyone(
 ) -> List[np.ndarray]:
     """Process batch of frames through MatAnyone for temporal consistency."""
     try:
+        # Set device to GPU (Tesla T4 on cuda:0)
+        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
         batch_size = len(frames)
         h, w = frames[0].shape[:2]
 
-        # Convert frames to tensor batch
+        # Convert frames to tensor batch and move to GPU
         frame_tensors = []
         for frame in frames:
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             tensor = torch.from_numpy(frame_rgb).permute(2, 0, 1).float() / 255.0
             frame_tensors.append(tensor)
 
-        # Stack into batch (N, C, H, W)
-        batch_tensor = torch.stack(frame_tensors)
+        # Stack into batch (N, C, H, W) and move to GPU
+        batch_tensor = torch.stack(frame_tensors).to(device)
 
         # Prepare first mask for initialization
         first_mask = masks[0]
@@ -223,9 +229,9 @@ def _refine_batch_with_matanyone(
         if first_mask.ndim == 3:
             first_mask = cv2.cvtColor(first_mask, cv2.COLOR_BGR2GRAY)
 
-        # Convert first mask to tensor
+        # Convert first mask to tensor and move to GPU
        first_mask_tensor = torch.from_numpy(first_mask).float() / 255.0
-        first_mask_tensor = first_mask_tensor.unsqueeze(0).unsqueeze(0)
+        first_mask_tensor = first_mask_tensor.unsqueeze(0).unsqueeze(0).to(device)
 
         refined_masks = []
 
@@ -241,12 +247,13 @@ def _refine_batch_with_matanyone(
             elif hasattr(model, 'step'):
                 # Process frames sequentially with memory
                 for i, frame_tensor in enumerate(frame_tensors):
+                    frame_on_device = frame_tensor.unsqueeze(0).to(device)
                     if i == 0:
                         # First frame with mask
-                        result = model.step(
+                        result = model.step(frame_on_device, first_mask_tensor)
                     else:
                         # Subsequent frames use memory from previous
-                        result = model.step(
+                        result = model.step(frame_on_device, None)
 
                     alpha = _extract_alpha_from_result(result)
                     refined_masks.append(_tensor_to_mask(alpha, h, w))
@@ -256,9 +263,10 @@ def _refine_batch_with_matanyone(
             log.warning("MatAnyone batch processing not available, using frame-by-frame")
             for frame_tensor, mask in zip(frame_tensors, masks):
                 mask_tensor = torch.from_numpy(mask).float() / 255.0
-                mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0)
+                mask_tensor = mask_tensor.unsqueeze(0).unsqueeze(0).to(device)
+                frame_on_device = frame_tensor.unsqueeze(0).to(device)
 
-                result = model(
+                result = model(frame_on_device, mask_tensor)
                 alpha = _extract_alpha_from_result(result)
                 refined_masks.append(_tensor_to_mask(alpha, h, w))
 
```
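Both functions now follow the same standard PyTorch device-placement pattern: resolve the device once per call, then move every input tensor onto it before inference. A minimal self-contained sketch of that pattern is below; the helper names are hypothetical, invented for illustration, and only the tensor math is taken from the diff:

```python
import cv2
import numpy as np
import torch

# Pick the GPU when one is visible, otherwise fall back to CPU so the
# same code path still runs on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

def to_batched_image(image_bgr: np.ndarray) -> torch.Tensor:
    """BGR uint8 (H, W, 3) image -> (1, 3, H, W) float tensor in [0, 1] on `device`."""
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    tensor = torch.from_numpy(image_rgb).permute(2, 0, 1).float() / 255.0
    return tensor.unsqueeze(0).to(device)

def to_batched_mask(mask: np.ndarray) -> torch.Tensor:
    """Grayscale uint8 (H, W) mask -> (1, 1, H, W) float tensor in [0, 1] on `device`."""
    return (torch.from_numpy(mask).float() / 255.0).unsqueeze(0).unsqueeze(0).to(device)
```

`.to(device)` is a no-op when a tensor already lives on the target device, so the CPU fallback adds no overhead. The model weights must sit on the same device as the inputs; this change assumes that happens at model load time.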
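The `model.step` branch of the batch path also illustrates the seed-and-propagate idiom of memory-based video matting: only the first frame is paired with a mask, and subsequent frames rely on the model's internal memory. A sketch of that loop shape, assuming the `step(frame, mask_or_none)` signature shown in the diff rather than a verified MatAnyone API:

```python
from typing import List, Optional

import torch

def propagate_masks(model, frames: List[torch.Tensor],
                    first_mask: torch.Tensor) -> List[torch.Tensor]:
    """Run a step-based matting model over a clip, seeding it with one mask."""
    outputs = []
    with torch.no_grad():
        for i, frame in enumerate(frames):
            # Only the first call receives a mask; afterwards the model's
            # internal memory carries the object matte forward in time.
            mask: Optional[torch.Tensor] = first_mask if i == 0 else None
            outputs.append(model.step(frame, mask))
    return outputs
```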