Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 27

Commit

de84d79

1 Parent(s): 843b297

Update processing/video/video_processor.py

Browse files

Files changed (1) hide show

processing/video/video_processor.py +137 -76

processing/video/video_processor.py CHANGED Viewed

@@ -4,35 +4,36 @@
 Bridges the legacy import
     from processing.video.video_processor import CoreVideoProcessor
-to the modern pipeline functions in utils.cv_processing, using whatever
-models provider is passed in (e.g., models.loaders.ModelLoader).
 Requirements for the models provider:
-- get_sam2() -> predictor or None
-- get_matanyone() -> processor or None
 """
 from __future__ import annotations
 from dataclasses import dataclass
-from typing import Optional, Dict, Any, Tuple, Callable
 import time
 import threading
 import cv2
 import numpy as np
-# Try project logger; fall back to std logging
 try:
-    from utils.logger import get_logger
-    _log = get_logger(__name__)
 except Exception:
     import logging
     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
     _log = logging.getLogger(__name__)
-# CV pipeline helpers
-from utils.cv_processing import (
     segment_person_hq,
     refine_mask_hq,
     replace_background_hq,
@@ -44,17 +45,18 @@
 @dataclass
 class ProcessorConfig:
-    background_preset: str = "minimalist"   # key in PROFESSIONAL_BACKGROUNDS
-    write_fps: Optional[float] = None       # None -> keep source fps
 class CoreVideoProcessor:
     """
-    Minimal, safe implementation used by core/app.py.
     It relies on a models provider (e.g., ModelLoader) that implements:
         - get_sam2()
         - get_matanyone()
-    and uses utils.cv_processing for the pipeline.
     Supports progress callback and cancellation via stop_event.
     """
@@ -62,41 +64,11 @@ class CoreVideoProcessor:
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         self.log = _log
         self.config = config or ProcessorConfig()
-        self.models = models  # do NOT load here; core/app handles loading
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
-    # ---------- Single frame ----------
-    def process_frame(self, frame: np.ndarray, background: np.ndarray) -> Dict[str, Any]:
-        """Return dict with composited frame + mask; always attempts fallbacks."""
-        predictor = None
-        try:
-            if self.models and hasattr(self.models, "get_sam2"):
-                predictor = self.models.get_sam2()
-                # Some wrappers expose predictor directly, others are already usable
-                # segment_person_hq checks for set_image/predict itself.
-        except Exception as e:
-            self.log.warning(f"SAM2 predictor unavailable: {e}")
-        # 1) segmentation (with fallbacks inside)
-        mask = segment_person_hq(frame, predictor, fallback_enabled=True)
-        # 2) refinement (MatAnyOne if available, else robust OpenCV path)
-        matanyone = None
-        try:
-            if self.models and hasattr(self.models, "get_matanyone"):
-                matanyone = self.models.get_matanyone()
-        except Exception as e:
-            self.log.warning(f"MatAnyOne unavailable: {e}")
-        mask_refined = refine_mask_hq(frame, mask, matanyone, fallback_enabled=True)
-        # 3) compositing
-        out = replace_background_hq(frame, mask_refined, background, fallback_enabled=True)
-        return {"frame": out, "mask": mask_refined}
-    # ---------- Build background once per video ----------
     def _prepare_background_from_config(
         self,
         bg_config: Optional[Dict[str, Any]],
@@ -105,30 +77,36 @@ def _prepare_background_from_config(
     ) -> np.ndarray:
         """
         Accepts either:
-          - {"custom_path": "/path/to/image.png"} → load image
-          - {"background_choice": "minimalist"} → preset
           - None → use self.config.background_preset
         """
-        # 1) custom image?
         if bg_config and bg_config.get("custom_path"):
             path = bg_config["custom_path"]
-            img = cv2.imread(path, cv2.IMREAD_COLOR)
-            if img is None:
-                self.log.warning(f"Custom background at '{path}' could not be read. Falling back to preset.")
             else:
-                return cv2.resize(img, (width, height), interpolation=cv2.INTER_LANCZOS4)
-        # 2) preset (explicit choice or default)
         choice = None
         if bg_config and "background_choice" in bg_config:
             choice = bg_config["background_choice"]
         if not choice:
             choice = self.config.background_preset
-        cfg = PROFESSIONAL_BACKGROUNDS.get(choice, PROFESSIONAL_BACKGROUNDS["minimalist"])
-        return create_professional_background(cfg, width, height)
-    # ---------- Full video ----------
     def process_video(
         self,
         input_path: str,
@@ -140,11 +118,19 @@ def process_video(
         """
         Process a full video with live progress and optional cancel.
         progress_callback(current_frame, total_frames, fps_live)
         """
-        ok, msg = validate_video_file(input_path)
         if not ok:
-            raise ValueError(f"Invalid video: {msg}")
-        self.log.info(f"Video validation: {msg}")
         cap = cv2.VideoCapture(input_path)
         if not cap.isOpened():
@@ -152,51 +138,126 @@ def process_video(
         width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps    = cap.get(cv2.CAP_PROP_FPS)
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
         if not writer.isOpened():
             cap.release()
             raise RuntimeError(f"Could not open writer for: {output_path}")
-        # Build background once
-        background = self._prepare_background_from_config(bg_config, width, height)
         frame_count = 0
         start_time = time.time()
         try:
             while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
-                # Cancel support
                 if stop_event is not None and stop_event.is_set():
                     self.log.info("Processing stopped by user request.")
                     break
-                # Process single frame
-                result = self.process_frame(frame, background)
-                writer.write(result["frame"])
                 frame_count += 1
-                # Progress callback
                 if progress_callback:
                     elapsed = time.time() - start_time
                     fps_live = frame_count / elapsed if elapsed > 0 else 0.0
                     try:
                         progress_callback(frame_count, total_frames, fps_live)
                     except Exception:
-                        # Don’t break processing due to a UI callback error
                         pass
         finally:
             cap.release()
             writer.release()
-        self.log.info(f"Processed {frame_count} frames → {output_path}")
         return {
             "frames": frame_count,
             "width": width,

 Bridges the legacy import
     from processing.video.video_processor import CoreVideoProcessor
+to the modern pipeline functions in utils (segment, refine, composite),
+using whatever models provider is passed in (e.g., models.loaders.ModelLoader).
 Requirements for the models provider:
+- get_sam2()      -> predictor or None
+- get_matanyone() -> InferenceCore or compatible (or None)
 """
 from __future__ import annotations
 from dataclasses import dataclass
+from typing import Optional, Dict, Any, Callable
 import time
 import threading
 import cv2
 import numpy as np
+import torch
+# Logger (fallback to std logging if your project logger isn't available)
 try:
+    from utils.logging_setup import make_logger
+    _log = make_logger("processing.video.video_processor")
 except Exception:
     import logging
     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
     _log = logging.getLogger(__name__)
+# New, hardened utils (device-safe, SAM2↔MatAnyOne interop)
+from utils import (
     segment_person_hq,
     refine_mask_hq,
     replace_background_hq,
 @dataclass
 class ProcessorConfig:
+    # Use a valid preset key from PROFESSIONAL_BACKGROUNDS (e.g., "office", "studio", …)
+    background_preset: str = "office"
+    # None -> keep source fps (if available), else default to 25.0
+    write_fps: Optional[float] = None
 class CoreVideoProcessor:
     """
+    Minimal, safe implementation used by app entrypoint.
     It relies on a models provider (e.g., ModelLoader) that implements:
         - get_sam2()
         - get_matanyone()
     Supports progress callback and cancellation via stop_event.
     """
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         self.log = _log
         self.config = config or ProcessorConfig()
+        self.models = models  # app sets this to a provider with get_sam2/get_matanyone
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
+    # ---------- Internals: background builder ----------
     def _prepare_background_from_config(
         self,
         bg_config: Optional[Dict[str, Any]],
     ) -> np.ndarray:
         """
         Accepts either:
+          - {"custom_path": "/path/to/image.png"} → load that image
+          - {"background_choice": "<preset_key>"} → use preset key
           - None → use self.config.background_preset
+        Returns an RGB np.uint8 image (H x W x 3).
         """
+        # 1) Custom image?
         if bg_config and bg_config.get("custom_path"):
             path = bg_config["custom_path"]
+            img_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
+            if img_bgr is None:
+                self.log.warning("Custom background at '%s' could not be read. Falling back to preset.", path)
             else:
+                img_bgr = cv2.resize(img_bgr, (width, height), interpolation=cv2.INTER_LANCZOS4)
+                return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+        # 2) Preset (explicit or default)
         choice = None
         if bg_config and "background_choice" in bg_config:
             choice = bg_config["background_choice"]
         if not choice:
             choice = self.config.background_preset
+        if choice not in PROFESSIONAL_BACKGROUNDS:
+            self.log.warning("Unknown background preset '%s'; using 'office'.", choice)
+            choice = "office"
+        bg_rgb = create_professional_background(choice, width, height)  # returns RGB
+        return bg_rgb
+    # ---------- Full video pipeline (first-frame seed + propagate) ----------
     def process_video(
         self,
         input_path: str,
         """
         Process a full video with live progress and optional cancel.
         progress_callback(current_frame, total_frames, fps_live)
+        Pipeline:
+          - Read video (OpenCV)
+          - Build background (once)
+          - Frame 0: SAM2 segmentation → MatAnyOne refine (seed)
+          - Frames 1..N: MatAnyOne propagate (no mask)
+          - Composite each frame and write to MP4
         """
+        # Validate input video
+        ok = validate_video_file(input_path)
         if not ok:
+            raise ValueError("Invalid or unreadable video file")
+        self.log.info("Video validation OK: %s", input_path)
         cap = cv2.VideoCapture(input_path)
         if not cap.isOpened():
         width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        src_fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
+        fps_out = self.config.write_fps or (src_fps if src_fps and src_fps > 0 else 25.0)
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
         if not writer.isOpened():
             cap.release()
             raise RuntimeError(f"Could not open writer for: {output_path}")
+        # Build background (RGB)
+        background_rgb = self._prepare_background_from_config(bg_config, width, height)
+        # Models (allow fallbacks provided by app)
+        predictor = None
+        mat_core = None
+        try:
+            if self.models and hasattr(self.models, "get_sam2"):
+                predictor = self.models.get_sam2()
+        except Exception as e:
+            self.log.warning("SAM2 predictor unavailable: %s", e)
+        try:
+            if self.models and hasattr(self.models, "get_matanyone"):
+                mat_core = self.models.get_matanyone()
+        except Exception as e:
+            self.log.warning("MatAnyOne core unavailable: %s", e)
+        # Device (only used by helpers internally; we keep tensors on that device)
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.log.info("Starting processing on device=%s (size=%dx%d, fps_out=%.2f, frames=%s)",
+                      device, width, height, float(fps_out), total_frames or "unknown")
         frame_count = 0
         start_time = time.time()
+        refined_mask_prev: Optional[np.ndarray] = None
         try:
+            # -------- First frame (seed) --------
+            ret, f0_bgr = cap.read()
+            if not ret:
+                raise RuntimeError("Empty video")
+            f0_rgb = cv2.cvtColor(f0_bgr, cv2.COLOR_BGR2RGB)
+            # Segmentation (SAM2 preferred, else fallback)
+            m0_hw = segment_person_hq(
+                frame_rgb=f0_rgb,
+                use_sam2=True,
+                sam2_predictor=predictor
+            )
+            if m0_hw is None:
+                # As an absolute last resort, use a solid foreground mask (keeps pipeline alive)
+                self.log.warning("First-frame segmentation failed; using full-foreground mask.")
+                m0_hw = np.ones((f0_rgb.shape[0], f0_rgb.shape[1]), dtype=np.float32)
+            # Refine / seed MatAnyOne (first_frame=True makes the helper pass the mask)
+            refined_mask_0 = refine_mask_hq(
+                mask_hw_float01=m0_hw,
+                frame_rgb=f0_rgb,
+                use_matanyone=True,
+                mat_core=mat_core,
+                first_frame=True,
+                device=device
+            )
+            refined_mask_prev = refined_mask_0
+            # Composite & write
+            comp0_rgb = replace_background_hq(f0_rgb, refined_mask_0, background_rgb)
+            writer.write(cv2.cvtColor(comp0_rgb, cv2.COLOR_RGB2BGR))
+            frame_count = 1
+            if progress_callback:
+                elapsed = time.time() - start_time
+                fps_live = frame_count / elapsed if elapsed > 0 else 0.0
+                try:
+                    progress_callback(frame_count, total_frames, fps_live)
+                except Exception:
+                    pass
+            # -------- Remaining frames (propagate) --------
             while True:
                 if stop_event is not None and stop_event.is_set():
                     self.log.info("Processing stopped by user request.")
                     break
+                ret, fbgr = cap.read()
+                if not ret:
+                    break
+                frgb = cv2.cvtColor(fbgr, cv2.COLOR_BGR2RGB)
+                # Propagate (first_frame=False -> mask ignored internally, MatAnyOne uses memory)
+                refined_mask_t = refine_mask_hq(
+                    mask_hw_float01=refined_mask_prev if refined_mask_prev is not None else m0_hw,
+                    frame_rgb=frgb,
+                    use_matanyone=True,
+                    mat_core=mat_core,
+                    first_frame=False,
+                    device=device
+                )
+                refined_mask_prev = refined_mask_t
+                comp_rgb = replace_background_hq(frgb, refined_mask_t, background_rgb)
+                writer.write(cv2.cvtColor(comp_rgb, cv2.COLOR_RGB2BGR))
                 frame_count += 1
                 if progress_callback:
                     elapsed = time.time() - start_time
                     fps_live = frame_count / elapsed if elapsed > 0 else 0.0
                     try:
                         progress_callback(frame_count, total_frames, fps_live)
                     except Exception:
                         pass
         finally:
             cap.release()
             writer.release()
+        self.log.info("Processed %d frames → %s", frame_count, output_path)
         return {
             "frames": frame_count,
             "width": width,