Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 28

Commit

e21220a

1 Parent(s): 31fec2e

Update processing/video/video_processor.py

Browse files

Files changed (1) hide show

processing/video/video_processor.py +54 -107

processing/video/video_processor.py CHANGED Viewed

@@ -126,10 +126,24 @@ class ProcessorConfig:
     use_windowed: bool = True          # enable two-phase SAM2→MatAnyone per chunk
     window_size: int = 8               # frames per window
-# Back-compat name used elsewhere in the app
 ProcessingConfig = ProcessorConfig
 class _FFmpegPipe:
     """
     Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.
@@ -254,12 +268,10 @@ def write(self, frame_bgr: np.ndarray):
         if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
             raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")
-        # ensure contiguous for tobytes()
         frame_bgr = np.ascontiguousarray(frame_bgr)
         try:
             self.proc.stdin.write(frame_bgr.tobytes())
         except Exception as e:
-            # collect stderr for diagnostics
             stderr = b""
             try:
                 if self.proc and self.proc.stderr:
@@ -284,7 +296,6 @@ def close(self):
                     self.proc.stdin.close()
                 except Exception:
                     pass
-            # drain a bit of stderr for logs
             if self.proc.stderr:
                 try:
                     err = self.proc.stderr.read()
@@ -316,26 +327,23 @@ class CoreVideoProcessor:
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         self.log = _log
         self.config = config or ProcessorConfig()
-        self.models = models  # do NOT load here; core/app handles loading
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
         self._ffmpeg = shutil.which("ffmpeg")
         # state for temporal smoothing
         self._prev_mask: Optional[np.ndarray] = None
-        # --- ENV overrides (tunable without code change) ---
-        try:
-            if "MATANYONE_WINDOWED" in os.environ:
-                self.config.use_windowed = os.environ["MATANYONE_WINDOWED"].strip().lower() not in ("0", "false", "no")
-            if "MATANYONE_WINDOW" in os.environ:
-                self.config.window_size = max(1, int(os.environ["MATANYONE_WINDOW"]))
-            if "MAX_MODEL_SIZE" in os.environ:
-                self.config.max_model_size = max(0, int(os.environ["MAX_MODEL_SIZE"]))
-        except Exception:
-            pass
-        # Legacy per-frame stateful chunking (used only if use_windowed=False)
         try:
             self._chunk_size = max(1, int(os.environ.get("MATANYONE_CHUNK", "12")))
         except Exception:
@@ -351,24 +359,22 @@ def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
         return (inter / union) if union else 0.0
     def _harden(self, m: np.ndarray) -> np.ndarray:
-        # optional gamma
-        g = float(self.config.mask_gamma)
         if abs(g - 1.0) > 1e-6:
             m = np.clip(m, 0, 1) ** g
-        lo = float(self.config.hard_low)
-        hi = float(self.config.hard_high)
         if hi > lo + 1e-6:
             m = (m - lo) / (hi - lo)
             m = np.clip(m, 0.0, 1.0)
-        # pad edges then tiny blur
-        k = int(self.config.dilate_px)
         if k > 0:
             se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*k+1, 2*k+1))
             m = cv2.dilate(m, se, iterations=1)
-        eb = int(self.config.edge_blur_px)
         if eb > 0:
             m = cv2.GaussianBlur(m, (2*eb+1, 2*eb+1), 0)
@@ -379,42 +385,31 @@ def _stabilize(self, m: np.ndarray) -> np.ndarray:
             self._prev_mask = m
             return m
-        # outlier rejection
-        if self._iou(self._prev_mask, m, 0.5) < float(self.config.min_iou_to_accept):
-            # ignore this frame's mask → keep previous
             return self._prev_mask
-        # EMA
-        a = float(self.config.temporal_ema_alpha)
         m_ema = a * self._prev_mask + (1.0 - a) * m
         self._prev_mask = m_ema
         return m_ema
     # ---------- Single frame (fallback path) ----------
     def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
-        """
-        Process one frame (legacy per-frame path):
-          - optionally downscale for model work,
-          - segment + refine,
-          - temporal stabilize + harden,
-          - upsample mask,
-          - composite full-res.
-        Returns dict with composited frame (BGR for writer) and mask (H,W float).
-        """
         H, W = frame_bgr.shape[:2]
         max_side = max(H, W)
         scale = 1.0
         proc_frame_bgr = frame_bgr
         # Model-only downscale
-        if self.config.max_model_size and max_side > self.config.max_model_size:
-            scale = self.config.max_model_size / float(max_side)
             newW = int(round(W * scale))
             newH = int(round(H * scale))
             proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
             self.log.debug(f"Model-only downscale: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
-        # RGB for models
         proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
         predictor = None
@@ -424,10 +419,8 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
         except Exception as e:
             self.log.warning(f"SAM2 predictor unavailable: {e}")
-        # 1) segmentation (with internal fallbacks)
         mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
-        # 2) refinement (MatAnyOne if available) — stateful chunking
         matanyone = None
         try:
             if self.models and hasattr(self.models, "get_matanyone"):
@@ -441,16 +434,14 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
             except Exception:
                 pass
-        # IMPORTANT: call order is (frame, mask, matanyone=...)
         mask_small_ref = refine_mask_hq(
             proc_frame_rgb,
             mask_small,
             matanyone=matanyone,
             use_matanyone=True,
-            frame_idx=self._chunk_idx,   # enable stateful first-frame + propagate
         )
-        # advance chunk + optional defrag
         self._chunk_idx = (self._chunk_idx + 1) % max(1, self._chunk_size)
         if self._chunk_idx == 0:
             try:
@@ -460,40 +451,25 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
             except Exception:
                 pass
-        # Stabilize + harden at model scale
         mask_small_ref = np.clip(mask_small_ref.astype(np.float32), 0.0, 1.0)
         mask_stable = self._stabilize(mask_small_ref)
         mask_stable = self._harden(mask_stable)
-        # Upsample mask back to full-res
         if scale != 1.0:
             mask_full = cv2.resize(mask_stable, (W, H), interpolation=cv2.INTER_LINEAR)
         else:
             mask_full = mask_stable
-        # 3) compositing (helpers expect RGB inputs; return RGB)
         frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
         out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
-        # Convert to BGR for writer
         out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
         return {"frame": out_bgr, "mask": mask_full}
     # ---------- Build background once per video ----------
     def _prepare_background_from_config(
-        self,
-        bg_config: Optional[Dict[str, Any]],
-        width: int,
-        height: int
     ) -> np.ndarray:
-        """
-        Accepts either:
-          - {"custom_path": "/path/to/image.png"} → load image (RGB out)
-          - {"background_choice": "office"} → preset
-          - {"gradient": {type,start,end,angle_deg}} → generated gradient
-        Returns RGB np.uint8
-        """
-        # 1) custom image?
         if bg_config and bg_config.get("custom_path"):
             path = bg_config["custom_path"]
             img_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
@@ -503,19 +479,17 @@ def _prepare_background_from_config(
                 img_bgr = cv2.resize(img_bgr, (width, height), interpolation=cv2.INTER_LANCZOS4)
                 return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
-        # 2) gradient?
         if bg_config and isinstance(bg_config.get("gradient"), dict):
             try:
                 return _create_gradient_background_local(bg_config["gradient"], width, height)
             except Exception as e:
                 self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
-        # 3) preset (explicit choice or default)
         choice = None
         if bg_config and "background_choice" in bg_config:
             choice = bg_config["background_choice"]
         if not choice:
-            choice = self.config.background_preset
         if choice not in PROFESSIONAL_BACKGROUNDS:
             self.log.warning(f"Unknown background preset '{choice}'; using 'office'.")
@@ -525,11 +499,11 @@ def _prepare_background_from_config(
     # ---------- Windowed two-phase helpers ----------
     def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
-        """Apply model-only downscale; return (resized_bgr, scale)."""
         H, W = frame_bgr.shape[:2]
         max_side = max(H, W)
-        if self.config.max_model_size and max_side > self.config.max_model_size:
-            s = self.config.max_model_size / float(max_side)
             newW = int(round(W * s))
             newH = int(round(H * s))
             small = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
@@ -537,12 +511,10 @@ def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
         return frame_bgr, 1.0
     def _prepare_sam2_gpu(self, predictor):
-        """Best-effort: ensure SAM2 is on CUDA before SAM2 phase."""
         try:
-            import torch  # local import to avoid hard dependency at import-time
             if predictor is None or not torch.cuda.is_available():
                 return
-            # Try common patterns
             if hasattr(predictor, "to"):
                 try:
                     predictor.to("cuda")  # type: ignore[attr-defined]
@@ -558,18 +530,15 @@ def _prepare_sam2_gpu(self, predictor):
             pass
     def _release_sam2_gpu(self, predictor):
-        """Best-effort release of SAM2 GPU residency between phases."""
         try:
             if predictor is None:
                 return
-            # Clear any sticky per-image state if exposed
             for name in ("reset_image", "release_image", "clear_image", "clear_state"):
                 if hasattr(predictor, name) and callable(getattr(predictor, name)):
                     try:
                         getattr(predictor, name)()
                     except Exception:
                         pass
-            # Try moving large parts off-GPU (best-effort, may be no-op)
             for name in ("to", "cpu"):
                 if hasattr(predictor, name):
                     try:
@@ -597,10 +566,6 @@ def process_video(
         progress_callback: Optional[Callable[[int, int, float], None]] = None,
         stop_event: Optional[threading.Event] = None
     ) -> Dict[str, Any]:
-        """
-        Process a full video with live progress and optional cancel.
-        progress_callback(current_frame, total_frames, fps_live)
-        """
         ok, msg = validate_video_file(input_path)
         if not ok:
             raise ValueError(f"Invalid or unreadable video: {msg}")
@@ -614,20 +579,17 @@ def process_video(
         fps    = cap.get(cv2.CAP_PROP_FPS)
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
-        # Background once (RGB)
         background_rgb = self._prepare_background_from_config(bg_config, width, height)
-        # reset temporal state for a new video
         self._prev_mask = None
-        # Writer selection
         ffmpeg_pipe: _FFmpegPipe | None = None
         writer: cv2.VideoWriter | None = None
         ffmpeg_failed_reason = None
-        if self.config.use_nvenc and self._ffmpeg:
             try:
                 ffmpeg_pipe = _FFmpegPipe(width, height, float(fps_out), output_path, self.config, log=self.log)
             except Exception as e:
@@ -641,7 +603,6 @@ def process_video(
                 cap.release()
                 raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
-        # Determine models and decide execution mode
         predictor = None
         matanyone = None
         try:
@@ -656,14 +617,13 @@ def process_video(
         except Exception as e:
             self.log.warning(f"MatAnyOne unavailable: {e}")
-        use_windowed = bool(self.config.use_windowed and predictor is not None and matanyone is not None)
         frame_count = 0
         start_time = time.time()
         try:
             if not use_windowed:
-                # --------- Legacy per-frame path (fallback) ----------
                 while True:
                     ret, frame_bgr = cap.read()
                     if not ret:
@@ -698,15 +658,15 @@ def process_video(
                     if progress_callback:
                         elapsed = time.time() - start_time
                         fps_live = frame_count / elapsed if elapsed > 0 else 0.0
-                        try: progress_callback(frame_count, total_frames, fps_live)
-                        except Exception: pass
             else:
-                # --------- Windowed two-phase path ----------
-                WINDOW = max(1, int(self.config.window_size))
                 while True:
-                    # Read a window of frames
                     frames_bgr: List[np.ndarray] = []
                     for _ in range(WINDOW):
                         ret, fr = cap.read()
@@ -715,26 +675,22 @@ def process_video(
                         frames_bgr.append(fr)
                     if not frames_bgr:
-                        break  # no more frames
                     if stop_event is not None and stop_event.is_set():
                         self.log.info("Processing stopped by user request.")
                         break
-                    # Model-only downscale frames for model work (consistent per window)
                     frames_small_bgr: List[np.ndarray] = []
                     scales: List[float] = []
                     for fr in frames_bgr:
                         fr_small, s = self._model_downscale(fr)
                         frames_small_bgr.append(fr_small)
                         scales.append(s)
-                    # Use the first scale (frames normally same size)
                     scale = scales[0] if scales else 1.0
-                    # Convert small frames to RGB for models
                     frames_small_rgb = [cv2.cvtColor(fb, cv2.COLOR_BGR2RGB) for fb in frames_small_bgr]
-                    # -------- SAM2 phase (prime with first frame's mask) --------
                     self._prepare_sam2_gpu(predictor)
                     try:
                         mask_small = segment_person_hq(frames_small_rgb[0], predictor, use_sam2=True)
@@ -742,10 +698,8 @@ def process_video(
                         self.log.warning(f"SAM2 segmentation error on window start: {e}")
                         mask_small = segment_person_hq(frames_small_rgb[0], None, use_sam2=False)
-                    # Release SAM2 GPU residency before MatAnyone phase
                     self._release_sam2_gpu(predictor)
-                    # -------- MatAnyone phase (prime + propagate) --------
                     if hasattr(matanyone, "reset"):
                         try:
                             matanyone.reset()
@@ -758,29 +712,25 @@ def process_video(
                                 m2d = mask_small
                                 if m2d.ndim == 3:
                                     m2d = m2d[..., 0]
-                                alpha_small = matanyone(fr_rgb_small, m2d)  # adapter returns float32 [h,w]
                             else:
-                                alpha_small = matanyone(fr_rgb_small)      # propagate (no mask)
-                            # Stabilize + harden at model scale
                             alpha_small = np.clip(alpha_small.astype(np.float32), 0.0, 1.0)
                             alpha_stable = self._stabilize(alpha_small)
                             alpha_harden = self._harden(alpha_stable)
-                            # Upsample back to full-res
                             if scale != 1.0:
                                 H, W = frames_bgr[j].shape[:2]
                                 alpha_full = cv2.resize(alpha_harden, (W, H), interpolation=cv2.INTER_LINEAR)
                             else:
                                 alpha_full = alpha_harden
-                            # Composite at full-res (expects RGB)
                             frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
                             out_rgb = replace_background_hq(frame_rgb_full, alpha_full, background_rgb)
                             out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
                             out_bgr = np.ascontiguousarray(out_bgr)
-                            # Write
                             if ffmpeg_pipe is not None:
                                 try:
                                     ffmpeg_pipe.write(out_bgr)
@@ -803,7 +753,6 @@ def process_video(
                             frame_count += 1
                         except Exception as e:
-                            # If MatAnyone fails, log and fall back to SAM-only for this frame
                             self.log.warning(f"MatAnyone failed at window frame {j}: {e}")
                             if j == 0:
                                 alpha_small_fb = np.clip(mask_small.astype(np.float32), 0.0, 1.0)
@@ -839,7 +788,6 @@ def process_video(
                                 writer.write(np.ascontiguousarray(out_bgr_fb))
                             frame_count += 1
-                        # Progress update
                         if progress_callback:
                             elapsed = time.time() - start_time
                             fps_live = frame_count / elapsed if elapsed > 0 else 0.0
@@ -848,7 +796,6 @@ def process_video(
                             except Exception:
                                 pass
-                    # Clean per-window buffers (CPU) and let CUDA defrag
                     del frames_bgr, frames_small_bgr, frames_small_rgb, mask_small
                     try:
                         import torch

     use_windowed: bool = True          # enable two-phase SAM2→MatAnyone per chunk
     window_size: int = 8               # frames per window
+# Back-compat alias used elsewhere in the app
 ProcessingConfig = ProcessorConfig
+def _env_bool(name: str, default: bool) -> bool:  # read env var `name` as a boolean flag
+    v = os.environ.get(name, None)
+    if v is None:
+        return default  # variable is not set: keep the caller-supplied default
+    return str(v).strip().lower() not in ("0", "no", "false", "off", "")  # trimmed, case-insensitive; listed values (incl. empty) mean False, anything else True
+def _env_int(name: str, default: int) -> int:  # read env var `name` as an integer
+    try:
+        return int(os.environ.get(name, "").strip() or default)  # unset or blank value falls through to `default`
+    except Exception:
+        return default  # value could not be converted by int(): use the default instead of raising
 class _FFmpegPipe:
     """
     Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.
         if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
             raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")
         frame_bgr = np.ascontiguousarray(frame_bgr)
         try:
             self.proc.stdin.write(frame_bgr.tobytes())
         except Exception as e:
             stderr = b""
             try:
                 if self.proc and self.proc.stderr:
                     self.proc.stdin.close()
                 except Exception:
                     pass
             if self.proc.stderr:
                 try:
                     err = self.proc.stderr.read()
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         self.log = _log
         self.config = config or ProcessorConfig()
+        self.models = models
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
         self._ffmpeg = shutil.which("ffmpeg")
+        # -------- Back-compat safe config flags (do not require attrs on user config)
+        self._use_windowed = _env_bool(
+            "MATANYONE_WINDOWED",
+            bool(getattr(self.config, "use_windowed", False)),
+        )
+        self._window_size = max(1, _env_int("MATANYONE_WINDOW", int(getattr(self.config, "window_size", 8))))
+        self._max_model_size = int(os.environ.get("MAX_MODEL_SIZE", getattr(self.config, "max_model_size", 1280) or 0)) or None
         # state for temporal smoothing
         self._prev_mask: Optional[np.ndarray] = None
+        # Legacy per-frame stateful chunking (used only if windowed=False)
         try:
             self._chunk_size = max(1, int(os.environ.get("MATANYONE_CHUNK", "12")))
         except Exception:
         return (inter / union) if union else 0.0
     def _harden(self, m: np.ndarray) -> np.ndarray:
+        g = float(getattr(self.config, "mask_gamma", 0.90))
         if abs(g - 1.0) > 1e-6:
             m = np.clip(m, 0, 1) ** g
+        lo = float(getattr(self.config, "hard_low", 0.35))
+        hi = float(getattr(self.config, "hard_high", 0.70))
         if hi > lo + 1e-6:
             m = (m - lo) / (hi - lo)
             m = np.clip(m, 0.0, 1.0)
+        k = int(getattr(self.config, "dilate_px", 6))
         if k > 0:
             se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*k+1, 2*k+1))
             m = cv2.dilate(m, se, iterations=1)
+        eb = int(getattr(self.config, "edge_blur_px", 1))
         if eb > 0:
             m = cv2.GaussianBlur(m, (2*eb+1, 2*eb+1), 0)
             self._prev_mask = m
             return m
+        thr = float(getattr(self.config, "min_iou_to_accept", 0.05))
+        if self._iou(self._prev_mask, m, 0.5) < thr:
             return self._prev_mask
+        a = float(getattr(self.config, "temporal_ema_alpha", 0.75))
         m_ema = a * self._prev_mask + (1.0 - a) * m
         self._prev_mask = m_ema
         return m_ema
     # ---------- Single frame (fallback path) ----------
     def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
         H, W = frame_bgr.shape[:2]
         max_side = max(H, W)
         scale = 1.0
         proc_frame_bgr = frame_bgr
         # Model-only downscale
+        mms = self._max_model_size
+        if mms and max_side > mms:
+            scale = mms / float(max_side)
             newW = int(round(W * scale))
             newH = int(round(H * scale))
             proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
             self.log.debug(f"Model-only downscale: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
         proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
         predictor = None
         except Exception as e:
             self.log.warning(f"SAM2 predictor unavailable: {e}")
         mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
         matanyone = None
         try:
             if self.models and hasattr(self.models, "get_matanyone"):
             except Exception:
                 pass
         mask_small_ref = refine_mask_hq(
             proc_frame_rgb,
             mask_small,
             matanyone=matanyone,
             use_matanyone=True,
+            frame_idx=self._chunk_idx,
         )
         self._chunk_idx = (self._chunk_idx + 1) % max(1, self._chunk_size)
         if self._chunk_idx == 0:
             try:
             except Exception:
                 pass
         mask_small_ref = np.clip(mask_small_ref.astype(np.float32), 0.0, 1.0)
         mask_stable = self._stabilize(mask_small_ref)
         mask_stable = self._harden(mask_stable)
         if scale != 1.0:
             mask_full = cv2.resize(mask_stable, (W, H), interpolation=cv2.INTER_LINEAR)
         else:
             mask_full = mask_stable
         frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
         out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
         out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
         return {"frame": out_bgr, "mask": mask_full}
     # ---------- Build background once per video ----------
     def _prepare_background_from_config(
+        self, bg_config: Optional[Dict[str, Any]], width: int, height: int
     ) -> np.ndarray:
         if bg_config and bg_config.get("custom_path"):
             path = bg_config["custom_path"]
             img_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
                 img_bgr = cv2.resize(img_bgr, (width, height), interpolation=cv2.INTER_LANCZOS4)
                 return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
         if bg_config and isinstance(bg_config.get("gradient"), dict):
             try:
                 return _create_gradient_background_local(bg_config["gradient"], width, height)
             except Exception as e:
                 self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
         choice = None
         if bg_config and "background_choice" in bg_config:
             choice = bg_config["background_choice"]
         if not choice:
+            choice = getattr(self.config, "background_preset", "office")
         if choice not in PROFESSIONAL_BACKGROUNDS:
             self.log.warning(f"Unknown background preset '{choice}'; using 'office'.")
     # ---------- Windowed two-phase helpers ----------
     def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
         H, W = frame_bgr.shape[:2]
         max_side = max(H, W)
+        mms = self._max_model_size
+        if mms and max_side > mms:
+            s = mms / float(max_side)
             newW = int(round(W * s))
             newH = int(round(H * s))
             small = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
         return frame_bgr, 1.0
     def _prepare_sam2_gpu(self, predictor):
         try:
+            import torch
             if predictor is None or not torch.cuda.is_available():
                 return
             if hasattr(predictor, "to"):
                 try:
                     predictor.to("cuda")  # type: ignore[attr-defined]
             pass
     def _release_sam2_gpu(self, predictor):
         try:
             if predictor is None:
                 return
             for name in ("reset_image", "release_image", "clear_image", "clear_state"):
                 if hasattr(predictor, name) and callable(getattr(predictor, name)):
                     try:
                         getattr(predictor, name)()
                     except Exception:
                         pass
             for name in ("to", "cpu"):
                 if hasattr(predictor, name):
                     try:
         progress_callback: Optional[Callable[[int, int, float], None]] = None,
         stop_event: Optional[threading.Event] = None
     ) -> Dict[str, Any]:
         ok, msg = validate_video_file(input_path)
         if not ok:
             raise ValueError(f"Invalid or unreadable video: {msg}")
         fps    = cap.get(cv2.CAP_PROP_FPS)
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        fps_out = getattr(self.config, "write_fps", None) or (fps if fps and fps > 0 else 25.0)
         background_rgb = self._prepare_background_from_config(bg_config, width, height)
         self._prev_mask = None
         ffmpeg_pipe: _FFmpegPipe | None = None
         writer: cv2.VideoWriter | None = None
         ffmpeg_failed_reason = None
+        if getattr(self.config, "use_nvenc", True) and shutil.which("ffmpeg"):
             try:
                 ffmpeg_pipe = _FFmpegPipe(width, height, float(fps_out), output_path, self.config, log=self.log)
             except Exception as e:
                 cap.release()
                 raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
         predictor = None
         matanyone = None
         try:
         except Exception as e:
             self.log.warning(f"MatAnyOne unavailable: {e}")
+        use_windowed = bool(self._use_windowed and predictor is not None and matanyone is not None)
         frame_count = 0
         start_time = time.time()
         try:
             if not use_windowed:
                 while True:
                     ret, frame_bgr = cap.read()
                     if not ret:
                     if progress_callback:
                         elapsed = time.time() - start_time
                         fps_live = frame_count / elapsed if elapsed > 0 else 0.0
+                        try:
+                            progress_callback(frame_count, total_frames, fps_live)
+                        except Exception:
+                            pass
             else:
+                WINDOW = max(1, int(self._window_size))
                 while True:
                     frames_bgr: List[np.ndarray] = []
                     for _ in range(WINDOW):
                         ret, fr = cap.read()
                         frames_bgr.append(fr)
                     if not frames_bgr:
+                        break
                     if stop_event is not None and stop_event.is_set():
                         self.log.info("Processing stopped by user request.")
                         break
                     frames_small_bgr: List[np.ndarray] = []
                     scales: List[float] = []
                     for fr in frames_bgr:
                         fr_small, s = self._model_downscale(fr)
                         frames_small_bgr.append(fr_small)
                         scales.append(s)
                     scale = scales[0] if scales else 1.0
                     frames_small_rgb = [cv2.cvtColor(fb, cv2.COLOR_BGR2RGB) for fb in frames_small_bgr]
                     self._prepare_sam2_gpu(predictor)
                     try:
                         mask_small = segment_person_hq(frames_small_rgb[0], predictor, use_sam2=True)
                         self.log.warning(f"SAM2 segmentation error on window start: {e}")
                         mask_small = segment_person_hq(frames_small_rgb[0], None, use_sam2=False)
                     self._release_sam2_gpu(predictor)
                     if hasattr(matanyone, "reset"):
                         try:
                             matanyone.reset()
                                 m2d = mask_small
                                 if m2d.ndim == 3:
                                     m2d = m2d[..., 0]
+                                alpha_small = matanyone(fr_rgb_small, m2d)
                             else:
+                                alpha_small = matanyone(fr_rgb_small)
                             alpha_small = np.clip(alpha_small.astype(np.float32), 0.0, 1.0)
                             alpha_stable = self._stabilize(alpha_small)
                             alpha_harden = self._harden(alpha_stable)
                             if scale != 1.0:
                                 H, W = frames_bgr[j].shape[:2]
                                 alpha_full = cv2.resize(alpha_harden, (W, H), interpolation=cv2.INTER_LINEAR)
                             else:
                                 alpha_full = alpha_harden
                             frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
                             out_rgb = replace_background_hq(frame_rgb_full, alpha_full, background_rgb)
                             out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
                             out_bgr = np.ascontiguousarray(out_bgr)
                             if ffmpeg_pipe is not None:
                                 try:
                                     ffmpeg_pipe.write(out_bgr)
                             frame_count += 1
                         except Exception as e:
                             self.log.warning(f"MatAnyone failed at window frame {j}: {e}")
                             if j == 0:
                                 alpha_small_fb = np.clip(mask_small.astype(np.float32), 0.0, 1.0)
                                 writer.write(np.ascontiguousarray(out_bgr_fb))
                             frame_count += 1
                         if progress_callback:
                             elapsed = time.time() - start_time
                             fps_live = frame_count / elapsed if elapsed > 0 else 0.0
                             except Exception:
                                 pass
                     del frames_bgr, frames_small_bgr, frames_small_rgb, mask_small
                     try:
                         import torch