MogensR committed
Commit e21220a · 1 Parent(s): 31fec2e

Update processing/video/video_processor.py

Files changed (1):
  processing/video/video_processor.py  +54 -107
processing/video/video_processor.py CHANGED
@@ -126,10 +126,24 @@ class ProcessorConfig:
     use_windowed: bool = True  # enable two-phase SAM2→MatAnyone per chunk
     window_size: int = 8       # frames per window
 
-# Back-compat name used elsewhere in the app
+# Back-compat alias used elsewhere in the app
 ProcessingConfig = ProcessorConfig
 
 
+def _env_bool(name: str, default: bool) -> bool:
+    v = os.environ.get(name, None)
+    if v is None:
+        return default
+    return str(v).strip().lower() not in ("0", "no", "false", "off", "")
+
+
+def _env_int(name: str, default: int) -> int:
+    try:
+        return int(os.environ.get(name, "").strip() or default)
+    except Exception:
+        return default
+
+
 class _FFmpegPipe:
     """
     Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.
@@ -254,12 +268,10 @@ def write(self, frame_bgr: np.ndarray):
         if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
             raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")
 
-        # ensure contiguous for tobytes()
         frame_bgr = np.ascontiguousarray(frame_bgr)
         try:
             self.proc.stdin.write(frame_bgr.tobytes())
         except Exception as e:
-            # collect stderr for diagnostics
             stderr = b""
             try:
                 if self.proc and self.proc.stderr:
@@ -284,7 +296,6 @@ def close(self):
                 self.proc.stdin.close()
             except Exception:
                 pass
-            # drain a bit of stderr for logs
             if self.proc.stderr:
                 try:
                     err = self.proc.stderr.read()
@@ -316,26 +327,23 @@ class CoreVideoProcessor:
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         self.log = _log
         self.config = config or ProcessorConfig()
-        self.models = models  # do NOT load here; core/app handles loading
+        self.models = models
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
         self._ffmpeg = shutil.which("ffmpeg")
 
+        # -------- Back-compat safe config flags (do not require attrs on user config)
+        self._use_windowed = _env_bool(
+            "MATANYONE_WINDOWED",
+            bool(getattr(self.config, "use_windowed", False)),
+        )
+        self._window_size = max(1, _env_int("MATANYONE_WINDOW", int(getattr(self.config, "window_size", 8))))
+        self._max_model_size = int(os.environ.get("MAX_MODEL_SIZE", getattr(self.config, "max_model_size", 1280) or 0)) or None
+
         # state for temporal smoothing
         self._prev_mask: Optional[np.ndarray] = None
 
-        # --- ENV overrides (tunable without code change) ---
-        try:
-            if "MATANYONE_WINDOWED" in os.environ:
-                self.config.use_windowed = os.environ["MATANYONE_WINDOWED"].strip().lower() not in ("0", "false", "no")
-            if "MATANYONE_WINDOW" in os.environ:
-                self.config.window_size = max(1, int(os.environ["MATANYONE_WINDOW"]))
-            if "MAX_MODEL_SIZE" in os.environ:
-                self.config.max_model_size = max(0, int(os.environ["MAX_MODEL_SIZE"]))
-        except Exception:
-            pass
-
-        # Legacy per-frame stateful chunking (used only if use_windowed=False)
+        # Legacy per-frame stateful chunking (used only if windowed=False)
         try:
             self._chunk_size = max(1, int(os.environ.get("MATANYONE_CHUNK", "12")))
         except Exception:
@@ -351,24 +359,22 @@ def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
         return (inter / union) if union else 0.0
 
     def _harden(self, m: np.ndarray) -> np.ndarray:
-        # optional gamma
-        g = float(self.config.mask_gamma)
+        g = float(getattr(self.config, "mask_gamma", 0.90))
         if abs(g - 1.0) > 1e-6:
             m = np.clip(m, 0, 1) ** g
 
-        lo = float(self.config.hard_low)
-        hi = float(self.config.hard_high)
+        lo = float(getattr(self.config, "hard_low", 0.35))
+        hi = float(getattr(self.config, "hard_high", 0.70))
         if hi > lo + 1e-6:
             m = (m - lo) / (hi - lo)
             m = np.clip(m, 0.0, 1.0)
 
-        # pad edges then tiny blur
-        k = int(self.config.dilate_px)
+        k = int(getattr(self.config, "dilate_px", 6))
         if k > 0:
             se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*k+1, 2*k+1))
             m = cv2.dilate(m, se, iterations=1)
 
-        eb = int(self.config.edge_blur_px)
+        eb = int(getattr(self.config, "edge_blur_px", 1))
         if eb > 0:
             m = cv2.GaussianBlur(m, (2*eb+1, 2*eb+1), 0)
 
@@ -379,42 +385,31 @@ def _stabilize(self, m: np.ndarray) -> np.ndarray:
             self._prev_mask = m
             return m
 
-        # outlier rejection
-        if self._iou(self._prev_mask, m, 0.5) < float(self.config.min_iou_to_accept):
-            # ignore this frame's mask → keep previous
+        thr = float(getattr(self.config, "min_iou_to_accept", 0.05))
+        if self._iou(self._prev_mask, m, 0.5) < thr:
             return self._prev_mask
 
-        # EMA
-        a = float(self.config.temporal_ema_alpha)
+        a = float(getattr(self.config, "temporal_ema_alpha", 0.75))
         m_ema = a * self._prev_mask + (1.0 - a) * m
         self._prev_mask = m_ema
         return m_ema
 
     # ---------- Single frame (fallback path) ----------
     def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
-        """
-        Process one frame (legacy per-frame path):
-          - optionally downscale for model work,
-          - segment + refine,
-          - temporal stabilize + harden,
-          - upsample mask,
-          - composite full-res.
-        Returns dict with composited frame (BGR for writer) and mask (H,W float).
-        """
         H, W = frame_bgr.shape[:2]
         max_side = max(H, W)
         scale = 1.0
         proc_frame_bgr = frame_bgr
 
         # Model-only downscale
-        if self.config.max_model_size and max_side > self.config.max_model_size:
-            scale = self.config.max_model_size / float(max_side)
+        mms = self._max_model_size
+        if mms and max_side > mms:
+            scale = mms / float(max_side)
             newW = int(round(W * scale))
             newH = int(round(H * scale))
             proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
             self.log.debug(f"Model-only downscale: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
 
-        # RGB for models
         proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
 
         predictor = None
@@ -424,10 +419,8 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
         except Exception as e:
             self.log.warning(f"SAM2 predictor unavailable: {e}")
 
-        # 1) segmentation (with internal fallbacks)
         mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
 
-        # 2) refinement (MatAnyOne if available) — stateful chunking
         matanyone = None
         try:
             if self.models and hasattr(self.models, "get_matanyone"):
@@ -441,16 +434,14 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
         except Exception:
             pass
 
-        # IMPORTANT: call order is (frame, mask, matanyone=...)
         mask_small_ref = refine_mask_hq(
             proc_frame_rgb,
             mask_small,
             matanyone=matanyone,
             use_matanyone=True,
-            frame_idx=self._chunk_idx,  # enable stateful first-frame + propagate
+            frame_idx=self._chunk_idx,
         )
 
-        # advance chunk + optional defrag
         self._chunk_idx = (self._chunk_idx + 1) % max(1, self._chunk_size)
         if self._chunk_idx == 0:
             try:
@@ -460,40 +451,25 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
             except Exception:
                 pass
 
-        # Stabilize + harden at model scale
         mask_small_ref = np.clip(mask_small_ref.astype(np.float32), 0.0, 1.0)
         mask_stable = self._stabilize(mask_small_ref)
         mask_stable = self._harden(mask_stable)
 
-        # Upsample mask back to full-res
         if scale != 1.0:
             mask_full = cv2.resize(mask_stable, (W, H), interpolation=cv2.INTER_LINEAR)
         else:
             mask_full = mask_stable
 
-        # 3) compositing (helpers expect RGB inputs; return RGB)
         frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
         out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
 
-        # Convert to BGR for writer
         out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
         return {"frame": out_bgr, "mask": mask_full}
 
     # ---------- Build background once per video ----------
     def _prepare_background_from_config(
-        self,
-        bg_config: Optional[Dict[str, Any]],
-        width: int,
-        height: int
+        self, bg_config: Optional[Dict[str, Any]], width: int, height: int
     ) -> np.ndarray:
-        """
-        Accepts either:
-          - {"custom_path": "/path/to/image.png"}    → load image (RGB out)
-          - {"background_choice": "office"}          → preset
-          - {"gradient": {type,start,end,angle_deg}} → generated gradient
-        Returns RGB np.uint8
-        """
-        # 1) custom image?
         if bg_config and bg_config.get("custom_path"):
             path = bg_config["custom_path"]
             img_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
@@ -503,19 +479,17 @@ def _prepare_background_from_config(
             img_bgr = cv2.resize(img_bgr, (width, height), interpolation=cv2.INTER_LANCZOS4)
             return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
 
-        # 2) gradient?
         if bg_config and isinstance(bg_config.get("gradient"), dict):
             try:
                 return _create_gradient_background_local(bg_config["gradient"], width, height)
             except Exception as e:
                 self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
 
-        # 3) preset (explicit choice or default)
         choice = None
         if bg_config and "background_choice" in bg_config:
             choice = bg_config["background_choice"]
         if not choice:
-            choice = self.config.background_preset
+            choice = getattr(self.config, "background_preset", "office")
 
         if choice not in PROFESSIONAL_BACKGROUNDS:
             self.log.warning(f"Unknown background preset '{choice}'; using 'office'.")
@@ -525,11 +499,11 @@
 
     # ---------- Windowed two-phase helpers ----------
     def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
-        """Apply model-only downscale; return (resized_bgr, scale)."""
         H, W = frame_bgr.shape[:2]
         max_side = max(H, W)
-        if self.config.max_model_size and max_side > self.config.max_model_size:
-            s = self.config.max_model_size / float(max_side)
+        mms = self._max_model_size
+        if mms and max_side > mms:
+            s = mms / float(max_side)
             newW = int(round(W * s))
             newH = int(round(H * s))
             small = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
@@ -537,12 +511,10 @@ def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
         return frame_bgr, 1.0
 
     def _prepare_sam2_gpu(self, predictor):
-        """Best-effort: ensure SAM2 is on CUDA before SAM2 phase."""
         try:
-            import torch  # local import to avoid hard dependency at import-time
+            import torch
             if predictor is None or not torch.cuda.is_available():
                 return
-            # Try common patterns
             if hasattr(predictor, "to"):
                 try:
                     predictor.to("cuda")  # type: ignore[attr-defined]
@@ -558,18 +530,15 @@ def _prepare_sam2_gpu(self, predictor):
                 pass
 
     def _release_sam2_gpu(self, predictor):
-        """Best-effort release of SAM2 GPU residency between phases."""
        try:
            if predictor is None:
                return
-            # Clear any sticky per-image state if exposed
            for name in ("reset_image", "release_image", "clear_image", "clear_state"):
                if hasattr(predictor, name) and callable(getattr(predictor, name)):
                    try:
                        getattr(predictor, name)()
                    except Exception:
                        pass
-            # Try moving large parts off-GPU (best-effort, may be no-op)
            for name in ("to", "cpu"):
                if hasattr(predictor, name):
                    try:
@@ -597,10 +566,6 @@ def process_video(
         progress_callback: Optional[Callable[[int, int, float], None]] = None,
         stop_event: Optional[threading.Event] = None
     ) -> Dict[str, Any]:
-        """
-        Process a full video with live progress and optional cancel.
-        progress_callback(current_frame, total_frames, fps_live)
-        """
         ok, msg = validate_video_file(input_path)
         if not ok:
             raise ValueError(f"Invalid or unreadable video: {msg}")
@@ -614,20 +579,17 @@
         fps = cap.get(cv2.CAP_PROP_FPS)
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
 
-        fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
+        fps_out = getattr(self.config, "write_fps", None) or (fps if fps and fps > 0 else 25.0)
 
-        # Background once (RGB)
         background_rgb = self._prepare_background_from_config(bg_config, width, height)
 
-        # reset temporal state for a new video
         self._prev_mask = None
 
-        # Writer selection
         ffmpeg_pipe: _FFmpegPipe | None = None
         writer: cv2.VideoWriter | None = None
         ffmpeg_failed_reason = None
 
-        if self.config.use_nvenc and self._ffmpeg:
+        if getattr(self.config, "use_nvenc", True) and shutil.which("ffmpeg"):
             try:
                 ffmpeg_pipe = _FFmpegPipe(width, height, float(fps_out), output_path, self.config, log=self.log)
             except Exception as e:
@@ -641,7 +603,6 @@
             cap.release()
             raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
 
-        # Determine models and decide execution mode
         predictor = None
         matanyone = None
         try:
@@ -656,14 +617,13 @@
         except Exception as e:
             self.log.warning(f"MatAnyOne unavailable: {e}")
 
-        use_windowed = bool(self.config.use_windowed and predictor is not None and matanyone is not None)
+        use_windowed = bool(self._use_windowed and predictor is not None and matanyone is not None)
 
         frame_count = 0
         start_time = time.time()
 
         try:
             if not use_windowed:
-                # --------- Legacy per-frame path (fallback) ----------
                 while True:
                     ret, frame_bgr = cap.read()
                     if not ret:
@@ -698,15 +658,15 @@
                     if progress_callback:
                         elapsed = time.time() - start_time
                         fps_live = frame_count / elapsed if elapsed > 0 else 0.0
-                        try: progress_callback(frame_count, total_frames, fps_live)
-                        except Exception: pass
+                        try:
+                            progress_callback(frame_count, total_frames, fps_live)
+                        except Exception:
+                            pass
 
             else:
-                # --------- Windowed two-phase path ----------
-                WINDOW = max(1, int(self.config.window_size))
+                WINDOW = max(1, int(self._window_size))
 
                 while True:
-                    # Read a window of frames
                     frames_bgr: List[np.ndarray] = []
                     for _ in range(WINDOW):
                         ret, fr = cap.read()
@@ -715,26 +675,22 @@
                         frames_bgr.append(fr)
 
                     if not frames_bgr:
-                        break  # no more frames
+                        break
 
                     if stop_event is not None and stop_event.is_set():
                         self.log.info("Processing stopped by user request.")
                         break
 
-                    # Model-only downscale frames for model work (consistent per window)
                     frames_small_bgr: List[np.ndarray] = []
                     scales: List[float] = []
                     for fr in frames_bgr:
                         fr_small, s = self._model_downscale(fr)
                         frames_small_bgr.append(fr_small)
                         scales.append(s)
-                    # Use the first scale (frames normally same size)
                     scale = scales[0] if scales else 1.0
 
-                    # Convert small frames to RGB for models
                     frames_small_rgb = [cv2.cvtColor(fb, cv2.COLOR_BGR2RGB) for fb in frames_small_bgr]
 
-                    # -------- SAM2 phase (prime with first frame's mask) --------
                     self._prepare_sam2_gpu(predictor)
                     try:
                         mask_small = segment_person_hq(frames_small_rgb[0], predictor, use_sam2=True)
@@ -742,10 +698,8 @@
                         self.log.warning(f"SAM2 segmentation error on window start: {e}")
                         mask_small = segment_person_hq(frames_small_rgb[0], None, use_sam2=False)
 
-                    # Release SAM2 GPU residency before MatAnyone phase
                     self._release_sam2_gpu(predictor)
 
-                    # -------- MatAnyone phase (prime + propagate) --------
                     if hasattr(matanyone, "reset"):
                         try:
                             matanyone.reset()
@@ -758,29 +712,25 @@
                                 m2d = mask_small
                                 if m2d.ndim == 3:
                                     m2d = m2d[..., 0]
-                                alpha_small = matanyone(fr_rgb_small, m2d)  # adapter returns float32 [h,w]
+                                alpha_small = matanyone(fr_rgb_small, m2d)
                             else:
-                                alpha_small = matanyone(fr_rgb_small)  # propagate (no mask)
+                                alpha_small = matanyone(fr_rgb_small)
 
-                            # Stabilize + harden at model scale
                             alpha_small = np.clip(alpha_small.astype(np.float32), 0.0, 1.0)
                             alpha_stable = self._stabilize(alpha_small)
                             alpha_harden = self._harden(alpha_stable)
 
-                            # Upsample back to full-res
                             if scale != 1.0:
                                 H, W = frames_bgr[j].shape[:2]
                                 alpha_full = cv2.resize(alpha_harden, (W, H), interpolation=cv2.INTER_LINEAR)
                             else:
                                 alpha_full = alpha_harden
 
-                            # Composite at full-res (expects RGB)
                             frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
                             out_rgb = replace_background_hq(frame_rgb_full, alpha_full, background_rgb)
                             out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
                             out_bgr = np.ascontiguousarray(out_bgr)
 
-                            # Write
                            if ffmpeg_pipe is not None:
                                try:
                                    ffmpeg_pipe.write(out_bgr)
@@ -803,7 +753,6 @@
                             frame_count += 1
 
                         except Exception as e:
-                            # If MatAnyone fails, log and fall back to SAM-only for this frame
                             self.log.warning(f"MatAnyone failed at window frame {j}: {e}")
                             if j == 0:
                                 alpha_small_fb = np.clip(mask_small.astype(np.float32), 0.0, 1.0)
@@ -839,7 +788,6 @@
                                 writer.write(np.ascontiguousarray(out_bgr_fb))
                             frame_count += 1
 
-                    # Progress update
                     if progress_callback:
                         elapsed = time.time() - start_time
                         fps_live = frame_count / elapsed if elapsed > 0 else 0.0
@@ -848,7 +796,6 @@
                         except Exception:
                             pass
 
-                    # Clean per-window buffers (CPU) and let CUDA defrag
                     del frames_bgr, frames_small_bgr, frames_small_rgb, mask_small
                     try:
                         import torch
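
Standalone sketch of how the new environment overrides resolve (not part of the commit: the two helpers are copied verbatim from the diff above; the driver lines at the bottom are illustrative only):

import os

def _env_bool(name: str, default: bool) -> bool:
    # Unset -> keep the config default; any token outside the falsey set counts as True.
    v = os.environ.get(name, None)
    if v is None:
        return default
    return str(v).strip().lower() not in ("0", "no", "false", "off", "")

def _env_int(name: str, default: int) -> int:
    # Empty or malformed values fall back to the default.
    try:
        return int(os.environ.get(name, "").strip() or default)
    except Exception:
        return default

os.environ["MATANYONE_WINDOWED"] = "off"
os.environ["MATANYONE_WINDOW"] = "16"

print(_env_bool("MATANYONE_WINDOWED", True))    # False: "off" is a falsey token
print(max(1, _env_int("MATANYONE_WINDOW", 8)))  # 16, clamped to >= 1 as in __init__
print(_env_bool("SOME_UNSET_FLAG", True))       # True: unset keeps the default

Unlike the replaced __init__ block, which mutated self.config in place, these resolve once into private attributes (self._use_windowed, self._window_size, self._max_model_size), so the caller's config object is never written to.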
 
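The getattr-with-default pattern that replaces direct self.config.<field> reads serves the same goal. A minimal sketch, using a hypothetical LegacyConfig that predates the tuning fields:

class LegacyConfig:
    # Hypothetical older config object: defines none of the new tuning fields.
    use_windowed = True

cfg = LegacyConfig()

# Each lookup degrades to a default instead of raising AttributeError:
gamma = float(getattr(cfg, "mask_gamma", 0.90))       # -> 0.90
hard_low = float(getattr(cfg, "hard_low", 0.35))      # -> 0.35
preset = getattr(cfg, "background_preset", "office")  # -> "office"
print(gamma, hard_low, preset)

The defaults shown match the fallbacks introduced in the diff (_harden and _prepare_background_from_config).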