MogensR committed on
Commit
b8dd531
·
1 Parent(s): 4f1de42

Update processing/video/video_processor.py

Browse files
Files changed (1) hide show
  1. processing/video/video_processor.py +259 -39
processing/video/video_processor.py CHANGED
@@ -10,6 +10,8 @@
10
  - FFmpeg pipe writer with encoder fallbacks and stderr surfacing; falls back
11
  to OpenCV VideoWriter if FFmpeg isn't available or fails mid-run.
12
  - Temporal smoothing + mask hardening to avoid flicker/ghosting.
 
 
13
 
14
  Requirements for the models provider:
15
  - get_sam2() -> predictor or None
@@ -19,7 +21,7 @@
19
  from __future__ import annotations
20
 
21
  from dataclasses import dataclass
22
- from typing import Optional, Dict, Any, Callable
23
  import time
24
  import threading
25
  import shutil
@@ -119,6 +121,10 @@ class ProcessorConfig:
119
  hard_high: float = 0.70 # values above -> 1
120
  mask_gamma: float = 0.90 # <1 boosts mid-tones slightly
121
 
 
 
 
 
122
 
123
  class _FFmpegPipe:
124
  """
@@ -362,10 +368,10 @@ def _stabilize(self, m: np.ndarray) -> np.ndarray:
362
  self._prev_mask = m_ema
363
  return m_ema
364
 
365
- # ---------- Single frame ----------
366
  def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
367
  """
368
- Process one frame:
369
  - optionally downscale for model work,
370
  - segment + refine,
371
  - temporal stabilize + harden,
@@ -473,6 +479,50 @@ def _prepare_background_from_config(
473
 
474
  return create_professional_background(choice, width, height) # RGB
475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
476
  # ---------- Full video ----------
477
  def process_video(
478
  self,
@@ -526,54 +576,224 @@ def process_video(
526
  cap.release()
527
  raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
528
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
529
  frame_count = 0
530
  start_time = time.time()
 
531
  try:
532
- while True:
533
- ret, frame_bgr = cap.read()
534
- if not ret:
535
- break
536
-
537
- if stop_event is not None and stop_event.is_set():
538
- self.log.info("Processing stopped by user request.")
539
- break
540
-
541
- # Process single frame
542
- result = self.process_frame(frame_bgr, background_rgb)
543
- out_bgr = result["frame"]
544
- out_bgr = np.ascontiguousarray(out_bgr) # ensure contiguous for tobytes()
545
-
546
- # Write
547
- if ffmpeg_pipe is not None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  try:
549
- ffmpeg_pipe.write(out_bgr)
550
  except Exception as e:
551
- # Switch to OpenCV writer mid-run and continue
552
- self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
 
 
 
 
 
 
 
 
553
  try:
554
- ffmpeg_pipe.close()
555
  except Exception:
556
  pass
557
- ffmpeg_pipe = None
558
- if writer is None:
559
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
560
- writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
561
- if not writer.isOpened():
562
- raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
563
- writer.write(out_bgr)
564
- else:
565
- writer.write(out_bgr)
566
-
567
- frame_count += 1
568
 
569
- # Progress
570
- if progress_callback:
571
- elapsed = time.time() - start_time
572
- fps_live = frame_count / elapsed if elapsed > 0 else 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  try:
574
- progress_callback(frame_count, total_frames, fps_live)
 
 
575
  except Exception:
576
  pass
 
577
  finally:
578
  cap.release()
579
  if writer is not None:
 
10
  - FFmpeg pipe writer with encoder fallbacks and stderr surfacing; falls back
11
  to OpenCV VideoWriter if FFmpeg isn't available or fails mid-run.
12
  - Temporal smoothing + mask hardening to avoid flicker/ghosting.
13
+ - NEW: Windowed two-phase execution (SAM2 window → release → MatAnyone window)
14
+ to avoid GPU fragmentation/OOM on T4 (16GB).
15
 
16
  Requirements for the models provider:
17
  - get_sam2() -> predictor or None
 
21
  from __future__ import annotations
22
 
23
  from dataclasses import dataclass
24
+ from typing import Optional, Dict, Any, Callable, List, Tuple
25
  import time
26
  import threading
27
  import shutil
 
121
  hard_high: float = 0.70 # values above -> 1
122
  mask_gamma: float = 0.90 # <1 boosts mid-tones slightly
123
 
124
+ # ---------- NEW: windowed two-phase control ----------
125
+ use_windowed: bool = True # enable two-phase SAM2→MatAnyone per chunk
126
+ window_size: int = 8 # frames per window
127
+
128
 
129
  class _FFmpegPipe:
130
  """
 
368
  self._prev_mask = m_ema
369
  return m_ema
370
 
371
+ # ---------- Single frame (fallback path) ----------
372
  def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
373
  """
374
+ Process one frame (legacy per-frame path):
375
  - optionally downscale for model work,
376
  - segment + refine,
377
  - temporal stabilize + harden,
 
479
 
480
  return create_professional_background(choice, width, height) # RGB
481
 
482
+ # ---------- Windowed two-phase helpers ----------
483
def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
    """
    Shrink a frame for model work only, preserving aspect ratio.

    When ``self.config.max_model_size`` is set (truthy) and the frame's
    longer side exceeds it, the frame is resized with ``cv2.INTER_AREA`` so
    that the longer side is scaled down to that limit. Otherwise the input
    frame is returned as-is (same object, no copy).

    Args:
        frame_bgr: Image array; only ``shape[:2]`` (height, width) is
            inspected here. Presumably BGR as the name suggests -- the
            channel order is not relied upon by this method.

    Returns:
        ``(frame, scale)`` where ``scale`` is the factor that was applied to
        both sides, or ``1.0`` when no resize happened.
    """
    height, width = frame_bgr.shape[:2]
    limit = self.config.max_model_size
    longest = max(height, width)

    # Nothing to do when no limit is configured or the frame already fits.
    if not limit or longest <= limit:
        return frame_bgr, 1.0

    scale = limit / float(longest)
    # Clamp to at least 1 px: for extreme aspect ratios the short side can
    # round down to 0, and cv2.resize rejects an empty destination size.
    new_w = max(1, int(round(width * scale)))
    new_h = max(1, int(round(height * scale)))
    small = cv2.resize(frame_bgr, (new_w, new_h), interpolation=cv2.INTER_AREA)
    return small, scale
494
+
495
+ def _release_sam2_gpu(self, predictor):
496
+ """Best-effort release of SAM2 GPU residency between phases."""
497
+ try:
498
+ if predictor is None:
499
+ return
500
+ # Clear any sticky per-image state if exposed
501
+ for name in ("reset_image", "release_image", "clear_image", "clear_state"):
502
+ if hasattr(predictor, name) and callable(getattr(predictor, name)):
503
+ try:
504
+ getattr(predictor, name)()
505
+ except Exception:
506
+ pass
507
+ # Try moving large parts off-GPU (best-effort, may be no-op)
508
+ for name in ("to", "cpu"):
509
+ if hasattr(predictor, name):
510
+ try:
511
+ if name == "to":
512
+ predictor.to("cpu") # type: ignore[attr-defined]
513
+ else:
514
+ predictor.cpu() # type: ignore[attr-defined]
515
+ except Exception:
516
+ pass
517
+ except Exception:
518
+ pass
519
+ try:
520
+ import torch
521
+ if torch.cuda.is_available():
522
+ torch.cuda.empty_cache()
523
+ except Exception:
524
+ pass
525
+
526
  # ---------- Full video ----------
527
  def process_video(
528
  self,
 
576
  cap.release()
577
  raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
578
 
579
+ # Determine models and decide execution mode
580
+ predictor = None
581
+ matanyone = None
582
+ try:
583
+ if self.models and hasattr(self.models, "get_sam2"):
584
+ predictor = self.models.get_sam2()
585
+ except Exception as e:
586
+ self.log.warning(f"SAM2 predictor unavailable: {e}")
587
+
588
+ try:
589
+ if self.models and hasattr(self.models, "get_matanyone"):
590
+ matanyone = self.models.get_matanyone()
591
+ except Exception as e:
592
+ self.log.warning(f"MatAnyOne unavailable: {e}")
593
+
594
+ use_windowed = bool(self.config.use_windowed and predictor is not None and matanyone is not None)
595
+
596
  frame_count = 0
597
  start_time = time.time()
598
+
599
  try:
600
+ if not use_windowed:
601
+ # --------- Legacy per-frame path (fallback) ----------
602
+ while True:
603
+ ret, frame_bgr = cap.read()
604
+ if not ret:
605
+ break
606
+ if stop_event is not None and stop_event.is_set():
607
+ self.log.info("Processing stopped by user request.")
608
+ break
609
+
610
+ result = self.process_frame(frame_bgr, background_rgb)
611
+ out_bgr = np.ascontiguousarray(result["frame"])
612
+
613
+ if ffmpeg_pipe is not None:
614
+ try:
615
+ ffmpeg_pipe.write(out_bgr)
616
+ except Exception as e:
617
+ self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
618
+ try:
619
+ ffmpeg_pipe.close()
620
+ except Exception:
621
+ pass
622
+ ffmpeg_pipe = None
623
+ if writer is None:
624
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
625
+ writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
626
+ if not writer.isOpened():
627
+ raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
628
+ writer.write(out_bgr)
629
+ else:
630
+ writer.write(out_bgr)
631
+
632
+ frame_count += 1
633
+ if progress_callback:
634
+ elapsed = time.time() - start_time
635
+ fps_live = frame_count / elapsed if elapsed > 0 else 0.0
636
+ try: progress_callback(frame_count, total_frames, fps_live)
637
+ except Exception: pass
638
+
639
+ else:
640
+ # --------- Windowed two-phase path ----------
641
+ WINDOW = max(1, int(self.config.window_size))
642
+
643
+ while True:
644
+ # Read a window of frames
645
+ frames_bgr: List[np.ndarray] = []
646
+ for _ in range(WINDOW):
647
+ ret, fr = cap.read()
648
+ if not ret:
649
+ break
650
+ frames_bgr.append(fr)
651
+
652
+ if not frames_bgr:
653
+ break # no more frames
654
+
655
+ if stop_event is not None and stop_event.is_set():
656
+ self.log.info("Processing stopped by user request.")
657
+ break
658
+
659
+ # Model-only downscale frames for model work (consistent per window)
660
+ frames_small_bgr: List[np.ndarray] = []
661
+ scales: List[float] = []
662
+ for fr in frames_bgr:
663
+ fr_small, s = self._model_downscale(fr)
664
+ frames_small_bgr.append(fr_small)
665
+ scales.append(s)
666
+ # Use the first scale (frames normally same size)
667
+ scale = scales[0] if scales else 1.0
668
+
669
+ # Convert small frames to RGB for models
670
+ frames_small_rgb = [cv2.cvtColor(fb, cv2.COLOR_BGR2RGB) for fb in frames_small_bgr]
671
+
672
+ # -------- SAM2 phase (prime with first frame's mask) --------
673
+ # We only need the mask for the first frame in the window.
674
  try:
675
+ mask_small = segment_person_hq(frames_small_rgb[0], predictor, use_sam2=True)
676
  except Exception as e:
677
+ self.log.warning(f"SAM2 segmentation error on window start: {e}")
678
+ # Fall back to simple segmentation in helper
679
+ mask_small = segment_person_hq(frames_small_rgb[0], None, use_sam2=False)
680
+
681
+ # Release SAM2 GPU residency before MatAnyone phase
682
+ self._release_sam2_gpu(predictor)
683
+
684
+ # -------- MatAnyone phase (prime + propagate) --------
685
+ # Reset session at window start if supported
686
+ if hasattr(matanyone, "reset"):
687
  try:
688
+ matanyone.reset()
689
  except Exception:
690
  pass
 
 
 
 
 
 
 
 
 
 
 
691
 
692
+ # j==0: pass 2-D mask; j>0: propagate without mask
693
+ for j, fr_rgb_small in enumerate(frames_small_rgb):
694
+ try:
695
+ if j == 0:
696
+ m2d = mask_small
697
+ if m2d.ndim == 3:
698
+ m2d = m2d[..., 0]
699
+ alpha_small = matanyone(fr_rgb_small, m2d) # adapter returns float32 [h,w]
700
+ else:
701
+ alpha_small = matanyone(fr_rgb_small) # propagate (no mask)
702
+
703
+ # Stabilize + harden at model scale
704
+ alpha_small = np.clip(alpha_small.astype(np.float32), 0.0, 1.0)
705
+ alpha_stable = self._stabilize(alpha_small)
706
+ alpha_harden = self._harden(alpha_stable)
707
+
708
+ # Upsample back to full-res
709
+ if scale != 1.0:
710
+ H, W = frames_bgr[j].shape[:2]
711
+ alpha_full = cv2.resize(alpha_harden, (W, H), interpolation=cv2.INTER_LINEAR)
712
+ else:
713
+ alpha_full = alpha_harden
714
+
715
+ # Composite at full-res (expects RGB)
716
+ frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
717
+ out_rgb = replace_background_hq(frame_rgb_full, alpha_full, background_rgb)
718
+ out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
719
+ out_bgr = np.ascontiguousarray(out_bgr)
720
+
721
+ # Write
722
+ if ffmpeg_pipe is not None:
723
+ try:
724
+ ffmpeg_pipe.write(out_bgr)
725
+ except Exception as e:
726
+ self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
727
+ try:
728
+ ffmpeg_pipe.close()
729
+ except Exception:
730
+ pass
731
+ ffmpeg_pipe = None
732
+ if writer is None:
733
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
734
+ writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
735
+ if not writer.isOpened():
736
+ raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
737
+ writer.write(out_bgr)
738
+ else:
739
+ writer.write(out_bgr)
740
+
741
+ frame_count += 1
742
+
743
+ except Exception as e:
744
+ # If MatAnyone fails, log and fall back to SAM-only for this frame
745
+ self.log.warning(f"MatAnyone failed at window frame {j}: {e}")
746
+ # basic fallback: composite with original SAM mask for j==0, else reuse prev mask
747
+ if j == 0:
748
+ alpha_small_fb = np.clip(mask_small.astype(np.float32), 0.0, 1.0)
749
+ else:
750
+ alpha_small_fb = self._prev_mask if self._prev_mask is not None else np.zeros_like(alpha_small, dtype=np.float32)
751
+
752
+ if scale != 1.0:
753
+ H, W = frames_bgr[j].shape[:2]
754
+ alpha_full_fb = cv2.resize(alpha_small_fb, (W, H), interpolation=cv2.INTER_LINEAR)
755
+ else:
756
+ alpha_full_fb = alpha_small_fb
757
+
758
+ frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
759
+ out_rgb_fb = replace_background_hq(frame_rgb_full, alpha_full_fb, background_rgb)
760
+ out_bgr_fb = cv2.cvtColor(out_rgb_fb, cv2.COLOR_RGB2BGR)
761
+
762
+ if ffmpeg_pipe is not None:
763
+ try:
764
+ ffmpeg_pipe.write(np.ascontiguousarray(out_bgr_fb))
765
+ except Exception:
766
+ try:
767
+ ffmpeg_pipe.close()
768
+ except Exception:
769
+ pass
770
+ ffmpeg_pipe = None
771
+ if writer is None:
772
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
773
+ writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
774
+ if not writer.isOpened():
775
+ raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
776
+ writer.write(np.ascontiguousarray(out_bgr_fb))
777
+ else:
778
+ writer.write(np.ascontiguousarray(out_bgr_fb))
779
+ frame_count += 1
780
+
781
+ # Progress update
782
+ if progress_callback:
783
+ elapsed = time.time() - start_time
784
+ fps_live = frame_count / elapsed if elapsed > 0 else 0.0
785
+ try: progress_callback(frame_count, total_frames, fps_live)
786
+ except Exception: pass
787
+
788
+ # Clean per-window buffers (CPU) and let CUDA defrag
789
+ del frames_bgr, frames_small_bgr, frames_small_rgb, mask_small
790
  try:
791
+ import torch
792
+ if torch.cuda.is_available():
793
+ torch.cuda.empty_cache()
794
  except Exception:
795
  pass
796
+
797
  finally:
798
  cap.release()
799
  if writer is not None: