MogensR committed on
Commit de84d79 · 1 Parent(s): 843b297

Update processing/video/video_processor.py

Files changed (1)
  1. processing/video/video_processor.py +137 -76
processing/video/video_processor.py CHANGED
@@ -4,35 +4,36 @@
 
 Bridges the legacy import
     from processing.video.video_processor import CoreVideoProcessor
-to the modern pipeline functions in utils.cv_processing, using whatever
-models provider is passed in (e.g., models.loaders.ModelLoader).
+to the modern pipeline functions in utils (segment, refine, composite),
+using whatever models provider is passed in (e.g., models.loaders.ModelLoader).
 
 Requirements for the models provider:
-  - get_sam2() -> predictor or None
-  - get_matanyone() -> processor or None
+  - get_sam2() -> predictor or None
+  - get_matanyone() -> InferenceCore or compatible (or None)
 """
 
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Optional, Dict, Any, Tuple, Callable
+from typing import Optional, Dict, Any, Callable
 import time
 import threading
 
 import cv2
 import numpy as np
+import torch
 
-# Try project logger; fall back to std logging
+# Logger (fallback to std logging if your project logger isn't available)
 try:
-    from utils.logger import get_logger
-    _log = get_logger(__name__)
+    from utils.logging_setup import make_logger
+    _log = make_logger("processing.video.video_processor")
 except Exception:
     import logging
     logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
     _log = logging.getLogger(__name__)
 
-# CV pipeline helpers
-from utils.cv_processing import (
+# New, hardened utils (device-safe, SAM2↔MatAnyOne interop)
+from utils import (
     segment_person_hq,
     refine_mask_hq,
     replace_background_hq,
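
For reference, the provider contract described in the docstring above is deliberately small; a minimal stand-in (hypothetical class, not part of this commit) that exercises the fallback paths could look like:

    # Hypothetical null provider: both getters may return None, in which case
    # segment_person_hq / refine_mask_hq fall back to their built-in paths.
    class NullModels:
        def get_sam2(self):
            return None  # no SAM2 predictor available

        def get_matanyone(self):
            return None  # no MatAnyOne core available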
@@ -44,17 +45,18 @@
 
 @dataclass
 class ProcessorConfig:
-    background_preset: str = "minimalist"  # key in PROFESSIONAL_BACKGROUNDS
-    write_fps: Optional[float] = None      # None -> keep source fps
+    # Use a valid preset key from PROFESSIONAL_BACKGROUNDS (e.g., "office", "studio", …)
+    background_preset: str = "office"
+    # None -> keep source fps (if available), else default to 25.0
+    write_fps: Optional[float] = None
 
 
 class CoreVideoProcessor:
     """
-    Minimal, safe implementation used by core/app.py.
+    Minimal, safe implementation used by app entrypoint.
     It relies on a models provider (e.g., ModelLoader) that implements:
       - get_sam2()
       - get_matanyone()
-    and uses utils.cv_processing for the pipeline.
 
     Supports progress callback and cancellation via stop_event.
     """
@@ -62,41 +64,11 @@ class CoreVideoProcessor:
     def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
         self.log = _log
         self.config = config or ProcessorConfig()
-        self.models = models  # do NOT load here; core/app handles loading
+        self.models = models  # app sets this to a provider with get_sam2/get_matanyone
         if self.models is None:
             self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
 
-    # ---------- Single frame ----------
-    def process_frame(self, frame: np.ndarray, background: np.ndarray) -> Dict[str, Any]:
-        """Return dict with composited frame + mask; always attempts fallbacks."""
-        predictor = None
-        try:
-            if self.models and hasattr(self.models, "get_sam2"):
-                predictor = self.models.get_sam2()
-            # Some wrappers expose predictor directly, others are already usable;
-            # segment_person_hq checks for set_image/predict itself.
-        except Exception as e:
-            self.log.warning(f"SAM2 predictor unavailable: {e}")
-
-        # 1) segmentation (with fallbacks inside)
-        mask = segment_person_hq(frame, predictor, fallback_enabled=True)
-
-        # 2) refinement (MatAnyOne if available, else robust OpenCV path)
-        matanyone = None
-        try:
-            if self.models and hasattr(self.models, "get_matanyone"):
-                matanyone = self.models.get_matanyone()
-        except Exception as e:
-            self.log.warning(f"MatAnyOne unavailable: {e}")
-
-        mask_refined = refine_mask_hq(frame, mask, matanyone, fallback_enabled=True)
-
-        # 3) compositing
-        out = replace_background_hq(frame, mask_refined, background, fallback_enabled=True)
-
-        return {"frame": out, "mask": mask_refined}
-
-    # ---------- Build background once per video ----------
+    # ---------- Internals: background builder ----------
     def _prepare_background_from_config(
         self,
         bg_config: Optional[Dict[str, Any]],
@@ -105,30 +77,36 @@ def _prepare_background_from_config(
     ) -> np.ndarray:
         """
         Accepts either:
-          - {"custom_path": "/path/to/image.png"} → load image
-          - {"background_choice": "minimalist"} → preset
+          - {"custom_path": "/path/to/image.png"} → load that image
+          - {"background_choice": "<preset_key>"} → use preset key
           - None → use self.config.background_preset
+        Returns an RGB np.uint8 image (H x W x 3).
         """
-        # 1) custom image?
+        # 1) Custom image?
         if bg_config and bg_config.get("custom_path"):
             path = bg_config["custom_path"]
-            img = cv2.imread(path, cv2.IMREAD_COLOR)
-            if img is None:
-                self.log.warning(f"Custom background at '{path}' could not be read. Falling back to preset.")
+            img_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
+            if img_bgr is None:
+                self.log.warning("Custom background at '%s' could not be read. Falling back to preset.", path)
             else:
-                return cv2.resize(img, (width, height), interpolation=cv2.INTER_LANCZOS4)
+                img_bgr = cv2.resize(img_bgr, (width, height), interpolation=cv2.INTER_LANCZOS4)
+                return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
 
-        # 2) preset (explicit choice or default)
+        # 2) Preset (explicit or default)
         choice = None
         if bg_config and "background_choice" in bg_config:
            choice = bg_config["background_choice"]
         if not choice:
             choice = self.config.background_preset
 
-        cfg = PROFESSIONAL_BACKGROUNDS.get(choice, PROFESSIONAL_BACKGROUNDS["minimalist"])
-        return create_professional_background(cfg, width, height)
+        if choice not in PROFESSIONAL_BACKGROUNDS:
+            self.log.warning("Unknown background preset '%s'; using 'office'.", choice)
+            choice = "office"
+
+        bg_rgb = create_professional_background(choice, width, height)  # returns RGB
+        return bg_rgb
 
-    # ---------- Full video ----------
+    # ---------- Full video pipeline (first-frame seed + propagate) ----------
     def process_video(
         self,
         input_path: str,
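
As the docstring in this hunk notes, three bg_config shapes are accepted wherever a background config is passed; sketched here with illustrative values:

    bg_config = {"custom_path": "assets/studio.png"}  # load and resize an image file
    bg_config = {"background_choice": "office"}       # named PROFESSIONAL_BACKGROUNDS preset
    bg_config = None                                  # use ProcessorConfig.background_preset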
@@ -140,11 +118,19 @@ def process_video(
         """
         Process a full video with live progress and optional cancel.
         progress_callback(current_frame, total_frames, fps_live)
+
+        Pipeline:
+          - Read video (OpenCV)
+          - Build background (once)
+          - Frame 0: SAM2 segmentation → MatAnyOne refine (seed)
+          - Frames 1..N: MatAnyOne propagate (no mask)
+          - Composite each frame and write to MP4
         """
-        ok, msg = validate_video_file(input_path)
+        # Validate input video
+        ok = validate_video_file(input_path)
         if not ok:
-            raise ValueError(f"Invalid video: {msg}")
-        self.log.info(f"Video validation: {msg}")
+            raise ValueError("Invalid or unreadable video file")
+        self.log.info("Video validation OK: %s", input_path)
 
         cap = cv2.VideoCapture(input_path)
         if not cap.isOpened():
@@ -152,51 +138,126 @@ def process_video(
 
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = cap.get(cv2.CAP_PROP_FPS)
-        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        src_fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
 
-        fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
+        fps_out = self.config.write_fps or (src_fps if src_fps and src_fps > 0 else 25.0)
         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
         writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
         if not writer.isOpened():
             cap.release()
             raise RuntimeError(f"Could not open writer for: {output_path}")
 
-        # Build background once
-        background = self._prepare_background_from_config(bg_config, width, height)
+        # Build background (RGB)
+        background_rgb = self._prepare_background_from_config(bg_config, width, height)
+
+        # Models (allow fallbacks provided by app)
+        predictor = None
+        mat_core = None
+        try:
+            if self.models and hasattr(self.models, "get_sam2"):
+                predictor = self.models.get_sam2()
+        except Exception as e:
+            self.log.warning("SAM2 predictor unavailable: %s", e)
+        try:
+            if self.models and hasattr(self.models, "get_matanyone"):
+                mat_core = self.models.get_matanyone()
+        except Exception as e:
+            self.log.warning("MatAnyOne core unavailable: %s", e)
+
+        # Device (only used by helpers internally; we keep tensors on that device)
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.log.info("Starting processing on device=%s (size=%dx%d, fps_out=%.2f, frames=%s)",
+                      device, width, height, float(fps_out), total_frames or "unknown")
 
         frame_count = 0
         start_time = time.time()
+        refined_mask_prev: Optional[np.ndarray] = None
+
         try:
+            # -------- First frame (seed) --------
+            ret, f0_bgr = cap.read()
+            if not ret:
+                raise RuntimeError("Empty video")
+
+            f0_rgb = cv2.cvtColor(f0_bgr, cv2.COLOR_BGR2RGB)
+
+            # Segmentation (SAM2 preferred, else fallback)
+            m0_hw = segment_person_hq(
+                frame_rgb=f0_rgb,
+                use_sam2=True,
+                sam2_predictor=predictor
+            )
+            if m0_hw is None:
+                # As an absolute last resort, use a solid foreground mask (keeps pipeline alive)
+                self.log.warning("First-frame segmentation failed; using full-foreground mask.")
+                m0_hw = np.ones((f0_rgb.shape[0], f0_rgb.shape[1]), dtype=np.float32)
+
+            # Refine / seed MatAnyOne (first_frame=True makes the helper pass the mask)
+            refined_mask_0 = refine_mask_hq(
+                mask_hw_float01=m0_hw,
+                frame_rgb=f0_rgb,
+                use_matanyone=True,
+                mat_core=mat_core,
+                first_frame=True,
+                device=device
+            )
+            refined_mask_prev = refined_mask_0
+
+            # Composite & write
+            comp0_rgb = replace_background_hq(f0_rgb, refined_mask_0, background_rgb)
+            writer.write(cv2.cvtColor(comp0_rgb, cv2.COLOR_RGB2BGR))
+            frame_count = 1
+
+            if progress_callback:
+                elapsed = time.time() - start_time
+                fps_live = frame_count / elapsed if elapsed > 0 else 0.0
+                try:
+                    progress_callback(frame_count, total_frames, fps_live)
+                except Exception:
+                    pass
+
+            # -------- Remaining frames (propagate) --------
             while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
-
-                # Cancel support
                 if stop_event is not None and stop_event.is_set():
                     self.log.info("Processing stopped by user request.")
                     break
 
-                # Process single frame
-                result = self.process_frame(frame, background)
-                writer.write(result["frame"])
+                ret, fbgr = cap.read()
+                if not ret:
+                    break
+
+                frgb = cv2.cvtColor(fbgr, cv2.COLOR_BGR2RGB)
+
+                # Propagate (first_frame=False -> mask ignored internally, MatAnyOne uses memory)
+                refined_mask_t = refine_mask_hq(
+                    mask_hw_float01=refined_mask_prev if refined_mask_prev is not None else m0_hw,
+                    frame_rgb=frgb,
+                    use_matanyone=True,
+                    mat_core=mat_core,
+                    first_frame=False,
+                    device=device
+                )
+                refined_mask_prev = refined_mask_t
+
+                comp_rgb = replace_background_hq(frgb, refined_mask_t, background_rgb)
+                writer.write(cv2.cvtColor(comp_rgb, cv2.COLOR_RGB2BGR))
+
                frame_count += 1
 
-                # Progress callback
                 if progress_callback:
                     elapsed = time.time() - start_time
                     fps_live = frame_count / elapsed if elapsed > 0 else 0.0
                     try:
                         progress_callback(frame_count, total_frames, fps_live)
                     except Exception:
-                        # Don't break processing due to a UI callback error
                         pass
+
         finally:
             cap.release()
             writer.release()
 
-        self.log.info(f"Processed {frame_count} frames → {output_path}")
+        self.log.info("Processed %d frames → %s", frame_count, output_path)
        return {
             "frames": frame_count,
             "width": width,
 
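Taken together, a minimal driver for the updated class might look like the sketch below. Parameter and return-value names follow the code above; the ModelLoader import path comes from the module docstring, its default constructor is an assumption, and the file paths are illustrative:

    import threading

    from models.loaders import ModelLoader
    from processing.video.video_processor import CoreVideoProcessor, ProcessorConfig

    def on_progress(current: int, total: int, fps_live: float) -> None:
        # Signature matches progress_callback(current_frame, total_frames, fps_live)
        print(f"{current}/{total or '?'} frames ({fps_live:.1f} fps)")

    stop = threading.Event()  # call stop.set() from another thread to cancel

    proc = CoreVideoProcessor(
        config=ProcessorConfig(background_preset="office"),
        models=ModelLoader(),  # assumed default constructor
    )
    stats = proc.process_video(
        "input.mp4",
        "output.mp4",
        bg_config={"background_choice": "office"},
        progress_callback=on_progress,
        stop_event=stop,
    )
    print(f"Wrote {stats['frames']} frames")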