MogensR committed on
Commit
a647170
·
1 Parent(s): 6a16de4

Update processing/video/video_processor.py

Browse files
Files changed (1) hide show
  1. processing/video/video_processor.py +273 -82
processing/video/video_processor.py CHANGED
@@ -7,7 +7,8 @@
7
  {"background_choice": "<preset_key>"}
8
  {"gradient": {type, start, end, angle_deg}}
9
  - Model-only downscale (max_model_size) for speed, full-res render.
10
- - Optional NVENC (ffmpeg pipe) writer; falls back to OpenCV writer.
 
11
 
12
  Requirements for the models provider:
13
  - get_sam2() -> predictor or None
@@ -17,11 +18,12 @@
17
  from __future__ import annotations
18
 
19
  from dataclasses import dataclass
20
- from typing import Optional, Dict, Any, Tuple, Callable
21
  import time
22
  import threading
23
  import shutil
24
  import subprocess
 
25
 
26
  import cv2
27
  import numpy as np
@@ -35,25 +37,61 @@
35
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
36
  _log = logging.getLogger(__name__)
37
 
38
- # CV pipeline helpers (from your unified utils)
39
- from utils import (
40
  segment_person_hq,
41
  refine_mask_hq,
42
  replace_background_hq,
43
  create_professional_background,
44
- create_gradient_background,
45
  validate_video_file,
46
  PROFESSIONAL_BACKGROUNDS,
47
  )
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  @dataclass
51
  class ProcessorConfig:
52
  background_preset: str = "office" # key in PROFESSIONAL_BACKGROUNDS
53
  write_fps: Optional[float] = None # None -> keep source fps
 
54
  # Model-only downscale (speedup without changing output resolution)
55
  max_model_size: Optional[int] = 1280
56
- # NVENC output (ffmpeg pipe); otherwise OpenCV writer (mp4v)
 
57
  use_nvenc: bool = True
58
  nvenc_codec: str = "h264" # "h264" or "hevc"
59
  nvenc_preset: str = "p5" # NVENC preset string
@@ -61,6 +99,185 @@ class ProcessorConfig:
61
  nvenc_tune_hq: bool = True
62
  nvenc_pix_fmt: str = "yuv420p" # browser-safe
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  class CoreVideoProcessor:
66
  """
@@ -68,7 +285,7 @@ class CoreVideoProcessor:
68
  It relies on a models provider (e.g., ModelLoader) that implements:
69
  - get_sam2()
70
  - get_matanyone()
71
- and uses utils.* for the pipeline.
72
 
73
  Supports progress callback and cancellation via stop_event.
74
  """
@@ -79,7 +296,6 @@ def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[An
79
  self.models = models # do NOT load here; core/app handles loading
80
  if self.models is None:
81
  self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
82
- # ffmpeg presence (for NVENC)
83
  self._ffmpeg = shutil.which("ffmpeg")
84
 
85
  # ---------- Single frame ----------
@@ -103,9 +319,9 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
103
  newW = int(round(W * scale))
104
  newH = int(round(H * scale))
105
  proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
106
- self.log.debug(f"Model-only downscale enabled: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
107
 
108
- # Convert to RGB for model utils if they expect RGB
109
  proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
110
 
111
  predictor = None
@@ -115,10 +331,10 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
115
  except Exception as e:
116
  self.log.warning(f"SAM2 predictor unavailable: {e}")
117
 
118
- # 1) segmentation (with fallbacks inside)
119
  mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
120
 
121
- # 2) refinement (MatAnyOne if available, else robust OpenCV path)
122
  matanyone = None
123
  try:
124
  if self.models and hasattr(self.models, "get_matanyone"):
@@ -126,7 +342,8 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
126
  except Exception as e:
127
  self.log.warning(f"MatAnyOne unavailable: {e}")
128
 
129
- mask_small_ref = refine_mask_hq(mask_small, proc_frame_rgb, use_matanyone=True, mat_core=matanyone)
 
130
 
131
  # Upsample mask back to full-res
132
  if scale != 1.0:
@@ -134,11 +351,11 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
134
  else:
135
  mask_full = mask_small_ref.astype(np.float32)
136
 
137
- # 3) compositing (expect RGB arrays in helper)
138
  frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
139
  out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
140
 
141
- # Convert back to BGR for writer
142
  out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
143
  return {"frame": out_bgr, "mask": mask_full}
144
 
@@ -151,7 +368,7 @@ def _prepare_background_from_config(
151
  ) -> np.ndarray:
152
  """
153
  Accepts either:
154
- - {"custom_path": "/path/to/image.png"} → load image
155
  - {"background_choice": "office"} → preset
156
  - {"gradient": {type,start,end,angle_deg}} → generated gradient
157
  Returns RGB np.uint8
@@ -169,7 +386,7 @@ def _prepare_background_from_config(
169
  # 2) gradient?
170
  if bg_config and isinstance(bg_config.get("gradient"), dict):
171
  try:
172
- return create_gradient_background(bg_config["gradient"], width, height)
173
  except Exception as e:
174
  self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
175
 
@@ -186,44 +403,6 @@ def _prepare_background_from_config(
186
 
187
  return create_professional_background(choice, width, height) # RGB
188
 
189
- # ---------- Writers ----------
190
- def _open_writer_ffmpeg_nvenc(self, output_path: str, fps: float, width: int, height: int):
191
- """
192
- Open an ffmpeg NVENC pipe that accepts raw BGR frames via stdin.
193
- """
194
- if not self._ffmpeg:
195
- return None
196
-
197
- vcodec = "h264_nvenc" if self.config.nvenc_codec.lower() == "h264" else "hevc_nvenc"
198
- preset = self.config.nvenc_preset
199
- tune = ["-tune", "hq"] if self.config.nvenc_tune_hq else []
200
- cq = ["-cq", str(int(self.config.nvenc_cq))]
201
- pixfmt = self.config.nvenc_pix_fmt # usually yuv420p for web
202
-
203
- cmd = [
204
- self._ffmpeg, "-hide_banner", "-loglevel", "error",
205
- "-f", "rawvideo",
206
- "-pix_fmt", "bgr24", # we feed BGR frames directly
207
- "-s", f"{width}x{height}",
208
- "-r", f"{fps:.6f}",
209
- "-i", "-", # stdin
210
- "-an",
211
- "-c:v", vcodec,
212
- "-preset", preset,
213
- *tune,
214
- *cq,
215
- "-pix_fmt", pixfmt,
216
- "-movflags", "+faststart",
217
- "-y", output_path,
218
- ]
219
- self.log.info(f"Using NVENC ({self.config.nvenc_codec}) via ffmpeg.")
220
- try:
221
- proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
222
- return proc
223
- except Exception as e:
224
- self.log.warning(f"ffmpeg NVENC open failed: {e}")
225
- return None
226
-
227
  # ---------- Full video ----------
228
  def process_video(
229
  self,
@@ -237,9 +416,9 @@ def process_video(
237
  Process a full video with live progress and optional cancel.
238
  progress_callback(current_frame, total_frames, fps_live)
239
  """
240
- ok = validate_video_file(input_path)
241
  if not ok:
242
- raise ValueError("Invalid or unreadable video")
243
 
244
  cap = cv2.VideoCapture(input_path)
245
  if not cap.isOpened():
@@ -252,22 +431,27 @@ def process_video(
252
 
253
  fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
254
 
255
- # Build background once (RGB)
256
  background_rgb = self._prepare_background_from_config(bg_config, width, height)
257
 
258
- # Writer: try NVENC (ffmpeg pipe) else OpenCV
259
- ffmpeg_proc = None
260
- writer = None
261
- if self.config.use_nvenc:
262
- ffmpeg_proc = self._open_writer_ffmpeg_nvenc(output_path, float(fps_out), width, height)
263
 
264
- if ffmpeg_proc is None:
265
- self.log.info("Using OpenCV writer (mp4v).")
 
 
 
 
 
 
266
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
267
  writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
268
  if not writer.isOpened():
269
  cap.release()
270
- raise RuntimeError(f"Could not open writer for: {output_path}")
271
 
272
  frame_count = 0
273
  start_time = time.time()
@@ -284,17 +468,26 @@ def process_video(
284
  # Process single frame
285
  result = self.process_frame(frame_bgr, background_rgb)
286
  out_bgr = result["frame"]
 
287
 
288
  # Write
289
- if ffmpeg_proc is not None and ffmpeg_proc.stdin:
290
  try:
291
- ffmpeg_proc.stdin.write(out_bgr.tobytes())
292
  except Exception as e:
293
- self.log.warning(f"ffmpeg pipe write failed at frame {frame_count}: {e}")
294
- # fallback: stop pipe and bail out
295
- ffmpeg_proc.stdin.close()
296
- ffmpeg_proc.wait(timeout=2)
297
- raise
 
 
 
 
 
 
 
 
298
  else:
299
  writer.write(out_bgr)
300
 
@@ -312,18 +505,16 @@ def process_video(
312
  cap.release()
313
  if writer is not None:
314
  writer.release()
315
- if ffmpeg_proc is not None:
316
  try:
317
- if ffmpeg_proc.stdin:
318
- ffmpeg_proc.stdin.close()
319
- ffmpeg_proc.wait(timeout=10)
320
  except Exception:
321
- try:
322
- ffmpeg_proc.kill()
323
- except Exception:
324
- pass
325
 
326
- self.log.info(f"Processed {frame_count} frames → {output_path}")
327
  return {
328
  "frames": frame_count,
329
  "width": width,
 
7
  {"background_choice": "<preset_key>"}
8
  {"gradient": {type, start, end, angle_deg}}
9
  - Model-only downscale (max_model_size) for speed, full-res render.
10
+ - FFmpeg pipe writer with encoder fallbacks and stderr surfacing; falls back
11
+ to OpenCV VideoWriter if FFmpeg isn't available or fails mid-run.
12
 
13
  Requirements for the models provider:
14
  - get_sam2() -> predictor or None
 
18
  from __future__ import annotations
19
 
20
  from dataclasses import dataclass
21
+ from typing import Optional, Dict, Any, Callable
22
  import time
23
  import threading
24
  import shutil
25
  import subprocess
26
+ import shlex
27
 
28
  import cv2
29
  import numpy as np
 
37
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
38
  _log = logging.getLogger(__name__)
39
 
40
+ # Import directly from utils.cv_processing to avoid circular imports via utils/__init__.py
41
+ from utils.cv_processing import (
42
  segment_person_hq,
43
  refine_mask_hq,
44
  replace_background_hq,
45
  create_professional_background,
 
46
  validate_video_file,
47
  PROFESSIONAL_BACKGROUNDS,
48
  )
49
 
50
+ # ---------- local gradient helper (no extra imports needed) ----------
51
def _to_rgb(c):
    """
    Best-effort conversion of a colour spec to an (r, g, b) tuple of ints.

    Accepts a 3-item list/tuple of numbers or a "#rrggbb" string. Each
    component is clamped to 0..255 so the result can always be stored in a
    uint8 image without wrapping or overflowing. Any other input yields
    white (255, 255, 255).

    Raises ValueError if a "#......" string contains non-hex characters
    (the caller treats that as "gradient generation failed").
    """
    def _clamp(v) -> int:
        return min(255, max(0, int(v)))

    if isinstance(c, (list, tuple)) and len(c) == 3:
        return tuple(_clamp(x) for x in c)
    if isinstance(c, str) and c.startswith("#") and len(c) == 7:
        return tuple(_clamp(int(c[i:i + 2], 16)) for i in (1, 3, 5))
    return (255, 255, 255)


def _create_gradient_background_local(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
    """
    Minimal linear gradient generator for backgrounds.

    spec = {"type": "linear"|"radial"(ignored), "start": (r,g,b)|"#rrggbb",
            "end": ..., "angle_deg": float}

    angle_deg == 0 gives a top (start) -> bottom (end) ramp; positive angles
    turn the ramp counter-clockwise on screen, so 90 runs left -> right and
    180 runs bottom -> top.

    The ramp is computed by projecting every pixel onto the gradient
    direction and normalising over the whole image, so the start and end
    colours land exactly on the two extreme corners at any angle. This
    avoids the mirrored bands that appear when an image-sized vertical
    gradient is rotated and its corners are filled by border reflection.

    Returns RGB np.uint8 of shape (height, width, 3).
    """
    start = np.array(_to_rgb(spec.get("start", "#222222")), dtype=np.float64)
    end = np.array(_to_rgb(spec.get("end", "#888888")), dtype=np.float64)
    angle = float(spec.get("angle_deg", 0)) % 360.0

    width = int(width)
    height = int(height)
    if width <= 0 or height <= 0:
        # Nothing to paint; np.zeros still rejects negative sizes.
        return np.zeros((height, width, 3), np.uint8)

    # Unit direction of the ramp in image coordinates (x right, y down).
    # Rounding removes float noise such as cos(90 deg) = 6e-17, so axis-aligned
    # angles produce exact 0/1 weights.
    theta = np.deg2rad(angle)
    dir_x = float(np.round(np.sin(theta), 12))
    dir_y = float(np.round(np.cos(theta), 12))

    rows = np.arange(height, dtype=np.float64)[:, None]
    cols = np.arange(width, dtype=np.float64)[None, :]
    proj = rows * dir_y + cols * dir_x  # (H, W) signed distance along the ramp

    lo = proj.min()
    span = proj.max() - lo
    # A 1-pixel extent along the ramp has no span: use the start colour.
    t = (proj - lo) / span if span > 0 else np.zeros_like(proj)
    t = t[..., None]

    # Same blend and truncation as int(start*(1-t) + end*t) per channel.
    return (start * (1.0 - t) + end * t).astype(np.uint8)
85
 
86
  @dataclass
87
  class ProcessorConfig:
88
  background_preset: str = "office" # key in PROFESSIONAL_BACKGROUNDS
89
  write_fps: Optional[float] = None # None -> keep source fps
90
+
91
  # Model-only downscale (speedup without changing output resolution)
92
  max_model_size: Optional[int] = 1280
93
+
94
+ # FFmpeg / NVENC output (pipe). If disabled or unavailable, use OpenCV writer.
95
  use_nvenc: bool = True
96
  nvenc_codec: str = "h264" # "h264" or "hevc"
97
  nvenc_preset: str = "p5" # NVENC preset string
 
99
  nvenc_tune_hq: bool = True
100
  nvenc_pix_fmt: str = "yuv420p" # browser-safe
101
 
102
+ # libx264 fallback
103
+ x264_preset: str = "medium"
104
+ x264_crf: int = 18
105
+ x264_pix_fmt: str = "yuv420p"
106
+
107
+ movflags_faststart: bool = True
108
+
109
+
110
class _FFmpegPipe:
    """
    Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.

    Raw BGR frames are written to ffmpeg's stdin; ffmpeg encodes them to
    `out_path`. On construction the encoders are tried in order
    (NVENC if enabled in the config, then libx264, then mpeg4) and the first
    one that survives start-up is kept in `encoder_used`.

    Raises RuntimeError from the constructor if ffmpeg is not on PATH or no
    encoder could be started.
    """

    def __init__(self, width: int, height: int, fps: float, out_path: str, cfg: "ProcessorConfig", log=None):
        self.width = int(width)
        self.height = int(height)
        self.fps = float(fps) if fps and fps > 0 else 25.0
        self.out_path = out_path
        self.cfg = cfg
        # No logger given -> use this module's logger.
        self.log = log if log is not None else logging.getLogger(__name__)

        self.proc: Optional[subprocess.Popen] = None
        self.encoder_used: Optional[str] = None
        self._stderr: bytes | None = None

        self._ffmpeg = shutil.which("ffmpeg")
        if not self._ffmpeg:
            raise RuntimeError("ffmpeg not found on PATH")

        self._start_with_fallbacks()

    def _cmd_for_encoder(self, encoder: str) -> list[str]:
        """Build the ffmpeg argv for `encoder`; unknown names use the libx264 settings."""
        cfg = self.cfg
        cmd = [
            self._ffmpeg,
            "-hide_banner", "-loglevel", "error",
            "-y",
            # rawvideo input from stdin
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "bgr24",
            "-s", f"{self.width}x{self.height}",
            "-r", f"{self.fps}",
            "-i", "-",  # stdin
            "-an",      # no audio here
        ]
        if cfg.movflags_faststart:
            cmd += ["-movflags", "+faststart"]

        if encoder in ("h264_nvenc", "hevc_nvenc"):
            # Both NVENC encoders take the same option set.
            cmd += [
                "-c:v", encoder,
                "-preset", cfg.nvenc_preset,
                "-cq", str(int(cfg.nvenc_cq)),
                "-pix_fmt", cfg.nvenc_pix_fmt,
            ]
            if cfg.nvenc_tune_hq:
                cmd += ["-tune", "hq"]
        elif encoder == "mpeg4":
            cmd += [
                "-c:v", "mpeg4",
                "-q:v", "2",
                "-pix_fmt", "yuv420p",
            ]
        else:
            # "libx264" and any unrecognised encoder name.
            cmd += [
                "-c:v", "libx264",
                "-preset", cfg.x264_preset,
                "-crf", str(int(cfg.x264_crf)),
                "-pix_fmt", cfg.x264_pix_fmt,
            ]

        cmd.append(self.out_path)
        return cmd

    @staticmethod
    def _close_quietly(*streams) -> None:
        """Close pipe ends of a finished process, ignoring errors."""
        for stream in streams:
            if stream is None:
                continue
            try:
                stream.close()
            except Exception:
                pass

    @staticmethod
    def _stderr_after_exit(proc, grace_s: float = 0.5) -> bytes:
        """
        Return ffmpeg's stderr, but only once the process has exited.

        Reading stderr of a live process blocks until it closes the pipe, so
        the process gets `grace_s` seconds to exit and b"" is returned if it
        is still running.
        """
        try:
            proc.wait(timeout=grace_s)
        except Exception:
            return b""
        try:
            return (proc.stderr.read() if proc.stderr else b"") or b""
        except Exception:
            return b""

    def _try_start(self, enc: str) -> bool:
        """Start ffmpeg with `enc`; return True if it is still alive shortly afterwards."""
        cmd = self._cmd_for_encoder(enc)
        try:
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stderr=subprocess.PIPE,
                bufsize=10**7,
            )
        except Exception as e:
            self.log.warning("Failed to start FFmpeg with %s: %s", enc, e)
            self.proc = None
            return False

        self.log.info("FFmpeg started: %s", " ".join(shlex.quote(c) for c in cmd))
        # quick poll: if ffmpeg dies immediately, fail fast
        time.sleep(0.05)
        if proc.poll() is not None:
            try:
                self._stderr = proc.stderr.read() if proc.stderr else b""
            except Exception:
                self._stderr = b""
            # The process is gone; release its pipe handles before the next attempt.
            self._close_quietly(proc.stdin, proc.stderr)
            self.log.warning("FFmpeg exited on start with %s: %s", enc, (self._stderr or b"").decode(errors="ignore"))
            self.proc = None
            self.encoder_used = None
            return False

        self.proc = proc
        self.encoder_used = enc
        return True

    def _start_with_fallbacks(self):
        """Try each candidate encoder in order; raise RuntimeError if none starts."""
        encoders = []
        if self.cfg.use_nvenc:
            encoders += ["h264_nvenc"] if self.cfg.nvenc_codec.lower() == "h264" else ["hevc_nvenc"]
        encoders += ["libx264", "mpeg4"]
        for enc in encoders:
            if self._try_start(enc):
                return
        msg = "Could not start FFmpeg with any encoder (nvenc/libx264/mpeg4). Is ffmpeg present and codecs available?"
        if self._stderr:
            msg += f" Stderr: {(self._stderr or b'').decode(errors='ignore')[:500]}"
        raise RuntimeError(msg)

    def write(self, frame_bgr: np.ndarray):
        """
        Send one frame to ffmpeg.

        Raises:
            RuntimeError: the ffmpeg process is not running.
            ValueError: the frame is not a uint8 (height, width, 3) array of
                the size given at construction.
            BrokenPipeError: writing to the pipe failed; the message carries
                ffmpeg's stderr when the process has already exited, and the
                original error is attached as the cause.
        """
        proc = self.proc
        if proc is None or proc.stdin is None:
            raise RuntimeError("FFmpeg process is not running (stdin is None).")
        if not isinstance(frame_bgr, np.ndarray) or frame_bgr.dtype != np.uint8:
            raise ValueError("Frame must be a np.ndarray of dtype uint8.")
        if frame_bgr.ndim != 3 or frame_bgr.shape[2] != 3:
            raise ValueError("Frame must have shape (H, W, 3).")
        if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
            raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")

        # ensure contiguous for tobytes()
        payload = np.ascontiguousarray(frame_bgr).tobytes()
        try:
            proc.stdin.write(payload)
        except Exception as e:
            # collect stderr for diagnostics (never blocks on a live process)
            stderr = self._stderr_after_exit(proc)
            msg = f"FFmpeg pipe write failed: {e}"
            if stderr:
                msg += f"\nffmpeg stderr: {stderr.decode(errors='ignore')[:1000]}"
            raise BrokenPipeError(msg) from e

    def close(self):
        """
        Finish the stream: flush and close stdin, collect stderr, and wait for
        ffmpeg to exit. If that does not complete within 10 seconds (or fails),
        the process is killed. Safe to call more than once.
        """
        proc, self.proc = self.proc, None
        if proc is None:
            return
        try:
            # communicate() closes stdin and drains stderr under one timeout,
            # so a hung ffmpeg cannot block this call indefinitely on a read.
            _, err = proc.communicate(timeout=10)
            if err:
                self.log.debug("FFmpeg stderr (tail): %s", err.decode(errors="ignore")[-2000:])
            if proc.returncode:
                self.log.warning("FFmpeg exited with code %s; output may be incomplete: %s", proc.returncode, self.out_path)
        except Exception:
            try:
                proc.kill()
            except Exception:
                pass
            try:
                # Reap the killed process and release its pipes.
                proc.communicate(timeout=5)
            except Exception:
                pass
280
+
281
 
282
  class CoreVideoProcessor:
283
  """
 
285
  It relies on a models provider (e.g., ModelLoader) that implements:
286
  - get_sam2()
287
  - get_matanyone()
288
+ and uses utils.cv_processing for the pipeline.
289
 
290
  Supports progress callback and cancellation via stop_event.
291
  """
 
296
  self.models = models # do NOT load here; core/app handles loading
297
  if self.models is None:
298
  self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
 
299
  self._ffmpeg = shutil.which("ffmpeg")
300
 
301
  # ---------- Single frame ----------
 
319
  newW = int(round(W * scale))
320
  newH = int(round(H * scale))
321
  proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
322
+ self.log.debug(f"Model-only downscale: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
323
 
324
+ # RGB for models
325
  proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
326
 
327
  predictor = None
 
331
  except Exception as e:
332
  self.log.warning(f"SAM2 predictor unavailable: {e}")
333
 
334
+ # 1) segmentation (with internal fallbacks)
335
  mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
336
 
337
+ # 2) refinement (MatAnyOne if available)
338
  matanyone = None
339
  try:
340
  if self.models and hasattr(self.models, "get_matanyone"):
 
342
  except Exception as e:
343
  self.log.warning(f"MatAnyOne unavailable: {e}")
344
 
345
+ # IMPORTANT: call order is (frame, mask, matanyone=...)
346
+ mask_small_ref = refine_mask_hq(proc_frame_rgb, mask_small, matanyone=matanyone, use_matanyone=True)
347
 
348
  # Upsample mask back to full-res
349
  if scale != 1.0:
 
351
  else:
352
  mask_full = mask_small_ref.astype(np.float32)
353
 
354
+ # 3) compositing (helpers expect RGB inputs; return RGB)
355
  frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
356
  out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
357
 
358
+ # Convert to BGR for writer
359
  out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
360
  return {"frame": out_bgr, "mask": mask_full}
361
 
 
368
  ) -> np.ndarray:
369
  """
370
  Accepts either:
371
+ - {"custom_path": "/path/to/image.png"} → load image (RGB out)
372
  - {"background_choice": "office"} → preset
373
  - {"gradient": {type,start,end,angle_deg}} → generated gradient
374
  Returns RGB np.uint8
 
386
  # 2) gradient?
387
  if bg_config and isinstance(bg_config.get("gradient"), dict):
388
  try:
389
+ return _create_gradient_background_local(bg_config["gradient"], width, height)
390
  except Exception as e:
391
  self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
392
 
 
403
 
404
  return create_professional_background(choice, width, height) # RGB
405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  # ---------- Full video ----------
407
  def process_video(
408
  self,
 
416
  Process a full video with live progress and optional cancel.
417
  progress_callback(current_frame, total_frames, fps_live)
418
  """
419
+ ok, msg = validate_video_file(input_path)
420
  if not ok:
421
+ raise ValueError(f"Invalid or unreadable video: {msg}")
422
 
423
  cap = cv2.VideoCapture(input_path)
424
  if not cap.isOpened():
 
431
 
432
  fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
433
 
434
+ # Background once (RGB)
435
  background_rgb = self._prepare_background_from_config(bg_config, width, height)
436
 
437
+ # Writer selection
438
+ ffmpeg_pipe: _FFmpegPipe | None = None
439
+ writer: cv2.VideoWriter | None = None
440
+ ffmpeg_failed_reason = None
 
441
 
442
+ if self.config.use_nvenc and self._ffmpeg:
443
+ try:
444
+ ffmpeg_pipe = _FFmpegPipe(width, height, float(fps_out), output_path, self.config, log=self.log)
445
+ except Exception as e:
446
+ ffmpeg_failed_reason = str(e)
447
+ self.log.warning("FFmpeg NVENC pipeline unavailable. Falling back to OpenCV. Reason: %s", e)
448
+
449
+ if ffmpeg_pipe is None:
450
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
451
  writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
452
  if not writer.isOpened():
453
  cap.release()
454
+ raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
455
 
456
  frame_count = 0
457
  start_time = time.time()
 
468
  # Process single frame
469
  result = self.process_frame(frame_bgr, background_rgb)
470
  out_bgr = result["frame"]
471
+ out_bgr = np.ascontiguousarray(out_bgr) # ensure contiguous for tobytes()
472
 
473
  # Write
474
+ if ffmpeg_pipe is not None:
475
  try:
476
+ ffmpeg_pipe.write(out_bgr)
477
  except Exception as e:
478
+ # Switch to OpenCV writer mid-run and continue
479
+ self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
480
+ try:
481
+ ffmpeg_pipe.close()
482
+ except Exception:
483
+ pass
484
+ ffmpeg_pipe = None
485
+ if writer is None:
486
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
487
+ writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
488
+ if not writer.isOpened():
489
+ raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
490
+ writer.write(out_bgr)
491
  else:
492
  writer.write(out_bgr)
493
 
 
505
  cap.release()
506
  if writer is not None:
507
  writer.release()
508
+ if ffmpeg_pipe is not None:
509
  try:
510
+ ffmpeg_pipe.close()
 
 
511
  except Exception:
512
+ pass
513
+
514
+ if ffmpeg_failed_reason:
515
+ self.log.info("Completed via OpenCV writer (FFmpeg initially failed): %s", ffmpeg_failed_reason)
516
 
517
+ self.log.info("Processed %d frames → %s", frame_count, output_path)
518
  return {
519
  "frames": frame_count,
520
  "width": width,