Update processing/video/video_processor.py
Browse files- processing/video/video_processor.py +273 -82
processing/video/video_processor.py
CHANGED
|
@@ -7,7 +7,8 @@
|
|
| 7 |
{"background_choice": "<preset_key>"}
|
| 8 |
{"gradient": {type, start, end, angle_deg}}
|
| 9 |
- Model-only downscale (max_model_size) for speed, full-res render.
|
| 10 |
-
-
|
|
|
|
| 11 |
|
| 12 |
Requirements for the models provider:
|
| 13 |
- get_sam2() -> predictor or None
|
|
@@ -17,11 +18,12 @@
|
|
| 17 |
from __future__ import annotations
|
| 18 |
|
| 19 |
from dataclasses import dataclass
|
| 20 |
-
from typing import Optional, Dict, Any,
|
| 21 |
import time
|
| 22 |
import threading
|
| 23 |
import shutil
|
| 24 |
import subprocess
|
|
|
|
| 25 |
|
| 26 |
import cv2
|
| 27 |
import numpy as np
|
|
@@ -35,25 +37,61 @@
|
|
| 35 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
| 36 |
_log = logging.getLogger(__name__)
|
| 37 |
|
| 38 |
-
#
|
| 39 |
-
from utils import (
|
| 40 |
segment_person_hq,
|
| 41 |
refine_mask_hq,
|
| 42 |
replace_background_hq,
|
| 43 |
create_professional_background,
|
| 44 |
-
create_gradient_background,
|
| 45 |
validate_video_file,
|
| 46 |
PROFESSIONAL_BACKGROUNDS,
|
| 47 |
)
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
@dataclass
|
| 51 |
class ProcessorConfig:
|
| 52 |
background_preset: str = "office" # key in PROFESSIONAL_BACKGROUNDS
|
| 53 |
write_fps: Optional[float] = None # None -> keep source fps
|
|
|
|
| 54 |
# Model-only downscale (speedup without changing output resolution)
|
| 55 |
max_model_size: Optional[int] = 1280
|
| 56 |
-
|
|
|
|
| 57 |
use_nvenc: bool = True
|
| 58 |
nvenc_codec: str = "h264" # "h264" or "hevc"
|
| 59 |
nvenc_preset: str = "p5" # NVENC preset string
|
|
@@ -61,6 +99,185 @@ class ProcessorConfig:
|
|
| 61 |
nvenc_tune_hq: bool = True
|
| 62 |
nvenc_pix_fmt: str = "yuv420p" # browser-safe
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
class CoreVideoProcessor:
|
| 66 |
"""
|
|
@@ -68,7 +285,7 @@ class CoreVideoProcessor:
|
|
| 68 |
It relies on a models provider (e.g., ModelLoader) that implements:
|
| 69 |
- get_sam2()
|
| 70 |
- get_matanyone()
|
| 71 |
-
and uses utils
|
| 72 |
|
| 73 |
Supports progress callback and cancellation via stop_event.
|
| 74 |
"""
|
|
@@ -79,7 +296,6 @@ def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[An
|
|
| 79 |
self.models = models # do NOT load here; core/app handles loading
|
| 80 |
if self.models is None:
|
| 81 |
self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
|
| 82 |
-
# ffmpeg presence (for NVENC)
|
| 83 |
self._ffmpeg = shutil.which("ffmpeg")
|
| 84 |
|
| 85 |
# ---------- Single frame ----------
|
|
@@ -103,9 +319,9 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
|
|
| 103 |
newW = int(round(W * scale))
|
| 104 |
newH = int(round(H * scale))
|
| 105 |
proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
|
| 106 |
-
self.log.debug(f"Model-only downscale
|
| 107 |
|
| 108 |
-
#
|
| 109 |
proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
|
| 110 |
|
| 111 |
predictor = None
|
|
@@ -115,10 +331,10 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
|
|
| 115 |
except Exception as e:
|
| 116 |
self.log.warning(f"SAM2 predictor unavailable: {e}")
|
| 117 |
|
| 118 |
-
# 1) segmentation (with fallbacks
|
| 119 |
mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
|
| 120 |
|
| 121 |
-
# 2) refinement (MatAnyOne if available
|
| 122 |
matanyone = None
|
| 123 |
try:
|
| 124 |
if self.models and hasattr(self.models, "get_matanyone"):
|
|
@@ -126,7 +342,8 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
|
|
| 126 |
except Exception as e:
|
| 127 |
self.log.warning(f"MatAnyOne unavailable: {e}")
|
| 128 |
|
| 129 |
-
|
|
|
|
| 130 |
|
| 131 |
# Upsample mask back to full-res
|
| 132 |
if scale != 1.0:
|
|
@@ -134,11 +351,11 @@ def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Di
|
|
| 134 |
else:
|
| 135 |
mask_full = mask_small_ref.astype(np.float32)
|
| 136 |
|
| 137 |
-
# 3) compositing (expect RGB
|
| 138 |
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
|
| 139 |
out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
|
| 140 |
|
| 141 |
-
# Convert
|
| 142 |
out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
|
| 143 |
return {"frame": out_bgr, "mask": mask_full}
|
| 144 |
|
|
@@ -151,7 +368,7 @@ def _prepare_background_from_config(
|
|
| 151 |
) -> np.ndarray:
|
| 152 |
"""
|
| 153 |
Accepts either:
|
| 154 |
-
- {"custom_path": "/path/to/image.png"} → load image
|
| 155 |
- {"background_choice": "office"} → preset
|
| 156 |
- {"gradient": {type,start,end,angle_deg}} → generated gradient
|
| 157 |
Returns RGB np.uint8
|
|
@@ -169,7 +386,7 @@ def _prepare_background_from_config(
|
|
| 169 |
# 2) gradient?
|
| 170 |
if bg_config and isinstance(bg_config.get("gradient"), dict):
|
| 171 |
try:
|
| 172 |
-
return
|
| 173 |
except Exception as e:
|
| 174 |
self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
|
| 175 |
|
|
@@ -186,44 +403,6 @@ def _prepare_background_from_config(
|
|
| 186 |
|
| 187 |
return create_professional_background(choice, width, height) # RGB
|
| 188 |
|
| 189 |
-
# ---------- Writers ----------
|
| 190 |
-
def _open_writer_ffmpeg_nvenc(self, output_path: str, fps: float, width: int, height: int):
|
| 191 |
-
"""
|
| 192 |
-
Open an ffmpeg NVENC pipe that accepts raw BGR frames via stdin.
|
| 193 |
-
"""
|
| 194 |
-
if not self._ffmpeg:
|
| 195 |
-
return None
|
| 196 |
-
|
| 197 |
-
vcodec = "h264_nvenc" if self.config.nvenc_codec.lower() == "h264" else "hevc_nvenc"
|
| 198 |
-
preset = self.config.nvenc_preset
|
| 199 |
-
tune = ["-tune", "hq"] if self.config.nvenc_tune_hq else []
|
| 200 |
-
cq = ["-cq", str(int(self.config.nvenc_cq))]
|
| 201 |
-
pixfmt = self.config.nvenc_pix_fmt # usually yuv420p for web
|
| 202 |
-
|
| 203 |
-
cmd = [
|
| 204 |
-
self._ffmpeg, "-hide_banner", "-loglevel", "error",
|
| 205 |
-
"-f", "rawvideo",
|
| 206 |
-
"-pix_fmt", "bgr24", # we feed BGR frames directly
|
| 207 |
-
"-s", f"{width}x{height}",
|
| 208 |
-
"-r", f"{fps:.6f}",
|
| 209 |
-
"-i", "-", # stdin
|
| 210 |
-
"-an",
|
| 211 |
-
"-c:v", vcodec,
|
| 212 |
-
"-preset", preset,
|
| 213 |
-
*tune,
|
| 214 |
-
*cq,
|
| 215 |
-
"-pix_fmt", pixfmt,
|
| 216 |
-
"-movflags", "+faststart",
|
| 217 |
-
"-y", output_path,
|
| 218 |
-
]
|
| 219 |
-
self.log.info(f"Using NVENC ({self.config.nvenc_codec}) via ffmpeg.")
|
| 220 |
-
try:
|
| 221 |
-
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
|
| 222 |
-
return proc
|
| 223 |
-
except Exception as e:
|
| 224 |
-
self.log.warning(f"ffmpeg NVENC open failed: {e}")
|
| 225 |
-
return None
|
| 226 |
-
|
| 227 |
# ---------- Full video ----------
|
| 228 |
def process_video(
|
| 229 |
self,
|
|
@@ -237,9 +416,9 @@ def process_video(
|
|
| 237 |
Process a full video with live progress and optional cancel.
|
| 238 |
progress_callback(current_frame, total_frames, fps_live)
|
| 239 |
"""
|
| 240 |
-
ok = validate_video_file(input_path)
|
| 241 |
if not ok:
|
| 242 |
-
raise ValueError("Invalid or unreadable video")
|
| 243 |
|
| 244 |
cap = cv2.VideoCapture(input_path)
|
| 245 |
if not cap.isOpened():
|
|
@@ -252,22 +431,27 @@ def process_video(
|
|
| 252 |
|
| 253 |
fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
|
| 254 |
|
| 255 |
-
#
|
| 256 |
background_rgb = self._prepare_background_from_config(bg_config, width, height)
|
| 257 |
|
| 258 |
-
# Writer
|
| 259 |
-
|
| 260 |
-
writer = None
|
| 261 |
-
|
| 262 |
-
ffmpeg_proc = self._open_writer_ffmpeg_nvenc(output_path, float(fps_out), width, height)
|
| 263 |
|
| 264 |
-
if
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 267 |
writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
|
| 268 |
if not writer.isOpened():
|
| 269 |
cap.release()
|
| 270 |
-
raise RuntimeError(f"Could not open
|
| 271 |
|
| 272 |
frame_count = 0
|
| 273 |
start_time = time.time()
|
|
@@ -284,17 +468,26 @@ def process_video(
|
|
| 284 |
# Process single frame
|
| 285 |
result = self.process_frame(frame_bgr, background_rgb)
|
| 286 |
out_bgr = result["frame"]
|
|
|
|
| 287 |
|
| 288 |
# Write
|
| 289 |
-
if
|
| 290 |
try:
|
| 291 |
-
|
| 292 |
except Exception as e:
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
else:
|
| 299 |
writer.write(out_bgr)
|
| 300 |
|
|
@@ -312,18 +505,16 @@ def process_video(
|
|
| 312 |
cap.release()
|
| 313 |
if writer is not None:
|
| 314 |
writer.release()
|
| 315 |
-
if
|
| 316 |
try:
|
| 317 |
-
|
| 318 |
-
ffmpeg_proc.stdin.close()
|
| 319 |
-
ffmpeg_proc.wait(timeout=10)
|
| 320 |
except Exception:
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
|
| 326 |
-
self.log.info(
|
| 327 |
return {
|
| 328 |
"frames": frame_count,
|
| 329 |
"width": width,
|
|
|
|
| 7 |
{"background_choice": "<preset_key>"}
|
| 8 |
{"gradient": {type, start, end, angle_deg}}
|
| 9 |
- Model-only downscale (max_model_size) for speed, full-res render.
|
| 10 |
+
- FFmpeg pipe writer with encoder fallbacks and stderr surfacing; falls back
|
| 11 |
+
to OpenCV VideoWriter if FFmpeg isn't available or fails mid-run.
|
| 12 |
|
| 13 |
Requirements for the models provider:
|
| 14 |
- get_sam2() -> predictor or None
|
|
|
|
| 18 |
from __future__ import annotations
|
| 19 |
|
| 20 |
from dataclasses import dataclass
|
| 21 |
+
from typing import Optional, Dict, Any, Callable
|
| 22 |
import time
|
| 23 |
import threading
|
| 24 |
import shutil
|
| 25 |
import subprocess
|
| 26 |
+
import shlex
|
| 27 |
|
| 28 |
import cv2
|
| 29 |
import numpy as np
|
|
|
|
| 37 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
| 38 |
_log = logging.getLogger(__name__)
|
| 39 |
|
| 40 |
+
# Import directly from utils.cv_processing to avoid circular imports via utils/__init__.py
|
| 41 |
+
from utils.cv_processing import (
|
| 42 |
segment_person_hq,
|
| 43 |
refine_mask_hq,
|
| 44 |
replace_background_hq,
|
| 45 |
create_professional_background,
|
|
|
|
| 46 |
validate_video_file,
|
| 47 |
PROFESSIONAL_BACKGROUNDS,
|
| 48 |
)
|
| 49 |
|
| 50 |
+
# ---------- local gradient helper (no extra imports needed) ----------
|
| 51 |
+
def _to_rgb(c):
    """
    Normalise a colour specification to an ``(r, g, b)`` tuple of ints in 0..255.

    Accepted forms:
      - a list/tuple of three numbers; each is truncated with ``int()`` and
        clamped to 0..255 so it can be stored in a uint8 image without overflow
      - a ``"#rrggbb"`` hex string
      - a ``"#rgb"`` shorthand hex string (each digit is doubled)

    Anything else falls back to white ``(255, 255, 255)``.

    Raises:
        ValueError: if a ``#``-prefixed string of the right length contains
            characters that are not hexadecimal digits.
    """
    if isinstance(c, (list, tuple)) and len(c) == 3:
        # Clamp: values outside 0..255 cannot be represented in uint8 pixels.
        return tuple(max(0, min(255, int(x))) for x in c)

    if isinstance(c, str) and c.startswith("#"):
        digits = c[1:]
        if len(digits) == 3:
            # "#abc" -> "aabbcc"
            digits = "".join(ch * 2 for ch in digits)
        if len(digits) == 6:
            return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

    return (255, 255, 255)
|
| 57 |
+
|
| 58 |
+
def _create_gradient_background_local(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
    """
    Minimal linear gradient generator for backgrounds.

    spec = {"type": "linear"|"radial"(ignored), "start": (r,g,b)|"#rrggbb", "end": ..., "angle_deg": float}

    The ``"type"`` key is not read; a linear gradient is always produced.
    With ``angle_deg == 0`` the gradient runs top (start) to bottom (end).
    A positive angle turns the gradient counter-clockwise as displayed, so
    90 degrees runs left (start) to right (end) and 180 degrees runs bottom
    (start) to top (end).

    The colour ramp is evaluated per pixel along the gradient direction and
    always spans the whole frame from ``start`` to ``end``. Rotating a
    pre-rendered frame-sized ramp instead would pull in mirrored border
    pixels (or drop the ends of the ramp) whenever the frame is not square.

    Returns RGB np.uint8 (H,W,3)
    """
    start = np.asarray(_to_rgb(spec.get("start", "#222222")), dtype=np.float64)
    end = np.asarray(_to_rgb(spec.get("end", "#888888")), dtype=np.float64)
    angle = float(spec.get("angle_deg", 0))
    if not np.isfinite(angle):
        # NaN/inf cannot define a direction; treat as "no rotation".
        angle = 0.0

    if abs(angle) % 360 < 1e-6:
        # Plain vertical ramp: one colour per row, broadcast across the columns.
        t = np.arange(height, dtype=np.float64) / max(1, height - 1)
        rows = start * (1.0 - t)[:, None] + end * t[:, None]  # (H, 3)
        bg = np.empty((height, width, 3), np.uint8)
        bg[:] = np.clip(rows, 0, 255).astype(np.uint8)[:, None, :]
        return bg

    # Unit vector pointing from the start colour to the end colour in image
    # coordinates (x to the right, y downwards). (0, 1) at 0 degrees.
    theta = np.deg2rad(angle)
    dir_x, dir_y = float(np.sin(theta)), float(np.cos(theta))

    # Pixel coordinates relative to the frame centre.
    xs = np.arange(width, dtype=np.float64) - (width - 1) / 2.0
    ys = np.arange(height, dtype=np.float64) - (height - 1) / 2.0
    proj = xs[None, :] * dir_x + ys[:, None] * dir_y  # (H, W)

    # Distance between the two extreme corners along the gradient direction;
    # dividing by it maps the projection onto 0..1 across the whole frame.
    span = abs(dir_x) * (width - 1) + abs(dir_y) * (height - 1)
    if span > 0:
        t2d = np.clip(proj / span + 0.5, 0.0, 1.0)
    else:
        t2d = np.zeros_like(proj)

    img = start * (1.0 - t2d)[..., None] + end * t2d[..., None]  # (H, W, 3)
    return np.clip(img, 0, 255).astype(np.uint8)
|
| 85 |
|
| 86 |
@dataclass
|
| 87 |
class ProcessorConfig:
|
| 88 |
background_preset: str = "office" # key in PROFESSIONAL_BACKGROUNDS
|
| 89 |
write_fps: Optional[float] = None # None -> keep source fps
|
| 90 |
+
|
| 91 |
# Model-only downscale (speedup without changing output resolution)
|
| 92 |
max_model_size: Optional[int] = 1280
|
| 93 |
+
|
| 94 |
+
# FFmpeg / NVENC output (pipe). If disabled or unavailable, use OpenCV writer.
|
| 95 |
use_nvenc: bool = True
|
| 96 |
nvenc_codec: str = "h264" # "h264" or "hevc"
|
| 97 |
nvenc_preset: str = "p5" # NVENC preset string
|
|
|
|
| 99 |
nvenc_tune_hq: bool = True
|
| 100 |
nvenc_pix_fmt: str = "yuv420p" # browser-safe
|
| 101 |
|
| 102 |
+
# libx264 fallback
|
| 103 |
+
x264_preset: str = "medium"
|
| 104 |
+
x264_crf: int = 18
|
| 105 |
+
x264_pix_fmt: str = "yuv420p"
|
| 106 |
+
|
| 107 |
+
movflags_faststart: bool = True
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class _FFmpegPipe:
    """
    Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.

    Raw BGR frames (uint8, H x W x 3) are written to FFmpeg's stdin and encoded
    to ``out_path``. On construction the candidate encoders are tried in order
    (the configured NVENC codec when ``cfg.use_nvenc`` is set, then libx264,
    then mpeg4); the first process that is still alive after a short grace
    period is kept.

    FFmpeg's stderr is spooled to an anonymous temporary file instead of a
    pipe. Nothing drains a pipe while frames are being streamed, so a full
    pipe could stall FFmpeg, and reading the pipe of a live process blocks.
    A file can be read at any time.
    """

    # Seconds to wait after spawning before a start is considered successful.
    _START_GRACE_S = 0.05

    def __init__(self, width: int, height: int, fps: float, out_path: str, cfg: "ProcessorConfig",
                 log: Optional[logging.Logger] = None):
        """
        Start FFmpeg for the given frame geometry.

        Args:
            width, height: frame size in pixels; every written frame must match.
            fps: output frame rate; falsy or non-positive values become 25.0.
            out_path: output file path handed to FFmpeg.
            cfg: processor configuration (encoder choice and quality settings).
            log: logger to use; defaults to this module's logger.

        Raises:
            RuntimeError: if ffmpeg is not on PATH or no encoder could be started.
        """
        self.width = int(width)
        self.height = int(height)
        self.fps = float(fps) if fps and fps > 0 else 25.0
        self.out_path = out_path
        self.cfg = cfg
        self.log = log if log is not None else logging.getLogger(__name__)

        self.proc: Optional[subprocess.Popen] = None
        self.encoder_used: Optional[str] = None
        self._stderr: Optional[bytes] = None   # stderr of the last failed start
        self._stderr_file = None               # spool file of the running process

        self._ffmpeg = shutil.which("ffmpeg")
        if not self._ffmpeg:
            raise RuntimeError("ffmpeg not found on PATH")

        self._start_with_fallbacks()

    # Context-manager support so callers can use ``with _FFmpegPipe(...) as p:``.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc, tb):
        self.close()
        return False

    # ---------- helpers ----------
    @staticmethod
    def _close_quietly(stream) -> None:
        """Close a file-like object, ignoring errors from an already broken stream."""
        if stream is None:
            return
        try:
            stream.close()
        except (OSError, ValueError):
            pass

    @staticmethod
    def _read_spooled(fh) -> bytes:
        """Return everything FFmpeg has written to the stderr spool file so far."""
        if fh is None:
            return b""
        try:
            fh.seek(0)
            return fh.read() or b""
        except (OSError, ValueError):
            return b""

    def _encoder_candidates(self) -> list:
        """Encoders to try, in priority order."""
        candidates = []
        if self.cfg.use_nvenc:
            candidates.append("h264_nvenc" if self.cfg.nvenc_codec.lower() == "h264" else "hevc_nvenc")
        candidates += ["libx264", "mpeg4"]
        return candidates

    def _cmd_for_encoder(self, encoder: str) -> list:
        """Build the FFmpeg argv for ``encoder``; unknown names use the libx264 settings."""
        cfg = self.cfg
        cmd = [
            self._ffmpeg,
            "-hide_banner", "-loglevel", "error",
            "-y",
            # rawvideo input from stdin
            "-f", "rawvideo",
            "-vcodec", "rawvideo",
            "-pix_fmt", "bgr24",
            "-s", f"{self.width}x{self.height}",
            "-r", f"{self.fps}",
            "-i", "-",   # stdin
            "-an",       # no audio here
        ]
        if cfg.movflags_faststart:
            cmd += ["-movflags", "+faststart"]

        if encoder in ("h264_nvenc", "hevc_nvenc"):
            # Both NVENC codecs take the same option set.
            cmd += [
                "-c:v", encoder,
                "-preset", cfg.nvenc_preset,
                "-cq", str(int(cfg.nvenc_cq)),
                "-pix_fmt", cfg.nvenc_pix_fmt,
            ]
            if cfg.nvenc_tune_hq:
                cmd += ["-tune", "hq"]
        elif encoder == "mpeg4":
            cmd += [
                "-c:v", "mpeg4",
                "-q:v", "2",
                "-pix_fmt", "yuv420p",
            ]
        else:
            # "libx264" and any unrecognised encoder name.
            cmd += [
                "-c:v", "libx264",
                "-preset", cfg.x264_preset,
                "-crf", str(int(cfg.x264_crf)),
                "-pix_fmt", cfg.x264_pix_fmt,
            ]

        cmd.append(self.out_path)
        return cmd

    def _try_start(self, enc: str) -> bool:
        """Spawn FFmpeg with ``enc``; return True if it is still alive after the grace period."""
        import tempfile  # function-scope import: tempfile is not among the module's imports

        cmd = self._cmd_for_encoder(enc)
        stderr_file = None
        try:
            stderr_file = tempfile.TemporaryFile()
            proc = subprocess.Popen(
                cmd,
                stdin=subprocess.PIPE,
                stderr=stderr_file,
                bufsize=10**7,
            )
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            self.log.warning("Failed to start FFmpeg with %s: %s", enc, e)
            self._close_quietly(stderr_file)
            return False

        self.log.info("FFmpeg started: %s", " ".join(shlex.quote(c) for c in cmd))

        # quick poll: if ffmpeg dies immediately, fail fast
        try:
            proc.wait(timeout=self._START_GRACE_S)
        except subprocess.TimeoutExpired:
            # Still running after the grace period: keep this process.
            self.proc = proc
            self._stderr_file = stderr_file
            self.encoder_used = enc
            return True

        # The process has already exited (and was reaped by wait()).
        self._stderr = self._read_spooled(stderr_file)
        self.log.warning("FFmpeg exited on start with %s: %s", enc, self._stderr.decode(errors="ignore"))
        # Release the handles of the failed attempt before trying the next encoder.
        self._close_quietly(proc.stdin)
        self._close_quietly(stderr_file)
        return False

    def _start_with_fallbacks(self):
        """Try each candidate encoder in turn; raise RuntimeError if none starts."""
        for enc in self._encoder_candidates():
            if self._try_start(enc):
                return
        msg = "Could not start FFmpeg with any encoder (nvenc/libx264/mpeg4). Is ffmpeg present and codecs available?"
        if self._stderr:
            msg += f" Stderr: {(self._stderr or b'').decode(errors='ignore')[:500]}"
        raise RuntimeError(msg)

    # ---------- public API ----------
    def write(self, frame_bgr: np.ndarray):
        """
        Send one frame to FFmpeg.

        Raises:
            RuntimeError: if no FFmpeg process is running.
            ValueError: if the frame is not a uint8 (H, W, 3) array of the configured size.
            BrokenPipeError: if writing to FFmpeg's stdin fails; the message
                includes whatever FFmpeg wrote to stderr.
        """
        if self.proc is None or self.proc.stdin is None:
            raise RuntimeError("FFmpeg process is not running (stdin is None).")
        if not isinstance(frame_bgr, np.ndarray) or frame_bgr.dtype != np.uint8:
            raise ValueError("Frame must be a np.ndarray of dtype uint8.")
        if frame_bgr.ndim != 3 or frame_bgr.shape[2] != 3:
            raise ValueError("Frame must have shape (H, W, 3).")
        if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
            raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")

        # ensure contiguous for tobytes()
        frame_bgr = np.ascontiguousarray(frame_bgr)
        try:
            self.proc.stdin.write(frame_bgr.tobytes())
        except (OSError, ValueError) as e:
            # Give a dying FFmpeg a moment to finish writing its error output.
            try:
                self.proc.wait(timeout=0.5)
            except subprocess.TimeoutExpired:
                pass
            # collect stderr for diagnostics (file read; never blocks)
            stderr = self._read_spooled(self._stderr_file)
            msg = f"FFmpeg pipe write failed: {e}"
            if stderr:
                msg += f"\nffmpeg stderr: {stderr.decode(errors='ignore')[:1000]}"
            raise BrokenPipeError(msg) from e

    def close(self, timeout: float = 60.0):
        """
        Finish the stream: close stdin, wait for FFmpeg to exit, release handles.

        Safe to call more than once. If FFmpeg has not exited within
        ``timeout`` seconds it is killed. A non-zero exit code is logged as a
        warning together with the tail of FFmpeg's stderr, since the output
        file is then probably incomplete.
        """
        proc = self.proc
        if proc is None:
            return
        try:
            if proc.stdin:
                try:
                    proc.stdin.close()  # close() flushes buffered frames first
                except (OSError, ValueError) as e:
                    self.log.debug("Closing FFmpeg stdin failed: %s", e)

            try:
                returncode = proc.wait(timeout=timeout)
            except subprocess.TimeoutExpired:
                self.log.warning("FFmpeg did not exit within %.0fs; killing it.", timeout)
                try:
                    proc.kill()
                except OSError:
                    pass
                returncode = proc.wait()  # reap the killed process

            err = self._read_spooled(self._stderr_file)
            tail = err.decode(errors="ignore")[-2000:]
            if returncode != 0:
                self.log.warning("FFmpeg exited with code %s: %s", returncode, tail)
            elif err:
                self.log.debug("FFmpeg stderr (tail): %s", tail)
        finally:
            self._close_quietly(self._stderr_file)
            self._stderr_file = None
            self.proc = None
|
| 280 |
+
|
| 281 |
|
| 282 |
class CoreVideoProcessor:
|
| 283 |
"""
|
|
|
|
| 285 |
It relies on a models provider (e.g., ModelLoader) that implements:
|
| 286 |
- get_sam2()
|
| 287 |
- get_matanyone()
|
| 288 |
+
and uses utils.cv_processing for the pipeline.
|
| 289 |
|
| 290 |
Supports progress callback and cancellation via stop_event.
|
| 291 |
"""
|
|
|
|
| 296 |
self.models = models # do NOT load here; core/app handles loading
|
| 297 |
if self.models is None:
|
| 298 |
self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
|
|
|
|
| 299 |
self._ffmpeg = shutil.which("ffmpeg")
|
| 300 |
|
| 301 |
# ---------- Single frame ----------
|
|
|
|
| 319 |
newW = int(round(W * scale))
|
| 320 |
newH = int(round(H * scale))
|
| 321 |
proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
|
| 322 |
+
self.log.debug(f"Model-only downscale: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
|
| 323 |
|
| 324 |
+
# RGB for models
|
| 325 |
proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
|
| 326 |
|
| 327 |
predictor = None
|
|
|
|
| 331 |
except Exception as e:
|
| 332 |
self.log.warning(f"SAM2 predictor unavailable: {e}")
|
| 333 |
|
| 334 |
+
# 1) segmentation (with internal fallbacks)
|
| 335 |
mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
|
| 336 |
|
| 337 |
+
# 2) refinement (MatAnyOne if available)
|
| 338 |
matanyone = None
|
| 339 |
try:
|
| 340 |
if self.models and hasattr(self.models, "get_matanyone"):
|
|
|
|
| 342 |
except Exception as e:
|
| 343 |
self.log.warning(f"MatAnyOne unavailable: {e}")
|
| 344 |
|
| 345 |
+
# IMPORTANT: call order is (frame, mask, matanyone=...)
|
| 346 |
+
mask_small_ref = refine_mask_hq(proc_frame_rgb, mask_small, matanyone=matanyone, use_matanyone=True)
|
| 347 |
|
| 348 |
# Upsample mask back to full-res
|
| 349 |
if scale != 1.0:
|
|
|
|
| 351 |
else:
|
| 352 |
mask_full = mask_small_ref.astype(np.float32)
|
| 353 |
|
| 354 |
+
# 3) compositing (helpers expect RGB inputs; return RGB)
|
| 355 |
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
|
| 356 |
out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
|
| 357 |
|
| 358 |
+
# Convert to BGR for writer
|
| 359 |
out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
|
| 360 |
return {"frame": out_bgr, "mask": mask_full}
|
| 361 |
|
|
|
|
| 368 |
) -> np.ndarray:
|
| 369 |
"""
|
| 370 |
Accepts either:
|
| 371 |
+
- {"custom_path": "/path/to/image.png"} → load image (RGB out)
|
| 372 |
- {"background_choice": "office"} → preset
|
| 373 |
- {"gradient": {type,start,end,angle_deg}} → generated gradient
|
| 374 |
Returns RGB np.uint8
|
|
|
|
| 386 |
# 2) gradient?
|
| 387 |
if bg_config and isinstance(bg_config.get("gradient"), dict):
|
| 388 |
try:
|
| 389 |
+
return _create_gradient_background_local(bg_config["gradient"], width, height)
|
| 390 |
except Exception as e:
|
| 391 |
self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
|
| 392 |
|
|
|
|
| 403 |
|
| 404 |
return create_professional_background(choice, width, height) # RGB
|
| 405 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 406 |
# ---------- Full video ----------
|
| 407 |
def process_video(
|
| 408 |
self,
|
|
|
|
| 416 |
Process a full video with live progress and optional cancel.
|
| 417 |
progress_callback(current_frame, total_frames, fps_live)
|
| 418 |
"""
|
| 419 |
+
ok, msg = validate_video_file(input_path)
|
| 420 |
if not ok:
|
| 421 |
+
raise ValueError(f"Invalid or unreadable video: {msg}")
|
| 422 |
|
| 423 |
cap = cv2.VideoCapture(input_path)
|
| 424 |
if not cap.isOpened():
|
|
|
|
| 431 |
|
| 432 |
fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
|
| 433 |
|
| 434 |
+
# Background once (RGB)
|
| 435 |
background_rgb = self._prepare_background_from_config(bg_config, width, height)
|
| 436 |
|
| 437 |
+
# Writer selection
|
| 438 |
+
ffmpeg_pipe: _FFmpegPipe | None = None
|
| 439 |
+
writer: cv2.VideoWriter | None = None
|
| 440 |
+
ffmpeg_failed_reason = None
|
|
|
|
| 441 |
|
| 442 |
+
if self.config.use_nvenc and self._ffmpeg:
|
| 443 |
+
try:
|
| 444 |
+
ffmpeg_pipe = _FFmpegPipe(width, height, float(fps_out), output_path, self.config, log=self.log)
|
| 445 |
+
except Exception as e:
|
| 446 |
+
ffmpeg_failed_reason = str(e)
|
| 447 |
+
self.log.warning("FFmpeg NVENC pipeline unavailable. Falling back to OpenCV. Reason: %s", e)
|
| 448 |
+
|
| 449 |
+
if ffmpeg_pipe is None:
|
| 450 |
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 451 |
writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
|
| 452 |
if not writer.isOpened():
|
| 453 |
cap.release()
|
| 454 |
+
raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
|
| 455 |
|
| 456 |
frame_count = 0
|
| 457 |
start_time = time.time()
|
|
|
|
| 468 |
# Process single frame
|
| 469 |
result = self.process_frame(frame_bgr, background_rgb)
|
| 470 |
out_bgr = result["frame"]
|
| 471 |
+
out_bgr = np.ascontiguousarray(out_bgr) # ensure contiguous for tobytes()
|
| 472 |
|
| 473 |
# Write
|
| 474 |
+
if ffmpeg_pipe is not None:
|
| 475 |
try:
|
| 476 |
+
ffmpeg_pipe.write(out_bgr)
|
| 477 |
except Exception as e:
|
| 478 |
+
# Switch to OpenCV writer mid-run and continue
|
| 479 |
+
self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
|
| 480 |
+
try:
|
| 481 |
+
ffmpeg_pipe.close()
|
| 482 |
+
except Exception:
|
| 483 |
+
pass
|
| 484 |
+
ffmpeg_pipe = None
|
| 485 |
+
if writer is None:
|
| 486 |
+
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 487 |
+
writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
|
| 488 |
+
if not writer.isOpened():
|
| 489 |
+
raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
|
| 490 |
+
writer.write(out_bgr)
|
| 491 |
else:
|
| 492 |
writer.write(out_bgr)
|
| 493 |
|
|
|
|
| 505 |
cap.release()
|
| 506 |
if writer is not None:
|
| 507 |
writer.release()
|
| 508 |
+
if ffmpeg_pipe is not None:
|
| 509 |
try:
|
| 510 |
+
ffmpeg_pipe.close()
|
|
|
|
|
|
|
| 511 |
except Exception:
|
| 512 |
+
pass
|
| 513 |
+
|
| 514 |
+
if ffmpeg_failed_reason:
|
| 515 |
+
self.log.info("Completed via OpenCV writer (FFmpeg initially failed): %s", ffmpeg_failed_reason)
|
| 516 |
|
| 517 |
+
self.log.info("Processed %d frames → %s", frame_count, output_path)
|
| 518 |
return {
|
| 519 |
"frames": frame_count,
|
| 520 |
"width": width,
|