Update processing/two_stage/matanyone_adapter.py
Browse files
processing/two_stage/matanyone_adapter.py
CHANGED
|
@@ -2,6 +2,7 @@
|
|
| 2 |
# processing/two_stage/matanyone_adapter.py
|
| 3 |
"""
|
| 4 |
MatAnyone Adapter - robust interface handler for InferenceCore.
|
|
|
|
| 5 |
|
| 6 |
Priority:
|
| 7 |
1) Use process_video(...) when available.
|
|
@@ -232,6 +233,8 @@ def run_matanyone_with_reference_mask(
|
|
| 232 |
) -> str:
|
| 233 |
"""
|
| 234 |
Run MatAnyone with a reference mask/trimap and return a directory with alpha_*.png frames.
|
|
|
|
|
|
|
| 235 |
"""
|
| 236 |
_ensure_dir(output_dir)
|
| 237 |
|
|
@@ -251,22 +254,55 @@ def run_matanyone_with_reference_mask(
|
|
| 251 |
"mask_path": reference_mask_path, # we pass our trimap here
|
| 252 |
"output_path": out_mp4,
|
| 253 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
# Optional params if present (provide safe defaults)
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
if "r_erode" in params: kwargs["r_erode"] = 0
|
| 257 |
if "r_dilate" in params: kwargs["r_dilate"] = 0
|
| 258 |
if "suffix" in params: kwargs["suffix"] = "_pha.mp4"
|
| 259 |
if "save_image" in params: kwargs["save_image"] = True # also dump frames if supported
|
| 260 |
if "max_size" in params: kwargs["max_size"] = 2048
|
| 261 |
|
| 262 |
-
logger.info(f"Calling process_video with
|
|
|
|
|
|
|
|
|
|
| 263 |
proc(**kwargs)
|
| 264 |
|
| 265 |
-
#
|
| 266 |
produced = _detect_alpha_artifact(output_dir)
|
| 267 |
if produced and os.path.isdir(produced):
|
|
|
|
| 268 |
return produced
|
| 269 |
if os.path.exists(out_mp4):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
alpha_dir = os.path.join(output_dir, "alpha_png")
|
| 271 |
return _mp4_to_alpha_png(out_mp4, alpha_dir)
|
| 272 |
|
|
@@ -284,6 +320,9 @@ def run_matanyone_with_reference_mask(
|
|
| 284 |
argmap[p] = output_dir
|
| 285 |
else:
|
| 286 |
argmap[p] = os.path.join(output_dir, "pha.mp4")
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
has_video = any(v == video_path for v in argmap.values())
|
| 289 |
has_mask = any(v == reference_mask_path for v in argmap.values())
|
|
@@ -296,6 +335,15 @@ def run_matanyone_with_reference_mask(
|
|
| 296 |
if produced and os.path.isdir(produced):
|
| 297 |
return produced
|
| 298 |
if produced and produced.lower().endswith(".mp4"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
alpha_dir = os.path.join(output_dir, "alpha_png")
|
| 300 |
return _mp4_to_alpha_png(produced, alpha_dir)
|
| 301 |
# If we set a specific mp4 path, try that
|
|
@@ -308,14 +356,19 @@ def run_matanyone_with_reference_mask(
|
|
| 308 |
logger.warning(f"process_video failed; falling back to step(): {e}")
|
| 309 |
|
| 310 |
# 2) Fallback: step-based pipeline (initialize with mask, then step(image)…)
|
| 311 |
-
logger.info("Using step-based pipeline: initialize (via mask) → step(image)…")
|
| 312 |
|
| 313 |
cap = cv2.VideoCapture(video_path)
|
| 314 |
if not cap.isOpened():
|
| 315 |
raise ValueError(f"Cannot open video: {video_path}")
|
|
|
|
| 316 |
fps = fps_override or cap.get(cv2.CAP_PROP_FPS) or 25
|
| 317 |
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 318 |
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
ok, frame0 = cap.read()
|
| 321 |
if not ok:
|
|
@@ -351,17 +404,28 @@ def run_matanyone_with_reference_mask(
|
|
| 351 |
except Exception:
|
| 352 |
pass
|
| 353 |
|
|
|
|
| 354 |
i = 1
|
| 355 |
while True:
|
| 356 |
ok, frame = cap.read()
|
| 357 |
if not ok:
|
| 358 |
break
|
|
|
|
| 359 |
img = _to_chw01(frame)
|
| 360 |
out = getattr(inference_core, "step")(img)
|
| 361 |
alpha_i = _prob_to_alpha01(inference_core, out)
|
| 362 |
_write_alpha_png(alpha_i, os.path.join(alpha_dir, f"alpha_{i:06d}.png"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
i += 1
|
| 364 |
|
| 365 |
cap.release()
|
| 366 |
-
|
| 367 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
# processing/two_stage/matanyone_adapter.py
|
| 3 |
"""
|
| 4 |
MatAnyone Adapter - robust interface handler for InferenceCore.
|
| 5 |
+
FIXED: n_warmup is now set to 0 whenever process_video accepts it; the previous value of 5 was truncating output to about 5 seconds.
|
| 6 |
|
| 7 |
Priority:
|
| 8 |
1) Use process_video(...) when available.
|
|
|
|
| 233 |
) -> str:
|
| 234 |
"""
|
| 235 |
Run MatAnyone with a reference mask/trimap and return a directory with alpha_*.png frames.
|
| 236 |
+
|
| 237 |
+
FIXED: n_warmup is forced to 0 (when supported) instead of 5, which was truncating videos to 5 seconds.
|
| 238 |
"""
|
| 239 |
_ensure_dir(output_dir)
|
| 240 |
|
|
|
|
| 254 |
"mask_path": reference_mask_path, # we pass our trimap here
|
| 255 |
"output_path": out_mp4,
|
| 256 |
}
|
| 257 |
+
|
| 258 |
+
# FIX: Don't limit n_warmup to 5!
|
| 259 |
+
# n_warmup likely means "number of warmup frames" or "warmup seconds"
|
| 260 |
+
# Setting it to 5 was causing only 5 seconds to be processed
|
| 261 |
+
|
| 262 |
# Optional params if present (provide safe defaults)
|
| 263 |
+
# EITHER: Don't set n_warmup at all (let MatAnyone use its default)
|
| 264 |
+
# OR: Set it to 0 to disable warmup
|
| 265 |
+
# OR: Set it to a small number of frames (10-30), treating the value as a frame count rather than seconds
|
| 266 |
+
|
| 267 |
+
# Option 1: Don't set n_warmup at all - let MatAnyone decide
|
| 268 |
+
# if "n_warmup" in params:
|
| 269 |
+
# pass # Don't set it
|
| 270 |
+
|
| 271 |
+
# Option 2: Disable warmup entirely
|
| 272 |
+
if "n_warmup" in params:
|
| 273 |
+
kwargs["n_warmup"] = 0 # Disable warmup
|
| 274 |
+
|
| 275 |
+
# Option 3: If it's frames, use a reasonable number
|
| 276 |
+
# if "n_warmup" in params:
|
| 277 |
+
# kwargs["n_warmup"] = 10 # 10 frames, not seconds
|
| 278 |
+
|
| 279 |
if "r_erode" in params: kwargs["r_erode"] = 0
|
| 280 |
if "r_dilate" in params: kwargs["r_dilate"] = 0
|
| 281 |
if "suffix" in params: kwargs["suffix"] = "_pha.mp4"
|
| 282 |
if "save_image" in params: kwargs["save_image"] = True # also dump frames if supported
|
| 283 |
if "max_size" in params: kwargs["max_size"] = 2048
|
| 284 |
|
| 285 |
+
logger.info(f"Calling process_video with kwargs: {kwargs}")
|
| 286 |
+
logger.info(f"NOTE: n_warmup set to {kwargs.get('n_warmup', 'default')} to process full video")
|
| 287 |
+
|
| 288 |
+
# Call MatAnyone
|
| 289 |
proc(**kwargs)
|
| 290 |
|
| 291 |
+
# Check what was produced
|
| 292 |
produced = _detect_alpha_artifact(output_dir)
|
| 293 |
if produced and os.path.isdir(produced):
|
| 294 |
+
logger.info(f"MatAnyone produced PNG directory: {produced}")
|
| 295 |
return produced
|
| 296 |
if os.path.exists(out_mp4):
|
| 297 |
+
# Check duration of the output
|
| 298 |
+
cap = cv2.VideoCapture(out_mp4)
|
| 299 |
+
if cap.isOpened():
|
| 300 |
+
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 301 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 302 |
+
duration = frame_count / fps if fps > 0 else 0
|
| 303 |
+
cap.release()
|
| 304 |
+
logger.info(f"MatAnyone output: {frame_count} frames, {duration:.1f} seconds")
|
| 305 |
+
|
| 306 |
alpha_dir = os.path.join(output_dir, "alpha_png")
|
| 307 |
return _mp4_to_alpha_png(out_mp4, alpha_dir)
|
| 308 |
|
|
|
|
| 320 |
argmap[p] = output_dir
|
| 321 |
else:
|
| 322 |
argmap[p] = os.path.join(output_dir, "pha.mp4")
|
| 323 |
+
# FIX: Handle n_warmup in generic mapping too
|
| 324 |
+
elif lp == "n_warmup":
|
| 325 |
+
argmap[p] = 0 # Disable warmup
|
| 326 |
|
| 327 |
has_video = any(v == video_path for v in argmap.values())
|
| 328 |
has_mask = any(v == reference_mask_path for v in argmap.values())
|
|
|
|
| 335 |
if produced and os.path.isdir(produced):
|
| 336 |
return produced
|
| 337 |
if produced and produced.lower().endswith(".mp4"):
|
| 338 |
+
# Log duration for debugging
|
| 339 |
+
cap = cv2.VideoCapture(produced)
|
| 340 |
+
if cap.isOpened():
|
| 341 |
+
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 342 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 343 |
+
duration = frame_count / fps if fps > 0 else 0
|
| 344 |
+
cap.release()
|
| 345 |
+
logger.info(f"MatAnyone output duration: {duration:.1f} seconds")
|
| 346 |
+
|
| 347 |
alpha_dir = os.path.join(output_dir, "alpha_png")
|
| 348 |
return _mp4_to_alpha_png(produced, alpha_dir)
|
| 349 |
# If we set a specific mp4 path, try that
|
|
|
|
| 356 |
logger.warning(f"process_video failed; falling back to step(): {e}")
|
| 357 |
|
| 358 |
# 2) Fallback: step-based pipeline (initialize with mask, then step(image)…)
|
| 359 |
+
logger.info("Using step-based pipeline: initialize (via mask) → step(image)… for FULL video")
|
| 360 |
|
| 361 |
cap = cv2.VideoCapture(video_path)
|
| 362 |
if not cap.isOpened():
|
| 363 |
raise ValueError(f"Cannot open video: {video_path}")
|
| 364 |
+
|
| 365 |
fps = fps_override or cap.get(cv2.CAP_PROP_FPS) or 25
|
| 366 |
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 367 |
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 368 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 369 |
+
duration = total_frames / fps if fps > 0 else 0
|
| 370 |
+
|
| 371 |
+
logger.info(f"Processing full video: {total_frames} frames, {duration:.1f} seconds")
|
| 372 |
|
| 373 |
ok, frame0 = cap.read()
|
| 374 |
if not ok:
|
|
|
|
| 404 |
except Exception:
|
| 405 |
pass
|
| 406 |
|
| 407 |
+
# Process ALL frames, not just a limited number
|
| 408 |
i = 1
|
| 409 |
while True:
|
| 410 |
ok, frame = cap.read()
|
| 411 |
if not ok:
|
| 412 |
break
|
| 413 |
+
|
| 414 |
img = _to_chw01(frame)
|
| 415 |
out = getattr(inference_core, "step")(img)
|
| 416 |
alpha_i = _prob_to_alpha01(inference_core, out)
|
| 417 |
_write_alpha_png(alpha_i, os.path.join(alpha_dir, f"alpha_{i:06d}.png"))
|
| 418 |
+
|
| 419 |
+
# Progress logging
|
| 420 |
+
if i % 30 == 0:
|
| 421 |
+
progress = (i / total_frames) * 100 if total_frames > 0 else 0
|
| 422 |
+
logger.info(f"Processing frame {i}/{total_frames} ({progress:.1f}%)")
|
| 423 |
+
|
| 424 |
i += 1
|
| 425 |
|
| 426 |
cap.release()
|
| 427 |
+
|
| 428 |
+
actual_duration = i / fps if fps > 0 else 0
|
| 429 |
+
logger.info(f"✅ Processed {i} alpha PNG frames ({actual_duration:.1f} seconds) → {alpha_dir}")
|
| 430 |
+
|
| 431 |
+
return alpha_dir
|