Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Sep 9

Commit

cce1b33

verified ·

1 Parent(s): 16a0e0c

Update processing/two_stage/matanyone_adapter.py

Browse files

Files changed (1) hide show

processing/two_stage/matanyone_adapter.py +70 -6

processing/two_stage/matanyone_adapter.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # processing/two_stage/matanyone_adapter.py
 """
 MatAnyone Adapter - robust interface handler for InferenceCore.
 Priority:
 1) Use process_video(...) when available.
@@ -232,6 +233,8 @@ def run_matanyone_with_reference_mask(
 ) -> str:
     """
     Run MatAnyone with a reference mask/trimap and return a directory with alpha_*.png frames.
     """
     _ensure_dir(output_dir)
@@ -251,22 +254,55 @@ def run_matanyone_with_reference_mask(
                     "mask_path":   reference_mask_path,  # we pass our trimap here
                     "output_path": out_mp4,
                 }
                 # Optional params if present (provide safe defaults)
-                if "n_warmup" in params:  kwargs["n_warmup"]  = 5
                 if "r_erode"  in params:  kwargs["r_erode"]   = 0
                 if "r_dilate" in params:  kwargs["r_dilate"]  = 0
                 if "suffix"   in params:  kwargs["suffix"]    = "_pha.mp4"
                 if "save_image" in params: kwargs["save_image"] = True  # also dump frames if supported
                 if "max_size" in params: kwargs["max_size"] = 2048
-                logger.info(f"Calling process_video with explicit kwargs for known signature.")
                 proc(**kwargs)
-                # Prefer PNGs if they were dumped; otherwise convert MP4
                 produced = _detect_alpha_artifact(output_dir)
                 if produced and os.path.isdir(produced):
                     return produced
                 if os.path.exists(out_mp4):
                     alpha_dir = os.path.join(output_dir, "alpha_png")
                     return _mp4_to_alpha_png(out_mp4, alpha_dir)
@@ -284,6 +320,9 @@ def run_matanyone_with_reference_mask(
                         argmap[p] = output_dir
                     else:
                         argmap[p] = os.path.join(output_dir, "pha.mp4")
             has_video = any(v == video_path for v in argmap.values())
             has_mask  = any(v == reference_mask_path for v in argmap.values())
@@ -296,6 +335,15 @@ def run_matanyone_with_reference_mask(
                 if produced and os.path.isdir(produced):
                     return produced
                 if produced and produced.lower().endswith(".mp4"):
                     alpha_dir = os.path.join(output_dir, "alpha_png")
                     return _mp4_to_alpha_png(produced, alpha_dir)
                 # If we set a specific mp4 path, try that
@@ -308,14 +356,19 @@ def run_matanyone_with_reference_mask(
             logger.warning(f"process_video failed; falling back to step(): {e}")
     # 2) Fallback: step-based pipeline (initialize with mask, then step(image)…)
-    logger.info("Using step-based pipeline: initialize (via mask) → step(image)…")
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         raise ValueError(f"Cannot open video: {video_path}")
     fps = fps_override or cap.get(cv2.CAP_PROP_FPS) or 25
     w   = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     h   = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     ok, frame0 = cap.read()
     if not ok:
@@ -351,17 +404,28 @@ def run_matanyone_with_reference_mask(
     except Exception:
         pass
     i = 1
     while True:
         ok, frame = cap.read()
         if not ok:
             break
         img = _to_chw01(frame)
         out = getattr(inference_core, "step")(img)
         alpha_i = _prob_to_alpha01(inference_core, out)
         _write_alpha_png(alpha_i, os.path.join(alpha_dir, f"alpha_{i:06d}.png"))
         i += 1
     cap.release()
-    logger.info(f"Wrote {i} alpha PNG frames → {alpha_dir}")
-    return alpha_dir

 # processing/two_stage/matanyone_adapter.py
 """
 MatAnyone Adapter - robust interface handler for InferenceCore.
+FIXED: n_warmup is now set to 0 instead of 5; the old value was causing 5-second truncation.
 Priority:
 1) Use process_video(...) when available.
 ) -> str:
     """
     Run MatAnyone with a reference mask/trimap and return a directory with alpha_*.png frames.
+    FIXED: n_warmup is now passed as 0 (previously 5), which was truncating videos to 5 seconds.
     """
     _ensure_dir(output_dir)
                     "mask_path":   reference_mask_path,  # we pass our trimap here
                     "output_path": out_mp4,
                 }
+                # FIX: Don't limit n_warmup to 5!
+                # n_warmup likely means "number of warmup frames" or "warmup seconds"
+                # Setting it to 5 was causing only 5 seconds to be processed
                 # Optional params if present (provide safe defaults)
+                # EITHER: Don't set n_warmup at all (let MatAnyone use its default)
+                # OR: Set it to 0 to disable warmup
+                # OR: Set it to a small number of frames (10-30) not seconds
+                # Option 1: Don't set n_warmup at all - let MatAnyone decide
+                # if "n_warmup" in params:
+                #     pass  # Don't set it
+                # Option 2: Disable warmup entirely
+                if "n_warmup" in params:
+                    kwargs["n_warmup"] = 0  # Disable warmup
+                # Option 3: If it's frames, use a reasonable number
+                # if "n_warmup" in params:
+                #     kwargs["n_warmup"] = 10  # 10 frames, not seconds
                 if "r_erode"  in params:  kwargs["r_erode"]   = 0
                 if "r_dilate" in params:  kwargs["r_dilate"]  = 0
                 if "suffix"   in params:  kwargs["suffix"]    = "_pha.mp4"
                 if "save_image" in params: kwargs["save_image"] = True  # also dump frames if supported
                 if "max_size" in params: kwargs["max_size"] = 2048
+                logger.info(f"Calling process_video with kwargs: {kwargs}")
+                logger.info(f"NOTE: n_warmup set to {kwargs.get('n_warmup', 'default')} to process full video")
+                # Call MatAnyone
                 proc(**kwargs)
+                # Check what was produced
                 produced = _detect_alpha_artifact(output_dir)
                 if produced and os.path.isdir(produced):
+                    logger.info(f"MatAnyone produced PNG directory: {produced}")
                     return produced
                 if os.path.exists(out_mp4):
+                    # Check duration of the output
+                    cap = cv2.VideoCapture(out_mp4)
+                    if cap.isOpened():
+                        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                        fps = cap.get(cv2.CAP_PROP_FPS)
+                        duration = frame_count / fps if fps > 0 else 0
+                        cap.release()
+                        logger.info(f"MatAnyone output: {frame_count} frames, {duration:.1f} seconds")
                     alpha_dir = os.path.join(output_dir, "alpha_png")
                     return _mp4_to_alpha_png(out_mp4, alpha_dir)
                         argmap[p] = output_dir
                     else:
                         argmap[p] = os.path.join(output_dir, "pha.mp4")
+                # FIX: Handle n_warmup in generic mapping too
+                elif lp == "n_warmup":
+                    argmap[p] = 0  # Disable warmup
             has_video = any(v == video_path for v in argmap.values())
             has_mask  = any(v == reference_mask_path for v in argmap.values())
                 if produced and os.path.isdir(produced):
                     return produced
                 if produced and produced.lower().endswith(".mp4"):
+                    # Log duration for debugging
+                    cap = cv2.VideoCapture(produced)
+                    if cap.isOpened():
+                        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+                        fps = cap.get(cv2.CAP_PROP_FPS)
+                        duration = frame_count / fps if fps > 0 else 0
+                        cap.release()
+                        logger.info(f"MatAnyone output duration: {duration:.1f} seconds")
                     alpha_dir = os.path.join(output_dir, "alpha_png")
                     return _mp4_to_alpha_png(produced, alpha_dir)
                 # If we set a specific mp4 path, try that
             logger.warning(f"process_video failed; falling back to step(): {e}")
     # 2) Fallback: step-based pipeline (initialize with mask, then step(image)…)
+    logger.info("Using step-based pipeline: initialize (via mask) → step(image)… for FULL video")
     cap = cv2.VideoCapture(video_path)
     if not cap.isOpened():
         raise ValueError(f"Cannot open video: {video_path}")
     fps = fps_override or cap.get(cv2.CAP_PROP_FPS) or 25
     w   = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     h   = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    duration = total_frames / fps if fps > 0 else 0
+    logger.info(f"Processing full video: {total_frames} frames, {duration:.1f} seconds")
     ok, frame0 = cap.read()
     if not ok:
     except Exception:
         pass
+    # Process ALL frames, not just a limited number
     i = 1
     while True:
         ok, frame = cap.read()
         if not ok:
             break
         img = _to_chw01(frame)
         out = getattr(inference_core, "step")(img)
         alpha_i = _prob_to_alpha01(inference_core, out)
         _write_alpha_png(alpha_i, os.path.join(alpha_dir, f"alpha_{i:06d}.png"))
+        # Progress logging
+        if i % 30 == 0:
+            progress = (i / total_frames) * 100 if total_frames > 0 else 0
+            logger.info(f"Processing frame {i}/{total_frames} ({progress:.1f}%)")
         i += 1
     cap.release()
+    actual_duration = i / fps if fps > 0 else 0
+    logger.info(f"✅ Processed {i} alpha PNG frames ({actual_duration:.1f} seconds) → {alpha_dir}")
+    return alpha_dir