Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 21

Commit

2167778

1 Parent(s): ad645ee

Update app.py

Browse files

Files changed (1) hide show

app.py +613 -502

app.py CHANGED Viewed

@@ -1,542 +1,653 @@
 #!/usr/bin/env python3
-# ========================= PRE-IMPORT ENV GUARDS =========================
-import os
-os.environ.pop("OMP_NUM_THREADS", None)
-os.environ.setdefault("MKL_NUM_THREADS", "1")
-os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
-os.environ.setdefault("VECLIB_MAXIMUM_THREADS", "1")
-os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")
-# ========================= IMPORTS =========================
-import gc
 import sys
 import cv2
-import torch
 import numpy as np
-import gradio as gr
-import tempfile
-import time
 from pathlib import Path
-import logging
 import traceback
-from datetime import datetime
-import psutil
-import warnings
-warnings.filterwarnings("ignore")
-# Import the properly implemented functions from utilities
 from utilities import (
     segment_person_hq,
     refine_mask_hq,
     replace_background_hq,
-    load_background_image,
-    resize_background_to_match,
-    apply_temporal_smoothing,
-    smooth_edges,
-    estimate_foreground
 )
-# Import two-stage processor for advanced mode
-from two_stage_processor import TwoStageProcessor
-# Import UI components
-from ui_components import create_ui, get_example_videos, get_example_backgrounds
-# ========================= LOGGING SETUP =========================
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
 logger = logging.getLogger(__name__)
-# ========================= GPU/DEVICE SETUP =========================
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-logger.info(f"Using device: {device}")
-if device.type == "cuda":
-    torch.cuda.empty_cache()
-    # Optimize CUDA settings for memory efficiency
-    torch.backends.cudnn.benchmark = False
-    torch.backends.cudnn.deterministic = True
-    torch.cuda.set_per_process_memory_fraction(0.8)  # Limit to 80% of VRAM
-# ========================= GLOBAL MODELS =========================
-# Models will be loaded on demand to save RAM
-sam2_model = None
-matta_model = None
 two_stage_processor = None
-# ========================= MODEL LOADING =========================
-def load_models_on_demand(use_two_stage=False):
-    """Load models only when needed, with proper memory management"""
-    global sam2_model, matta_model, two_stage_processor
     try:
-        # Clear any existing models first
-        clear_models_from_memory()
-        if use_two_stage and two_stage_processor is None:
-            logger.info("Loading Two-Stage Processor (SAM2 + MattA)...")
-            two_stage_processor = TwoStageProcessor(device=device)
-            logger.info("Two-Stage Processor loaded successfully")
-        elif not use_two_stage:
-            # Load individual models for single-stage processing
-            if sam2_model is None:
-                logger.info("Loading SAM2 model...")
-                # This should be imported from your SAM2 implementation
-                from sam2_integration import load_sam2_model
-                sam2_model = load_sam2_model(device=device)
-                logger.info("SAM2 model loaded")
-            if matta_model is None:
-                logger.info("Loading MattingAnything model...")
-                # This should be imported from your MattA implementation
-                from matta_integration import load_matta_model
-                matta_model = load_matta_model(device=device)
-                logger.info("MattingAnything model loaded")
-        # Force garbage collection after loading
-        gc.collect()
-        if device.type == "cuda":
-            torch.cuda.empty_cache()
     except Exception as e:
-        logger.error(f"Error loading models: {str(e)}")
-        raise
-def clear_models_from_memory():
-    """Clear models from memory to free up RAM"""
-    global sam2_model, matta_model, two_stage_processor
-    if sam2_model is not None:
-        del sam2_model
-        sam2_model = None
-    if matta_model is not None:
-        del matta_model
-        matta_model = None
-    if two_stage_processor is not None:
-        del two_stage_processor
-        two_stage_processor = None
-    gc.collect()
-    if device.type == "cuda":
-        torch.cuda.empty_cache()
-# ========================= MEMORY MONITORING =========================
-def log_memory_usage(stage=""):
-    """Log current memory usage"""
-    process = psutil.Process()
-    mem_info = process.memory_info()
-    ram_usage = mem_info.rss / 1024 / 1024 / 1024  # GB
-    if device.type == "cuda":
-        vram_usage = torch.cuda.memory_allocated() / 1024 / 1024 / 1024  # GB
-        vram_reserved = torch.cuda.memory_reserved() / 1024 / 1024 / 1024  # GB
-        logger.info(f"[{stage}] RAM: {ram_usage:.2f}GB | VRAM: {vram_usage:.2f}GB (reserved: {vram_reserved:.2f}GB)")
-    else:
-        logger.info(f"[{stage}] RAM: {ram_usage:.2f}GB")
-# ========================= PROGRESS TRACKING =========================
-def write_progress_info(info_dict):
-    """Write formatted progress information to temp file for UI display"""
     try:
-        progress_file = "/tmp/processing_info.txt"
-        with open(progress_file, "w") as f:
-            if "error" in info_dict:
-                f.write(f"❌ ERROR\n{info_dict['error']}\n")
-            elif "complete" in info_dict:
-                f.write(f"✅ COMPLETE\n")
-                f.write(f"Total Frames: {info_dict.get('total_frames', 'N/A')}\n")
-                f.write(f"Processing Time: {info_dict.get('time', 'N/A')}\n")
-                f.write(f"Average FPS: {info_dict.get('fps', 'N/A')}\n")
-                f.write(f"Resolution: {info_dict.get('resolution', 'N/A')}\n")
-                f.write(f"Background: {info_dict.get('background', 'N/A')}\n")
             else:
-                f.write(f"📊 PROCESSING STATUS\n")
-                f.write(f"━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
-                f.write(f"🎬 Frame {info_dict.get('current_frame', 0)}/{info_dict.get('total_frames', 0)}\n")
-                f.write(f"⏱️ Elapsed: {info_dict.get('elapsed', '0s')}\n")
-                f.write(f"⚡ Speed: {info_dict.get('speed', '0')} fps\n")
-                f.write(f"🎯 ETA: {info_dict.get('eta', 'calculating...')}\n")
-                f.write(f"━━━━━━━━━━━━━━━━━━━━━━━━━━\n")
-                f.write(f"📈 Progress: {info_dict.get('progress', 0):.1f}%\n")
     except Exception as e:
-        logger.error(f"Error writing progress: {e}")
-# ========================= MAIN PROCESSING FUNCTION =========================
-def process_video(
-    input_video,
-    background_image,
-    use_two_stage=False,
-    use_mask_refinement=True,
-    use_temporal_smoothing=True,
-    mask_blur=5,
-    edge_smoothing=5,
-    background_type="Color",
-    background_color="#00FF00",
-    progress=gr.Progress()
-):
-    """
-    Main video processing function with proper SAM2+MattA integration
-    """
-    temp_dir = None
-    cap = None
-    out = None
-    start_time = time.time()
     try:
-        # Initial setup
-        logger.info("Starting video processing...")
-        log_memory_usage("Start")
-        # Validate inputs
-        if input_video is None:
-            raise ValueError("No input video provided")
-        # Load models based on processing mode
-        load_models_on_demand(use_two_stage=use_two_stage)
-        log_memory_usage("Models Loaded")
-        # Setup video capture
-        cap = cv2.VideoCapture(input_video)
         if not cap.isOpened():
-            raise ValueError(f"Failed to open video: {input_video}")
-        # Get video properties
-        fps = int(cap.get(cv2.CAP_PROP_FPS))
         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        logger.info(f"Video info: {width}x{height}, {fps} fps, {total_frames} frames")
         # Prepare background
-        if background_type == "Color":
-            background = np.full((height, width, 3),
-                               tuple(int(background_color[i:i+2], 16) for i in (5, 3, 1)),
-                               dtype=np.uint8)
-        elif background_type == "Image" and background_image is not None:
-            background = load_background_image(background_image)
-            background = resize_background_to_match(background, (width, height))
-        elif background_type == "Blur":
-            # Will be handled per frame
-            background = None
         else:
-            background = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
-        # Setup output video
-        temp_dir = tempfile.mkdtemp()
-        output_path = os.path.join(temp_dir, "output_video.mp4")
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-        # Process frames
-        frame_idx = 0
-        processed_frames = []
-        masks_history = []  # For temporal smoothing
-        # Batch processing for memory efficiency
-        BATCH_SIZE = 10 if device.type == "cuda" else 5
-        frame_batch = []
         while True:
             ret, frame = cap.read()
             if not ret:
                 break
-            frame_batch.append(frame)
-            # Process batch when full or at end
-            if len(frame_batch) == BATCH_SIZE or frame_idx == total_frames - 1:
-                for batch_frame in frame_batch:
-                    # Update progress
-                    progress(frame_idx / total_frames, f"Processing frame {frame_idx}/{total_frames}")
-                    # Calculate and write detailed progress info
-                    elapsed_time = time.time() - start_time
-                    if frame_idx > 0:
-                        fps_current = frame_idx / elapsed_time
-                        eta = (total_frames - frame_idx) / fps_current
-                        write_progress_info({
-                            'current_frame': frame_idx,
-                            'total_frames': total_frames,
-                            'elapsed': f"{elapsed_time:.1f}s",
-                            'speed': f"{fps_current:.1f}",
-                            'eta': f"{eta:.0f}s",
-                            'progress': (frame_idx / total_frames) * 100
-                        })
-                    # Process frame based on mode
-                    if use_two_stage:
-                        # Use integrated two-stage processor
-                        processed_frame, mask = two_stage_processor.process_frame(
-                            batch_frame,
-                            background if background is not None else batch_frame,
-                            use_refinement=use_mask_refinement,
-                            mask_blur=mask_blur
-                        )
-                    else:
-                        # Use utilities functions (properly implemented with transparency fix)
-                        # Step 1: Segment person using SAM2
-                        mask = segment_person_hq(batch_frame, sam2_model)
-                        # Step 2: Refine mask using MattA if enabled
-                        if use_mask_refinement and matta_model is not None:
-                            mask = refine_mask_hq(batch_frame, mask, matta_model)
-                        # Step 3: Apply temporal smoothing if enabled
-                        if use_temporal_smoothing and len(masks_history) > 0:
-                            mask = apply_temporal_smoothing(mask, masks_history, window_size=5)
-                        # Store mask for temporal smoothing
-                        masks_history.append(mask)
-                        if len(masks_history) > 10:  # Keep only recent masks
-                            masks_history.pop(0)
-                        # Step 4: Apply edge smoothing
-                        if edge_smoothing > 0:
-                            mask = smooth_edges(mask, edge_smoothing)
-                        # Step 5: Handle background
-                        if background_type == "Blur":
-                            background_frame = cv2.GaussianBlur(batch_frame, (21, 21), 0)
-                        else:
-                            background_frame = background
-                        # Step 6: Replace background with proper alpha handling
-                        processed_frame = replace_background_hq(
-                            batch_frame,
-                            mask,
-                            background_frame
-                        )
-                    # Write frame
-                    out.write(processed_frame)
-                    processed_frames.append(processed_frame)
-                    frame_idx += 1
-                    # Memory management - clear every 100 frames
-                    if frame_idx % 100 == 0:
-                        gc.collect()
-                        if device.type == "cuda":
-                            torch.cuda.empty_cache()
-                        log_memory_usage(f"Frame {frame_idx}")
-                # Clear batch
-                frame_batch = []
-        # Finalize
         cap.release()
-        out.release()
-        # Write completion info
         total_time = time.time() - start_time
-        avg_fps = total_frames / total_time if total_time > 0 else 0
-        write_progress_info({
-            'complete': True,
-            'total_frames': total_frames,
-            'time': f"{total_time:.1f}s",
-            'fps': f"{avg_fps:.1f}",
-            'resolution': f"{width}x{height}",
-            'background': background_type
-        })
-        logger.info(f"Processing complete: {total_frames} frames in {total_time:.1f}s ({avg_fps:.1f} fps)")
-        log_memory_usage("Complete")
-        return output_path
-    except Exception as e:
-        logger.error(f"Processing error: {str(e)}\n{traceback.format_exc()}")
-        write_progress_info({'error': str(e)})
-        raise gr.Error(f"Processing failed: {str(e)}")
-    finally:
-        # Cleanup
-        if cap is not None:
-            cap.release()
-        if out is not None:
-            out.release()
-        # Clear models to free memory
-        clear_models_from_memory()
-        # Final garbage collection
-        gc.collect()
-        if device.type == "cuda":
-            torch.cuda.empty_cache()
-# ========================= GRADIO APP =========================
-def create_app():
-    """Create and configure the Gradio application"""
-    with gr.Blocks(title="Video Background Replacement - SAM2+MattA", theme=gr.themes.Soft()) as app:
-        gr.Markdown("""
-        # 🎬 Video Background Replacement
-        ### Powered by SAM2 + MattingAnything
-        Upload a video and replace the background with:
-        - 🎨 Solid colors
-        - 🖼️ Custom images
-        - 🌫️ Blurred background
-        **Two-Stage Mode**: Combines SAM2 segmentation with MattA refinement for best quality
-        """)
-        with gr.Tabs():
-            with gr.TabItem("🎥 Process Video"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        input_video = gr.Video(label="Input Video", height=300)
-                        with gr.Accordion("⚙️ Processing Options", open=True):
-                            use_two_stage = gr.Checkbox(
-                                label="Use Two-Stage Processing (SAM2→MattA)",
-                                value=True,
-                                info="Better quality but slower"
-                            )
-                            use_mask_refinement = gr.Checkbox(
-                                label="Refine Masks",
-                                value=True,
-                                info="Use MattA for better edges"
-                            )
-                            use_temporal_smoothing = gr.Checkbox(
-                                label="Temporal Smoothing",
-                                value=True,
-                                info="Reduce flickering between frames"
-                            )
-                            mask_blur = gr.Slider(
-                                minimum=0,
-                                maximum=21,
-                                value=5,
-                                step=2,
-                                label="Mask Blur"
-                            )
-                            edge_smoothing = gr.Slider(
-                                minimum=0,
-                                maximum=21,
-                                value=5,
-                                step=2,
-                                label="Edge Smoothing"
-                            )
-                        with gr.Accordion("🎨 Background Options", open=True):
-                            background_type = gr.Radio(
-                                choices=["Color", "Image", "Blur"],
-                                value="Color",
-                                label="Background Type"
-                            )
-                            background_color = gr.ColorPicker(
-                                label="Background Color",
-                                value="#00FF00",
-                                visible=True
-                            )
-                            background_image = gr.Image(
-                                label="Background Image",
-                                type="filepath",
-                                visible=False
-                            )
-                            # Show/hide based on background type
-                            def update_background_inputs(bg_type):
-                                return (
-                                    gr.update(visible=bg_type == "Color"),
-                                    gr.update(visible=bg_type == "Image")
-                                )
-                            background_type.change(
-                                update_background_inputs,
-                                inputs=[background_type],
-                                outputs=[background_color, background_image]
-                            )
-                    with gr.Column(scale=1):
-                        output_video = gr.Video(label="Output Video", height=300)
-                        process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
-                        processing_info = gr.Textbox(
-                            label="📊 Processing Info",
-                            lines=10,
-                            max_lines=15,
-                            interactive=False,
-                            placeholder="Processing status will appear here...",
-                            elem_id="processing-info"
-                        )
-                # Connect processing
-                process_btn.click(
-                    fn=process_video,
-                    inputs=[
-                        input_video,
-                        background_image,
-                        use_two_stage,
-                        use_mask_refinement,
-                        use_temporal_smoothing,
-                        mask_blur,
-                        edge_smoothing,
-                        background_type,
-                        background_color
-                    ],
-                    outputs=[output_video]
-                )
-            with gr.TabItem("📚 Examples"):
-                gr.Examples(
-                    examples=get_example_videos(),
-                    inputs=input_video,
-                    label="Sample Videos"
-                )
-                gr.Examples(
-                    examples=get_example_backgrounds(),
-                    inputs=background_image,
-                    label="Sample Backgrounds"
                 )
-            with gr.TabItem("ℹ️ About"):
-                gr.Markdown("""
-                ### Technology Stack
-                - **SAM2**: Segment Anything Model 2 for accurate person segmentation
-                - **MattingAnything**: Advanced alpha matting for refined edges
-                - **Two-Stage Processing**: Combines both models for optimal quality
-                ### Tips for Best Results
-                1. **Use Two-Stage Mode** for highest quality output
-                2. **Enable Temporal Smoothing** to reduce flickering
-                3. **Adjust Edge Smoothing** for softer transitions
-                4. **High contrast backgrounds** work best
-                ### Performance Notes
-                - Processing speed depends on video resolution and length
-                - GPU recommended for faster processing
-                - Two-stage mode is slower but produces better results
-                """)
-    return app
-# ========================= MAIN ENTRY POINT =========================
-if __name__ == "__main__":
     try:
-        # Create and launch app
-        app = create_app()
-        # Configure for HuggingFace Spaces
-        app.queue(max_size=5)
-        app.launch(
             server_name="0.0.0.0",
             server_port=7860,
-            share=False,
-            debug=False,
-            show_error=True
         )
     except Exception as e:
-        logger.error(f"Failed to start application: {str(e)}")
-        traceback.print_exc()
-        sys.exit(1)

 #!/usr/bin/env python3
+"""
+Final Fixed Video Background Replacement
+Uses proper functions from utilities.py to avoid transparency issues
+NEW: Added GPU detection, model caching, batch processing support,
+     and improved error handling
+"""
 import sys
 import cv2
 import numpy as np
 from pathlib import Path
+import torch
 import traceback
+import time
+import shutil
+import gc
+import threading
+from typing import Optional, Tuple, Dict, Any
+import logging
+from huggingface_hub import hf_hub_download
+# Import utilities - CRITICAL: Use these functions, don't duplicate!
 from utilities import (
     segment_person_hq,
     refine_mask_hq,
+    enhance_mask_opencv,
     replace_background_hq,
+    create_professional_background,
+    PROFESSIONAL_BACKGROUNDS,
+    validate_video_file
 )
+# Import two-stage processor if available
+try:
+    from two_stage_processor import TwoStageProcessor, CHROMA_PRESETS
+    TWO_STAGE_AVAILABLE = True
+except ImportError:
+    TWO_STAGE_AVAILABLE = False
+logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# ============================================================================ #
+# OPTIMIZATION SETTINGS
+# ============================================================================ #
+KEYFRAME_INTERVAL = 5  # Process MatAnyone every 5th frame
+FRAME_SKIP = 1        # Process every frame (set to 2 for every other frame)
+MEMORY_CLEANUP_INTERVAL = 30  # Clean memory every 30 frames
+# ============================================================================ #
+# MODEL CACHING SYSTEM
+# ============================================================================ #
+# Root directory for downloaded checkpoints; load_sam2_predictor_fixed() passes
+# a sub-directory of it to hf_hub_download() as cache_dir.
+CACHE_DIR = Path("/tmp/model_cache")
+# NOTE: executed at import time, so importing this module creates the directory.
+CACHE_DIR.mkdir(exist_ok=True, parents=True)
+# ============================================================================ #
+# GLOBAL MODEL STATE
+# ============================================================================ #
+# Populated by load_models_with_validation(); None means "not loaded yet".
+sam2_predictor = None
+matanyone_model = None
+# True only after both loaders returned without raising.
+models_loaded = False
+# Held by load_models_with_validation() for the whole duration of a load.
+loading_lock = threading.Lock()
 two_stage_processor = None
+# Cancellation flag polled by the loaders' caller and by process_video_fixed().
+PROCESS_CANCELLED = False
+# ============================================================================ #
+# SAM2 LOADER WITH VALIDATION
+# ============================================================================ #
+def load_sam2_predictor_fixed(device: str = "cuda", progress_callback: Optional[callable] = None) -> Any:
+    """Download, build and smoke-test the SAM2 image predictor.
+
+    Args:
+        device: Torch device string the model is moved to (e.g. "cuda" or "cpu").
+        progress_callback: Optional ``callback(fraction, description)`` invoked
+            at each loading milestone with a fraction between 0.1 and 1.0.
+
+    Returns:
+        A ``SAM2ImagePredictor`` wrapping the loaded model.
+
+    Raises:
+        Exception: If the download, the model build or the dummy prediction
+            fails. The original error is chained as ``__cause__``.
+    """
+    def _prog(pct: float, desc: str):
+        # Forward loader milestones to the caller, if a callback was supplied.
+        if progress_callback:
+            progress_callback(pct, desc)
     try:
+        _prog(0.1, "Initializing SAM2...")
+        # Download checkpoint with caching
+        checkpoint_path = hf_hub_download(
+            repo_id="facebook/sam2-hiera-large",
+            filename="sam2_hiera_large.pt",
+            cache_dir=str(CACHE_DIR / "sam2_checkpoint"),
+            force_download=False
+        )
+        _prog(0.5, "SAM2 checkpoint downloaded, building model...")
+        # Imported lazily so the module can be imported without SAM2 installed.
+        from sam2.build_sam import build_sam2
+        from sam2.sam2_image_predictor import SAM2ImagePredictor
+        # Build model with explicit config
+        sam2_model = build_sam2("sam2_hiera_l.yaml", checkpoint_path)
+        sam2_model.to(device)
+        predictor = SAM2ImagePredictor(sam2_model)
+        # Smoke test: one positive click in the centre of a blank 256x256 image.
+        _prog(0.8, "Testing SAM2 functionality...")
+        test_image = np.zeros((256, 256, 3), dtype=np.uint8)
+        predictor.set_image(test_image)
+        test_points = np.array([[128, 128]])
+        test_labels = np.array([1])
+        masks, scores, _ = predictor.predict(
+            point_coords=test_points,
+            point_labels=test_labels,
+            multimask_output=False
+        )
+        if masks is None or len(masks) == 0:
+            raise Exception("SAM2 predictor test failed - no masks generated")
+        _prog(1.0, "SAM2 loaded and validated successfully!")
+        logger.info("SAM2 predictor loaded and tested successfully")
+        return predictor
     except Exception as e:
+        logger.error(f"SAM2 loading failed: {str(e)}")
+        logger.error(f"Full traceback: {traceback.format_exc()}")
+        # Chain the cause so callers can still inspect the underlying error.
+        raise Exception(f"SAM2 loading failed: {str(e)}") from e
+# ============================================================================ #
+# MATANYONE LOADER WITH VALIDATION
+# ============================================================================ #
+def load_matanyone_fixed(progress_callback: Optional[callable] = None) -> Any:
+    """Load the MatAnyone inference core and probe its call interface.
+
+    Args:
+        progress_callback: Optional ``callback(fraction, description)`` invoked
+            at each loading milestone.
+
+    Returns:
+        The ``InferenceCore`` instance created from "PeiqingYang/MatAnyone".
+
+    Raises:
+        Exception: If importing or constructing the processor fails. The
+            original error is chained as ``__cause__``.
+    """
+    def _prog(pct: float, desc: str):
+        if progress_callback:
+            progress_callback(pct, desc)
     try:
+        _prog(0.2, "Loading MatAnyone...")
+        # Imported lazily so the module can be imported without MatAnyone installed.
+        from matanyone import InferenceCore
+        processor = InferenceCore("PeiqingYang/MatAnyone")
+        _prog(0.8, "Testing MatAnyone functionality...")
+        # Interface probe only: no inference is executed here, we merely check
+        # that the object exposes something callable. Failures are downgraded
+        # to warnings because the loaded processor is returned regardless.
+        try:
+            if hasattr(processor, 'process') or hasattr(processor, '__call__'):
+                logger.info("MatAnyone processor interface detected")
             else:
+                logger.warning("MatAnyone interface unclear, will use fallback refinement")
+        except Exception as test_e:
+            logger.warning(f"MatAnyone test failed: {test_e}, will use enhanced OpenCV")
+        _prog(1.0, "MatAnyone loaded successfully!")
+        logger.info("MatAnyone processor loaded successfully")
+        return processor
     except Exception as e:
+        logger.error(f"MatAnyone loading failed: {str(e)}")
+        logger.error(f"Full traceback: {traceback.format_exc()}")
+        # Chain the cause so callers can still inspect the underlying error.
+        raise Exception(f"MatAnyone loading failed: {str(e)}") from e
+# ============================================================================ #
+# MODEL MANAGEMENT FUNCTIONS
+# ============================================================================ #
+def get_model_status() -> Dict[str, Any]:
+    """Return the current model status for the UI.
+
+    Returns:
+        A dict with 'sam2' and 'matanyone' set to 'Ready' or 'Not loaded',
+        and 'validated' set to the boolean ``models_loaded`` flag.
+    """
+    # Read-only access to module globals, so no ``global`` declaration is needed.
+    return {
+        'sam2': 'Ready' if sam2_predictor is not None else 'Not loaded',
+        'matanyone': 'Ready' if matanyone_model is not None else 'Not loaded',
+        'validated': models_loaded
+    }
+def get_cache_status() -> Dict[str, Any]:
+    """Report which models are held in memory and whether two-stage mode can be used."""
+    status: Dict[str, Any] = {}
+    # A model counts as loaded as soon as its module-level slot is not None.
+    status["sam2_loaded"] = sam2_predictor is not None
+    status["matanyone_loaded"] = matanyone_model is not None
+    status["models_validated"] = models_loaded
+    status["two_stage_available"] = TWO_STAGE_AVAILABLE
+    return status
+def load_models_with_validation(progress_callback: Optional[callable] = None) -> str:
+    """Load SAM2 and MatAnyone into the module globals and report the outcome.
+
+    The whole load runs under ``loading_lock``. Loader failures are caught and
+    reported through the return value instead of being raised.
+
+    Args:
+        progress_callback: Optional ``callback(fraction, description)`` that is
+            called once here and then handed to both loader functions.
+
+    Returns:
+        A human-readable status string: already loaded, success (with timing),
+        cancelled, or "Model loading failed: ...".
+    """
+    global sam2_predictor, matanyone_model, models_loaded, two_stage_processor, PROCESS_CANCELLED
+    with loading_lock:
+        # Skip the reload unless a cancellation is pending; a pending
+        # cancellation falls through and triggers a fresh load below.
+        if models_loaded and not PROCESS_CANCELLED:
+            return "Models already loaded and validated"
+        try:
+            # Clear any stale cancellation before starting.
+            PROCESS_CANCELLED = False
+            start_time = time.time()
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            logger.info(f"Starting model loading on {device}")
+            if progress_callback:
+                progress_callback(0.0, "Starting model loading...")
+            # Load SAM2 with validation
+            sam2_predictor = load_sam2_predictor_fixed(device=device, progress_callback=progress_callback)
+            # Cancellation checkpoint; the flag is presumably set elsewhere
+            # (e.g. by a UI handler) - not visible in this part of the file.
+            if PROCESS_CANCELLED:
+                return "Model loading cancelled by user"
+            # Load MatAnyone with validation
+            matanyone_model = load_matanyone_fixed(progress_callback=progress_callback)
+            if PROCESS_CANCELLED:
+                return "Model loading cancelled by user"
+            models_loaded = True
+            # Initialize two-stage processor if available
+            # (a failure here is caught below and resets models_loaded to False)
+            if TWO_STAGE_AVAILABLE:
+                two_stage_processor = TwoStageProcessor(sam2_predictor, matanyone_model)
+                logger.info("Two-stage processor initialized")
+            load_time = time.time() - start_time
+            message = f"SUCCESS: SAM2 + MatAnyone loaded and validated in {load_time:.1f}s"
+            if TWO_STAGE_AVAILABLE:
+                message += " (Two-stage mode available)"
+            logger.info(message)
+            return message
+        except Exception as e:
+            # Errors are reported via the return value, never re-raised.
+            models_loaded = False
+            error_msg = f"Model loading failed: {str(e)}"
+            logger.error(error_msg)
+            return error_msg
+# ============================================================================ #
+# MAIN VIDEO PROCESSING - USING UTILITIES FUNCTIONS
+# ============================================================================ #
def _remove_quietly(path):
    """Delete *path* if it exists; log a warning instead of raising on failure."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        pass
    except OSError as e:
        logger.warning(f"Cleanup error: {e}")


def _prepare_background(background_choice, custom_background_path, frame_width, frame_height):
    """Resolve the background selection.

    Returns:
        ``(image, display_name, error)``. ``error`` is ``None`` on success;
        otherwise it is the user-facing message and ``image`` is ``None``.
    """
    if background_choice == "custom" and custom_background_path:
        if not os.path.exists(custom_background_path):
            return None, "", f"Custom background not found: {custom_background_path}"
        background = cv2.imread(custom_background_path)
        if background is None:
            return None, "", "Could not read custom background image."
        return background, "Custom Image", None

    if background_choice not in PROFESSIONAL_BACKGROUNDS:
        return None, "", f"Invalid background selection: {background_choice}"

    bg_config = PROFESSIONAL_BACKGROUNDS[background_choice]
    background = create_professional_background(bg_config, frame_width, frame_height)
    if background is None:
        return None, "", "Failed to create background."
    return background, bg_config["name"], None


def _mux_audio(video_only_path, audio_source_path, output_path):
    """Re-encode *video_only_path* with the first audio track of *audio_source_path*.

    ffmpeg is invoked with an argument list (no shell), so paths containing
    quotes or shell metacharacters cannot alter the command.

    Returns:
        ``True`` when ffmpeg exited with status 0 and *output_path* exists,
        ``False`` otherwise (any partial output file is removed).
    """
    import subprocess  # local import: only needed for this helper

    cmd = [
        "ffmpeg", "-y",
        "-i", video_only_path,
        "-i", audio_source_path,
        "-c:v", "libx264", "-crf", "18", "-preset", "medium",
        "-c:a", "aac", "-b:a", "192k", "-ac", "2", "-ar", "48000",
        # "1:a:0?" makes the audio stream optional so silent inputs still work.
        "-map", "0:v:0", "-map", "1:a:0?",
        "-shortest", output_path,
    ]
    try:
        completed = subprocess.run(cmd, check=False)
    except OSError as e:  # e.g. ffmpeg binary not installed
        logger.warning(f"Audio processing error: {e}")
        return False

    if completed.returncode != 0 or not os.path.exists(output_path):
        logger.warning(f"Audio processing error: ffmpeg exited with status {completed.returncode}")
        _remove_quietly(output_path)
        return False
    return True


def process_video_fixed(
    video_path: str,
    background_choice: str,
    custom_background_path: Optional[str],
    progress_callback: Optional[callable] = None,
    use_two_stage: bool = False,
    chroma_preset: str = "standard",
    preview_mask: bool = False,
    preview_greenscreen: bool = False
) -> Tuple[Optional[str], str]:
    """Replace the background of a video and return the output file path.

    Two modes are supported:

    * two-stage (``use_two_stage=True``): delegates to
      ``two_stage_processor.process_full_pipeline`` using the chroma settings
      selected by ``chroma_preset``;
    * single-stage (default): per-frame segmentation via ``segment_person_hq``,
      refinement via ``refine_mask_hq`` every ``KEYFRAME_INTERVAL`` frames, and
      compositing via ``replace_background_hq``.

    In single-stage mode ``preview_mask`` writes the raw segmentation mask and
    ``preview_greenscreen`` writes the subject over pure green instead of the
    final composite; ``preview_mask`` wins when both are set. Preview outputs
    carry no audio.

    Args:
        video_path: Path of the input video.
        background_choice: ``"custom"`` or a key of ``PROFESSIONAL_BACKGROUNDS``.
        custom_background_path: Image path used when ``background_choice`` is
            ``"custom"``.
        progress_callback: Optional ``callback(fraction, description)``.
        use_two_stage: Select the two-stage green-screen pipeline.
        chroma_preset: Key of ``CHROMA_PRESETS``; unknown keys fall back to
            ``'standard'``.
        preview_mask: Write the mask visualisation (single-stage only).
        preview_greenscreen: Write the green-screen preview (single-stage only).

    Returns:
        ``(output_path, message)`` on success, ``(None, error_message)`` on
        failure or cancellation. The function does not raise; unexpected
        errors are logged and reported in the message.
    """
    if PROCESS_CANCELLED:
        return None, "Processing cancelled by user"
    if not models_loaded:
        return None, "Models not loaded. Call load_models_with_validation() first."
    if not video_path or not os.path.exists(video_path):
        return None, f"Video file not found: {video_path}"
    # Validate video file
    is_valid, validation_msg = validate_video_file(video_path)
    if not is_valid:
        return None, f"Invalid video: {validation_msg}"

    def _prog(pct: float, desc: str):
        """Forward progress to the callback and mirror frame status to a file.

        Raises ``Exception`` when cancellation has been requested so that
        long-running callees that report progress are interrupted.
        """
        if PROCESS_CANCELLED:
            raise Exception("Processing cancelled by user")
        if progress_callback:
            progress_callback(pct, desc)
        # Per-frame messages have the form "Frame a/b | elapsed | fps | ETA".
        if "Frame" in desc and "|" in desc:
            parts = desc.split("|")
            frame_info = parts[0].strip() if len(parts) > 0 else ""
            time_info = parts[1].strip() if len(parts) > 1 else ""
            fps_info = parts[2].strip() if len(parts) > 2 else ""
            eta_info = parts[3].strip() if len(parts) > 3 else ""
            display_text = f"""📊 PROCESSING STATUS
━━━━━━━━━━━━━━━━━━━━━━━━━━
🎬 {frame_info}
⏱️ Elapsed: {time_info}
⚡ Speed: {fps_info}
🎯 {eta_info}
━━━━━━━━━━━━━━━━━━━━━━━━━━
📈 Progress: {pct*100:.1f}%"""
            try:
                with open("/tmp/processing_info.txt", 'w') as f:
                    f.write(display_text)
            except Exception as e:
                logger.warning(f"Error writing processing info: {e}")

    try:
        _prog(0.0, f"Starting {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'} processing...")

        # ------------------------------------------------------------------
        # Two-stage (green screen) processing
        # ------------------------------------------------------------------
        if use_two_stage:
            if not TWO_STAGE_AVAILABLE:
                return None, "Two-stage mode not available. Please add two_stage_processor.py file."
            if two_stage_processor is None:
                return None, "Two-stage processor not initialized. Please reload models."
            _prog(0.05, "Starting TWO-STAGE green screen processing...")

            # Only the frame size is needed here; release the capture right away.
            cap = cv2.VideoCapture(video_path)
            try:
                if not cap.isOpened():
                    return None, "Could not open video file."
                frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            finally:
                cap.release()

            background, background_name, bg_error = _prepare_background(
                background_choice, custom_background_path, frame_width, frame_height
            )
            if bg_error is not None:
                return None, bg_error

            chroma_settings = CHROMA_PRESETS.get(chroma_preset, CHROMA_PRESETS['standard'])

            timestamp = int(time.time())
            final_output = f"/tmp/twostage_final_{timestamp}.mp4"
            result, message = two_stage_processor.process_full_pipeline(
                video_path,
                background,
                final_output,
                chroma_settings=chroma_settings,
                progress_callback=_prog
            )
            if PROCESS_CANCELLED:
                return None, "Processing cancelled by user"
            if result is None:
                return None, message

            # Add audio back; on failure keep the video-only file.
            _prog(0.9, "Adding audio...")
            final_with_audio = f"/tmp/twostage_audio_{timestamp}.mp4"
            if _mux_audio(final_output, video_path, final_with_audio):
                _remove_quietly(final_output)
                final_output = final_with_audio

            _prog(1.0, "TWO-STAGE processing complete!")
            success_message = (
                f"TWO-STAGE Success!\n"
                f"Background: {background_name}\n"
                f"Method: Green Screen Chroma Key\n"
                f"Preset: {chroma_preset}\n"
                f"Quality: Professional cinema-grade"
            )
            return final_output, success_message

        # ------------------------------------------------------------------
        # Single-stage processing
        # ------------------------------------------------------------------
        _prog(0.05, "Starting SINGLE-STAGE processing...")
        is_preview = preview_mask or preview_greenscreen
        cap = cv2.VideoCapture(video_path)
        final_writer = None
        writer_path = None   # file the VideoWriter produces (preview or intermediate)
        finished = False     # True only when the frame loop ran to the end
        frame_count = 0
        successful_frames = 0
        try:
            if not cap.isOpened():
                return None, "Could not open video file."
            fps = cap.get(cv2.CAP_PROP_FPS)
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            if total_frames <= 0:
                return None, "Video appears to be empty."
            if not fps > 0:  # also catches NaN
                logger.warning(f"Invalid FPS reported ({fps}); falling back to 30.0")
                fps = 30.0
            logger.info(f"Video info: {frame_width}x{frame_height}, {fps}fps, {total_frames} frames")

            background, background_name, bg_error = _prepare_background(
                background_choice, custom_background_path, frame_width, frame_height
            )
            if bg_error is not None:
                return None, bg_error

            timestamp = int(time.time())
            fourcc = cv2.VideoWriter_fourcc(*'avc1')  # H.264 for better compatibility
            _prog(0.1, f"Processing {total_frames} frames with {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'} processing...")

            # Previews are returned as written; the final output is an
            # intermediate file that still needs its audio track.
            writer_path = f"/tmp/{'preview' if is_preview else 'output'}_{timestamp}.mp4"
            final_writer = cv2.VideoWriter(writer_path, fourcc, fps, (frame_width, frame_height))
            if not final_writer.isOpened():
                return None, "Could not create output video file."

            last_refined_mask = None
            start_time = time.time()
            while True:
                if PROCESS_CANCELLED:
                    # Resources and the partial file are cleaned up in `finally`.
                    return None, "Processing cancelled by user"
                ret, frame = cap.read()
                if not ret:
                    break
                # Skip frames if FRAME_SKIP > 1.
                # NOTE(review): skipped frames are not written while the writer
                # keeps the source FPS, so the output looks shorter/faster when
                # FRAME_SKIP > 1 - confirm this is intended.
                if frame_count % FRAME_SKIP != 0:
                    frame_count += 1
                    continue
                try:
                    # Progress with timing info and ETA
                    elapsed_time = time.time() - start_time
                    current_fps = frame_count / elapsed_time if elapsed_time > 0 else 0
                    remaining_frames = total_frames - frame_count
                    eta_seconds = remaining_frames / current_fps if current_fps > 0 else 0
                    eta_display = f"{int(eta_seconds//60)}m {int(eta_seconds%60)}s" if eta_seconds > 60 else f"{int(eta_seconds)}s"
                    progress_msg = f"Frame {frame_count + 1}/{total_frames} | {elapsed_time:.1f}s | {current_fps:.1f} fps | ETA: {eta_display}"
                    logger.info(progress_msg)
                    _prog(0.1 + (frame_count / max(1, total_frames)) * 0.8, progress_msg)

                    # SAM2 segmentation using the utilities function
                    mask = segment_person_hq(frame, sam2_predictor)
                    if preview_mask:
                        # Green mask: OpenCV images are BGR, so keep channel 1
                        # and clear blue (0) and red (2).
                        mask_vis = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
                        mask_vis[..., 0] = 0
                        mask_vis[..., 2] = 0
                        final_writer.write(mask_vis)
                    else:
                        # MatAnyone refinement on keyframes using the utilities function
                        if (frame_count % KEYFRAME_INTERVAL == 0) or (last_refined_mask is None):
                            refined_mask = refine_mask_hq(frame, mask, matanyone_model)
                            last_refined_mask = refined_mask.copy()
                            logger.info(f"Keyframe refinement at frame {frame_count}")
                        else:
                            # Blend SAM2 mask with last refined mask for temporal smoothness
                            alpha = 0.7
                            refined_mask = cv2.addWeighted(mask, alpha, last_refined_mask, 1-alpha, 0)

                        if preview_greenscreen:
                            # Composite the subject over pure green (BGR).
                            green_bg = np.zeros_like(frame)
                            green_bg[:, :] = [0, 255, 0]
                            mask_3ch = cv2.cvtColor(refined_mask, cv2.COLOR_GRAY2BGR)
                            mask_norm = mask_3ch.astype(float) / 255
                            preview_frame = frame * mask_norm + green_bg * (1 - mask_norm)
                            final_writer.write(preview_frame.astype(np.uint8))
                        else:
                            result_frame = replace_background_hq(frame, refined_mask, background)
                            final_writer.write(result_frame)
                    # Count preview frames too; otherwise every preview run is
                    # reported as "no frames processed".
                    successful_frames += 1
                except Exception as frame_error:
                    logger.warning(f"Error processing frame {frame_count}: {frame_error}")
                    # Write original frame if processing fails
                    final_writer.write(frame)
                frame_count += 1

                # Memory management
                if frame_count % MEMORY_CLEANUP_INTERVAL == 0:
                    gc.collect()
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                    elapsed = time.time() - start_time
                    fps_actual = frame_count / elapsed if elapsed > 0 else 0
                    eta = (total_frames - frame_count) / fps_actual if fps_actual > 0 else 0
                    logger.info(f"Progress: {frame_count}/{total_frames}, FPS: {fps_actual:.1f}, ETA: {eta:.0f}s")
            finished = True
        finally:
            # Always release handles; the writer must be closed before ffmpeg
            # reads the file below.
            cap.release()
            if final_writer is not None:
                final_writer.release()
            if not finished:
                _remove_quietly(writer_path)

        if PROCESS_CANCELLED:
            _remove_quietly(writer_path)
            return None, "Processing cancelled by user"
        if successful_frames == 0:
            _remove_quietly(writer_path)
            return None, "No frames were processed successfully with AI."

        # Processing stats
        total_time = time.time() - start_time
        avg_fps = frame_count / total_time if total_time > 0 else 0
        _prog(0.9, "Finalizing output...")

        if is_preview:
            final_output = writer_path
        else:
            # Add audio back for final output; fall back to the silent video.
            _prog(0.9, "Adding audio...")
            final_output = f"/tmp/final_{timestamp}.mp4"
            if not _mux_audio(writer_path, video_path, final_output):
                shutil.copy2(writer_path, final_output)
            _remove_quietly(writer_path)

        _prog(1.0, "Processing complete!")
        success_message = (
            f"Success!\n"
            f"Background: {background_name}\n"
            f"Resolution: {frame_width}x{frame_height}\n"
            f"Total frames: {frame_count}\n"
            f"Successfully processed: {successful_frames}\n"
            f"Processing time: {total_time:.1f}s\n"
            f"Average FPS: {avg_fps:.1f}\n"
            f"Keyframe interval: {KEYFRAME_INTERVAL}\n"
            f"Mode: {'TWO-STAGE' if use_two_stage else 'SINGLE-STAGE'}"
        )
        return final_output, success_message
    except Exception as e:
        logger.error(f"Processing error: {traceback.format_exc()}")
        return None, f"Processing Error: {str(e)}"
+# ============================================================================ #
+# MAIN - IMPORT UI COMPONENTS
+# ============================================================================ #
def main():
    """Print the startup banner, build the UI and launch the web server.

    Any exception raised during startup is logged and printed; it is not
    re-raised.
    """
    try:
        print("===== FINAL FIXED VIDEO BACKGROUND REPLACEMENT =====")
        print(f"Keyframe interval: {KEYFRAME_INTERVAL} frames")
        print(f"Frame skip: {FRAME_SKIP} (1=all frames, 2=every other)")
        two_stage_state = 'AVAILABLE' if TWO_STAGE_AVAILABLE else 'NOT AVAILABLE'
        print(f"Two-stage mode: {two_stage_state}")
        print("Loading UI components...")

        # Imported here rather than at module level, so an import failure is
        # reported through the handler below.
        from ui_components import create_interface

        # Make sure the working directories exist before the UI is built.
        os.makedirs("/tmp/MyAvatar/My_Videos/", exist_ok=True)
        CACHE_DIR.mkdir(exist_ok=True, parents=True)

        print("Creating interface...")
        app = create_interface()

        print("Launching...")
        # NOTE(review): `enable_queue` and `share` are passed straight to
        # launch(); confirm the installed UI library version accepts them.
        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": True,
            "show_error": True,
            "debug": True,
            "enable_queue": True,
        }
        app.launch(**launch_options)
    except Exception as exc:
        failure = f"Startup failed: {exc}"
        logger.error(failure)
        print(failure)


if __name__ == "__main__":
    main()