Update app.py
app.py
CHANGED
@@ -2,17 +2,14 @@
 """
 BackgroundFX Pro - CSP-Safe Application Entry Point
 Now with: live background preview + sources: Preset / Upload / Gradient / AI Generate
+(uses utils.cv_processing to avoid circular imports)
 """
 
 import early_env  # <<< must be FIRST
 
-import os, time
+import os, time
 from typing import Optional, Dict, Any, Callable, Tuple
 
-# Prefer a writable cache on HF/Spaces
-os.environ.setdefault("HF_HOME", "/tmp/hf")
-os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
-
 # 1) CSP-safe Gradio env
 os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
 os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
@@ -41,7 +38,6 @@ def _patched_get_type(schema):
 logger.info("Entrypoint starting…")
 
 # 4) Imports
-from core.exceptions import ModelLoadingError, VideoProcessingError
 from config.app_config import get_config
 from utils.hardware.device_manager import DeviceManager
 from utils.system.memory_manager import MemoryManager
@@ -49,35 +45,12 @@ def _patched_get_type(schema):
 from processing.video.video_processor import CoreVideoProcessor, ProcessorConfig
 from processing.audio.audio_processor import AudioProcessor
 
-#
-from utils import
-#
-
-
-
-def create_gradient_background(spec: Dict[str, Any], width: int, height: int):
-    # Lightweight fallback preview (linear only)
-    import numpy as np
-    import cv2
-    def _to_rgb(c):
-        if isinstance(c, (list, tuple)) and len(c) == 3:
-            return tuple(int(x) for x in c)
-        if isinstance(c, str) and c.startswith("#") and len(c) == 7:
-            return tuple(int(c[i:i+2], 16) for i in (1,3,5))
-        return (255, 255, 255)
-    start = _to_rgb(spec.get("start", "#222222"))
-    end = _to_rgb(spec.get("end", "#888888"))
-    angle = float(spec.get("angle_deg", 0))
-    bg = np.zeros((height, width, 3), np.uint8)
-    for y in range(height):
-        t = y / max(1, height - 1)
-        r = int(start[0] * (1 - t) + end[0] * t)
-        g = int(start[1] * (1 - t) + end[1] * t)
-        b = int(start[2] * (1 - t) + end[2] * t)
-        bg[y, :] = (r, g, b)
-    center = (width / 2, height / 2)
-    rot = cv2.getRotationMatrix2D(center, angle, 1.0)
-    return cv2.warpAffine(bg, rot, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
+# ⛑️ Bring helpers from the slim, self-contained cv_processing (no circular imports)
+from utils.cv_processing import (
+    PROFESSIONAL_BACKGROUNDS,        # dict of presets
+    validate_video_file,             # returns (ok, reason)
+    create_professional_background,  # used for preview defaults
+)
 
 # 5) CSP-safe fallbacks for models
 class CSPSafeSAM2:
@@ -111,7 +84,6 @@ def process(self, image, mask, **kwargs):
 import numpy as np
 import cv2
 from PIL import Image
-from typing import Tuple
 
 PREVIEW_W, PREVIEW_H = 640, 360  # 16:9
 
@@ -126,9 +98,30 @@ def _np_to_pil(arr: np.ndarray) -> Image.Image:
     arr = arr.clip(0, 255).astype(np.uint8)
     return Image.fromarray(arr)
 
-def
-
-
+def _create_gradient_preview(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
+    """Lightweight linear gradient (with rotation) for previews."""
+    def _to_rgb(c):
+        if isinstance(c, (list, tuple)) and len(c) == 3:
+            return tuple(int(x) for x in c)
+        if isinstance(c, str) and c.startswith("#") and len(c) == 7:
+            return tuple(int(c[i:i+2], 16) for i in (1,3,5))
+        return (255, 255, 255)
+    start = _to_rgb(spec.get("start", "#222222"))
+    end = _to_rgb(spec.get("end", "#888888"))
+    angle = float(spec.get("angle_deg", 0))
+
+    bg = np.zeros((height, width, 3), np.uint8)
+    for y in range(height):
+        t = y / max(1, height - 1)
+        r = int(start[0] * (1 - t) + end[0] * t)
+        g = int(start[1] * (1 - t) + end[1] * t)
+        b = int(start[2] * (1 - t) + end[2] * t)
+        bg[y, :] = (r, g, b)
+    if abs(angle) % 360 < 1e-6:
+        return bg
+    center = (width / 2, height / 2)
+    rot = cv2.getRotationMatrix2D(center, angle, 1.0)
+    return cv2.warpAffine(bg, rot, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
 
 # ---------- main app ----------
 class VideoBackgroundApp:
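A quick way to sanity-check the new helper is a throwaway snippet like the one below, assuming it runs inside app.py where `_np_to_pil` and the preview constants are defined:

spec = {"type": "linear", "start": "#222222", "end": "#888888", "angle_deg": 35}
bg = _create_gradient_preview(spec, PREVIEW_W, PREVIEW_H)   # (360, 640, 3) uint8, rotated 35 degrees
_np_to_pil(bg).save("/tmp/gradient_preview.png")            # same conversion preview_gradient uses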
@@ -140,11 +133,66 @@ def __init__(self):
         self.audio_proc = AudioProcessor()
         self.models_loaded = False
         self.core_processor: Optional[CoreVideoProcessor] = None
-        # Text-to-Image pipeline cache
-        self.t2i_pipe = None
-        self.t2i_model_id = None
         logger.info("VideoBackgroundApp initialized (device=%s)", self.device_mgr.get_optimal_device())
 
+    def _build_processor_config_safely(self) -> ProcessorConfig:
+        """
+        Build ProcessorConfig including stability knobs if supported by your installed CoreVideoProcessor.
+        If your version doesn't have those fields, we auto-filter them out to avoid TypeError.
+        """
+        # Desired config (includes stability + encoding)
+        desired: Dict[str, Any] = dict(
+            background_preset="office",
+            write_fps=None,
+            max_model_size=1280,
+            # --- stability knobs (only used if supported in your CoreVideoProcessor) ---
+            temporal_ema_alpha=0.75,   # 0.6–0.85 typical
+            min_iou_to_accept=0.05,    # reject sudden mask jumps
+            dilate_px=6,               # pad edges for hair/ears
+            edge_blur_px=2,            # calm shimmering edges
+            # --- encoding (NVENC + fallbacks used inside the processor you installed) ---
+            use_nvenc=True,
+            nvenc_codec="h264",
+            nvenc_preset="p5",
+            nvenc_cq=18,
+            nvenc_tune_hq=True,
+            nvenc_pix_fmt="yuv420p",
+        )
+
+        # Filter against dataclass fields if present
+        fields = getattr(ProcessorConfig, "__dataclass_fields__", None)
+        if isinstance(fields, dict):
+            filtered = {k: v for k, v in desired.items() if k in fields}
+        else:
+            # very old ProcessorConfig: just pass the common ones
+            filtered = {
+                "background_preset": desired["background_preset"],
+                "write_fps": desired["write_fps"],
+                "max_model_size": desired["max_model_size"],
+                "use_nvenc": desired["use_nvenc"],
+                "nvenc_codec": desired["nvenc_codec"],
+                "nvenc_preset": desired["nvenc_preset"],
+                "nvenc_cq": desired["nvenc_cq"],
+                "nvenc_tune_hq": desired["nvenc_tune_hq"],
+                "nvenc_pix_fmt": desired["nvenc_pix_fmt"],
+            }
+
+        try:
+            return ProcessorConfig(**filtered)
+        except TypeError:
+            # final safety: pass minimal args
+            return ProcessorConfig(
+                background_preset="office",
+                write_fps=None,
+                max_model_size=1280,
+                use_nvenc=True,
+                nvenc_codec="h264",
+                nvenc_preset="p5",
+                nvenc_cq=18,
+                nvenc_tune_hq=True,
+                nvenc_pix_fmt="yuv420p",
+            )
+
     def load_models(self, progress_callback: Optional[Callable] = None) -> str:
         logger.info("Loading models (CSP-safe)…")
         try:
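The filtering step works because dataclasses list their accepted keyword arguments in `__dataclass_fields__`. A self-contained toy version of the same idea (ToyConfig is illustrative, not part of the project):

from dataclasses import dataclass
from typing import Any, Dict

@dataclass
class ToyConfig:                      # stand-in for an older ProcessorConfig without the new knobs
    background_preset: str = "office"
    max_model_size: int = 1280

desired: Dict[str, Any] = {
    "background_preset": "office",
    "max_model_size": 1280,
    "temporal_ema_alpha": 0.75,       # unknown to ToyConfig -> silently dropped
}
fields = getattr(ToyConfig, "__dataclass_fields__", {})
filtered = {k: v for k, v in desired.items() if k in fields}
print(ToyConfig(**filtered))          # no TypeError: ToyConfig(background_preset='office', max_model_size=1280)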
@@ -156,17 +204,8 @@ def load_models(self, progress_callback: Optional[Callable] = None) -> str:
             sam2_model = getattr(sam2, "model", sam2) if sam2 else CSPSafeSAM2()
             matanyone_model = getattr(matanyone, "model", matanyone) if matanyone else CSPSafeMatAnyone()
 
-            cfg = ProcessorConfig(
-
-                write_fps=None,
-                max_model_size=1280,
-                use_nvenc=True,
-                nvenc_codec="h264",
-                nvenc_preset="p5",
-                nvenc_cq=18,
-                nvenc_tune_hq=True,
-                nvenc_pix_fmt="yuv420p",
-            )
+            cfg = self._build_processor_config_safely()
+
             self.core_processor = CoreVideoProcessor(config=cfg, models=None)
             self.core_processor.models = type('FakeModelManager', (), {
                 'get_sam2': lambda self_: sam2_model,
@@ -197,173 +236,37 @@ def preview_upload(self, file) -> Optional[Image.Image]:
 
     def preview_gradient(self, gtype: str, color1: str, color2: str, angle: int) -> Image.Image:
         spec = {
-            "type": (gtype or "linear").lower(),  # "linear" or "radial" (linear
+            "type": (gtype or "linear").lower(),  # "linear" or "radial" (preview uses linear with rotation)
             "start": _hex_to_rgb(color1 or "#222222"),
             "end": _hex_to_rgb(color2 or "#888888"),
             "angle_deg": float(angle or 0),
         }
-        bg =
+        bg = _create_gradient_preview(spec, PREVIEW_W, PREVIEW_H)
         return _np_to_pil(bg)
 
-    # ---- AI BG: lazy-load + reuse pipe ----
-    def _ensure_t2i(self):
-        """
-        Choose and load a text-to-image pipeline once, with memory-efficient settings.
-        Returns (pipe, model_id, msg)
-        """
-        if self.t2i_pipe is not None:
-            return self.t2i_pipe, self.t2i_model_id, "AI generator ready"
-
-        try:
-            import torch
-            from diffusers import StableDiffusionPipeline, AutoPipelineForText2Image
-        except Exception as e:
-            return None, None, f"AI generation unavailable (missing deps): {e}"
-
-        # Heuristic: prefer fast/light models when VRAM is small
-        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
-        device = "cuda" if getattr(torch, "cuda", None) and torch.cuda.is_available() else "cpu"
-
-        vram_gb = None
-        try:
-            vram_gb = self.device_mgr.get_device_memory_gb()
-        except Exception:
-            pass
-
-        # Prefer SD-Turbo if GPU and small VRAM; SDXL-Turbo if large VRAM; fallback to SD 2.1 on CPU
-        if device == "cuda":
-            if vram_gb and vram_gb >= 12:
-                model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/sdxl-turbo")
-            else:
-                model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/sd-turbo")
-        else:
-            # CPU-friendly (still heavy): classic SD 2.1
-            model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")
-
-        logger.info(f"Loading text-to-image model: {model_id} (device={device}, vram={vram_gb} GB)")
-
-        dtype = torch.float16 if device == "cuda" else torch.float32
-
-        pipe = None
-        err = None
-        try:
-            # Newer unified API handles sd-turbo and sdxl-turbo too
-            pipe = AutoPipelineForText2Image.from_pretrained(
-                model_id,
-                torch_dtype=dtype,
-                use_safetensors=True,
-                token=token
-            )
-        except Exception as e1:
-            err = e1
-            try:
-                # Fallback to classic pipeline (works for sd/stable-diffusion-2-1)
-                pipe = StableDiffusionPipeline.from_pretrained(
-                    model_id,
-                    torch_dtype=dtype,
-                    use_safetensors=True,
-                    safety_checker=None,  # disable to avoid false positives for office backgrounds
-                    feature_extractor=None,
-                    use_auth_token=token  # legacy name
-                )
-            except Exception as e2:
-                return None, None, f"AI model load failed: {e1} / {e2}"
-
-        # Memory/perf knobs
-        try:
-            pipe.set_progress_bar_config(disable=True)
-        except Exception:
-            pass
-        try:
-            pipe.enable_attention_slicing()
-        except Exception:
-            pass
-        try:
-            pipe.enable_vae_slicing()
-        except Exception:
-            pass
-        if device == "cuda":
-            try:
-                pipe.enable_xformers_memory_efficient_attention()
-            except Exception:
-                pass
-            pipe = pipe.to(device)
-        else:
-            # If accelerate is present, offload module-wise to save RAM
-            try:
-                pipe.enable_sequential_cpu_offload()
-            except Exception:
-                pass
-
-        self.t2i_pipe = pipe
-        self.t2i_model_id = model_id
-        return pipe, model_id, f"AI model loaded: {model_id}"
-
     def ai_generate_background(self, prompt: str, seed: int, width: int, height: int) -> Tuple[Optional[Image.Image], Optional[str], str]:
         """
-
+        Try generating a background with diffusers; save to /tmp and return (img, path, status).
         """
-        pipe, model_id, msg = self._ensure_t2i()
-        if pipe is None:
-            logger.warning(msg)
-            return None, None, msg
-
-        # Ensure sane, divisible-by-8 sizes
-        w = _div8(int(width)) if width else PREVIEW_W
-        h = _div8(int(height)) if height else PREVIEW_H
-        w = max(256, min(w, 1536))
-        h = max(256, min(h, 1536))
-
-        # Reasonable defaults for office-like backgrounds
-        prompt = (prompt or "professional modern office background, neutral colors, soft depth of field, clean, minimal, photorealistic")
-        negative = "text, watermark, logo, people, person, artifact, noisy, blurry"
-
-        # Seed & inference
         try:
+            from diffusers import StableDiffusionPipeline
             import torch
-
-
-
-
-
-
-
-
-
-
-
-            with torch.inference_mode():
-                if device == "cuda":
-                    # autocast for fp16
-                    with torch.autocast("cuda"):
-                        out = pipe(
-                            prompt=prompt,
-                            negative_prompt=negative,
-                            height=h,
-                            width=w,
-                            guidance_scale=guidance,
-                            num_inference_steps=steps,
-                            generator=g
-                        )
-                else:
-                    out = pipe(
-                        prompt=prompt,
-                        negative_prompt=negative,
-                        height=h,
-                        width=w,
-                        guidance_scale=guidance,
-                        num_inference_steps=steps,
-                        generator=g
-                    )
-            img = out.images[0]
-
+            model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")
+            dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)
+            g = torch.Generator(device=device).manual_seed(int(seed)) if seed is not None else None
+            if device == "cuda":
+                with torch.autocast("cuda"):
+                    img = pipe(prompt, height=height, width=width, guidance_scale=7.0, num_inference_steps=25, generator=g).images[0]
+            else:
+                img = pipe(prompt, height=height, width=width, guidance_scale=7.0, num_inference_steps=25, generator=g).images[0]
             tmp_path = f"/tmp/ai_bg_{int(time.time())}.png"
             img.save(tmp_path)
-
-            return img.resize((PREVIEW_W, PREVIEW_H), Image.LANCZOS), tmp_path, f"{msg} • Generated {w}x{h}"
+            return img.resize((PREVIEW_W, PREVIEW_H), Image.LANCZOS), tmp_path, f"AI background generated ✓ ({os.path.basename(tmp_path)})"
         except Exception as e:
-            logger.
-            return None, None, f"AI generation
+            logger.warning("AI generation unavailable: %s", e)
+            return None, None, f"AI generation unavailable: {e}"
 
     # ---- PROCESS VIDEO ----
     def process_video(
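Since the rewritten `ai_generate_background` reads its checkpoint from the `BGFX_T2I_MODEL` environment variable (defaulting to `stabilityai/stable-diffusion-2-1`), the model can be swapped without touching the code. A hedged usage sketch, assuming `app` is the `VideoBackgroundApp` instance and that any substitute checkpoint is loadable by `StableDiffusionPipeline`:

import os

# Optional: point the generator at a different Stable Diffusion checkpoint
# before the first call (the default is stabilityai/stable-diffusion-2-1).
os.environ.setdefault("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")

img, path, status = app.ai_generate_background(
    prompt="professional modern office background, photorealistic",
    seed=42,
    width=640,      # SD pipelines expect dimensions divisible by 8
    height=360,
)
print(status, path)   # preview PIL image, /tmp/ai_bg_<timestamp>.png, status message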
@@ -391,11 +294,11 @@ def process_video(
 
         output_path = f"/tmp/output_{int(time.time())}.mp4"
 
-        # Validate input video
-        ok = validate_video_file(video)
+        # ✅ Validate input video (tuple: ok, reason)
+        ok, reason = validate_video_file(video)
         if not ok:
-            logger.warning("Invalid/unreadable video: %s", video)
-            return None, "Invalid or unreadable video file"
+            logger.warning("Invalid/unreadable video: %s (%s)", video, reason)
+            return None, f"Invalid or unreadable video file: {reason}"
 
         # Build bg_config based on source
         src = (bg_source or "Preset").lower()
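The call site now unpacks the documented `(ok, reason)` pair. The real validator lives in `utils.cv_processing` and is not part of this diff; a minimal hypothetical implementation with that contract, assuming OpenCV as the decoder, could look like:

import os
from typing import Tuple
import cv2

def validate_video_file(path: str) -> Tuple[bool, str]:
    """Hypothetical (ok, reason) validator matching the call site above."""
    if not path or not os.path.isfile(path):
        return False, "file does not exist"
    cap = cv2.VideoCapture(path)
    try:
        if not cap.isOpened():
            return False, "could not be opened by OpenCV"
        ok, _ = cap.read()
        if not ok:
            return False, "no decodable frames"
    finally:
        cap.release()
    return True, "ok"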
@@ -514,7 +417,7 @@ def on_source_toggle(src):
         )
 
         # ✅ Clear any previous AI image path when switching source (avoids stale AI background)
-        def _clear_ai_state(_):
+        def _clear_ai_state(_):
            return None
        bg_source.change(fn=_clear_ai_state, inputs=[bg_source], outputs=[ai_bg_path_state])
 
@@ -560,7 +463,9 @@ def ai_generate(prompt, seed, size):
         def safe_load():
             msg = app.load_models()
             logger.info("UI: models loaded")
-
+            # Set initial preview (preset default)
+            default_key = preset_key.value if hasattr(preset_key, "value") else "office"
+            return msg, app.preview_preset(default_key)
         btn_load.click(fn=safe_load, outputs=[status, bg_preview])
 
         def safe_process(vid, src, pkey, file, gtype, c1, c2, ang, ai_path):
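`safe_load` now returns two values because `btn_load.click` registers two outputs; Gradio assigns the returned tuple onto `outputs` positionally, as in this stripped-down sketch (component names mirror the ones used above):

import gradio as gr

with gr.Blocks() as demo:
    status = gr.Textbox(label="Status")
    bg_preview = gr.Image(label="Background preview")
    btn_load = gr.Button("Load Models")

    def safe_load():
        # first value -> status, second -> bg_preview (None leaves the image empty)
        return "Models loaded", None

    btn_load.click(fn=safe_load, outputs=[status, bg_preview])

# demo.launch()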