Update app.py
app.py
CHANGED
@@ -2,17 +2,14 @@
 """
 BackgroundFX Pro - CSP-Safe Application Entry Point
 Now with: live background preview + sources: Preset / Upload / Gradient / AI Generate
+(uses utils.cv_processing to avoid circular imports)
 """
 
 import early_env  # <<< must be FIRST
 
-import os, time
+import os, time
 from typing import Optional, Dict, Any, Callable, Tuple
 
-# Prefer a writable cache on HF/Spaces
-os.environ.setdefault("HF_HOME", "/tmp/hf")
-os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
-
 # 1) CSP-safe Gradio env
 os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
 os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
@@ -41,7 +38,6 @@ def _patched_get_type(schema):
 logger.info("Entrypoint starting…")
 
 # 4) Imports
-from core.exceptions import ModelLoadingError, VideoProcessingError
 from config.app_config import get_config
 from utils.hardware.device_manager import DeviceManager
 from utils.system.memory_manager import MemoryManager
@@ -49,35 +45,12 @@ def _patched_get_type(schema):
 from processing.video.video_processor import CoreVideoProcessor, ProcessorConfig
 from processing.audio.audio_processor import AudioProcessor
 
-#
-from utils import
-#
-
-
-
-def create_gradient_background(spec: Dict[str, Any], width: int, height: int):
-    # Lightweight fallback preview (linear only)
-    import numpy as np
-    import cv2
-    def _to_rgb(c):
-        if isinstance(c, (list, tuple)) and len(c) == 3:
-            return tuple(int(x) for x in c)
-        if isinstance(c, str) and c.startswith("#") and len(c) == 7:
-            return tuple(int(c[i:i+2], 16) for i in (1,3,5))
-        return (255, 255, 255)
-    start = _to_rgb(spec.get("start", "#222222"))
-    end = _to_rgb(spec.get("end", "#888888"))
-    angle = float(spec.get("angle_deg", 0))
-    bg = np.zeros((height, width, 3), np.uint8)
-    for y in range(height):
-        t = y / max(1, height - 1)
-        r = int(start[0] * (1 - t) + end[0] * t)
-        g = int(start[1] * (1 - t) + end[1] * t)
-        b = int(start[2] * (1 - t) + end[2] * t)
-        bg[y, :] = (r, g, b)
-    center = (width / 2, height / 2)
-    rot = cv2.getRotationMatrix2D(center, angle, 1.0)
-    return cv2.warpAffine(bg, rot, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
+# ⛑️ Bring helpers from the slim, self-contained cv_processing (no circular imports)
+from utils.cv_processing import (
+    PROFESSIONAL_BACKGROUNDS,        # dict of presets
+    validate_video_file,             # returns (ok, reason)
+    create_professional_background,  # used for preview defaults
+)
 
 # 5) CSP-safe fallbacks for models
 class CSPSafeSAM2:
@@ -111,7 +84,6 @@ def process(self, image, mask, **kwargs):
 import numpy as np
 import cv2
 from PIL import Image
-from typing import Tuple
 
 PREVIEW_W, PREVIEW_H = 640, 360  # 16:9
 
@@ -126,9 +98,30 @@ def _np_to_pil(arr: np.ndarray) -> Image.Image:
     arr = arr.clip(0, 255).astype(np.uint8)
     return Image.fromarray(arr)
 
-def
-
-
+def _create_gradient_preview(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
+    """Lightweight linear gradient (with rotation) for previews."""
+    def _to_rgb(c):
+        if isinstance(c, (list, tuple)) and len(c) == 3:
+            return tuple(int(x) for x in c)
+        if isinstance(c, str) and c.startswith("#") and len(c) == 7:
+            return tuple(int(c[i:i+2], 16) for i in (1,3,5))
+        return (255, 255, 255)
+    start = _to_rgb(spec.get("start", "#222222"))
+    end = _to_rgb(spec.get("end", "#888888"))
+    angle = float(spec.get("angle_deg", 0))
+
+    bg = np.zeros((height, width, 3), np.uint8)
+    for y in range(height):
+        t = y / max(1, height - 1)
+        r = int(start[0] * (1 - t) + end[0] * t)
+        g = int(start[1] * (1 - t) + end[1] * t)
+        b = int(start[2] * (1 - t) + end[2] * t)
+        bg[y, :] = (r, g, b)
+    if abs(angle) % 360 < 1e-6:
+        return bg
+    center = (width / 2, height / 2)
+    rot = cv2.getRotationMatrix2D(center, angle, 1.0)
+    return cv2.warpAffine(bg, rot, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
 
 # ---------- main app ----------
 class VideoBackgroundApp:
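A quick way to sanity-check the new helper is a throwaway snippet like the one below, assuming it runs inside app.py where `_np_to_pil` and the preview constants are defined:

spec = {"type": "linear", "start": "#222222", "end": "#888888", "angle_deg": 35}
bg = _create_gradient_preview(spec, PREVIEW_W, PREVIEW_H)   # (360, 640, 3) uint8, rotated 35 degrees
_np_to_pil(bg).save("/tmp/gradient_preview.png")            # same conversion preview_gradient uses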
@@ -140,11 +133,66 @@ def __init__(self):
         self.audio_proc = AudioProcessor()
         self.models_loaded = False
         self.core_processor: Optional[CoreVideoProcessor] = None
-        # Text-to-Image pipeline cache
-        self.t2i_pipe = None
-        self.t2i_model_id = None
         logger.info("VideoBackgroundApp initialized (device=%s)", self.device_mgr.get_optimal_device())
 
+    def _build_processor_config_safely(self) -> ProcessorConfig:
+        """
+        Build ProcessorConfig including stability knobs if supported by your installed CoreVideoProcessor.
+        If your version doesn't have those fields, we auto-filter them out to avoid TypeError.
+        """
+        # Desired config (includes stability + encoding)
+        desired: Dict[str, Any] = dict(
+            background_preset="office",
+            write_fps=None,
+            max_model_size=1280,
+            # --- stability knobs (only used if supported in your CoreVideoProcessor) ---
+            temporal_ema_alpha=0.75,   # 0.6–0.85 typical
+            min_iou_to_accept=0.05,    # reject sudden mask jumps
+            dilate_px=6,               # pad edges for hair/ears
+            edge_blur_px=2,            # calm shimmering edges
+            # --- encoding (NVENC + fallbacks used inside the processor you installed) ---
+            use_nvenc=True,
+            nvenc_codec="h264",
+            nvenc_preset="p5",
+            nvenc_cq=18,
+            nvenc_tune_hq=True,
+            nvenc_pix_fmt="yuv420p",
+        )
+
+        # Filter against dataclass fields if present
+        fields = getattr(ProcessorConfig, "__dataclass_fields__", None)
+        if isinstance(fields, dict):
+            filtered = {k: v for k, v in desired.items() if k in fields}
+        else:
+            # very old ProcessorConfig: just pass the common ones
+            filtered = {
+                "background_preset": desired["background_preset"],
+                "write_fps": desired["write_fps"],
+                "max_model_size": desired["max_model_size"],
+                "use_nvenc": desired["use_nvenc"],
+                "nvenc_codec": desired["nvenc_codec"],
+                "nvenc_preset": desired["nvenc_preset"],
+                "nvenc_cq": desired["nvenc_cq"],
+                "nvenc_tune_hq": desired["nvenc_tune_hq"],
+                "nvenc_pix_fmt": desired["nvenc_pix_fmt"],
+            }
+
+        try:
+            return ProcessorConfig(**filtered)
+        except TypeError:
+            # final safety: pass minimal args
+            return ProcessorConfig(
+                background_preset="office",
+                write_fps=None,
+                max_model_size=1280,
+                use_nvenc=True,
+                nvenc_codec="h264",
+                nvenc_preset="p5",
+                nvenc_cq=18,
+                nvenc_tune_hq=True,
+                nvenc_pix_fmt="yuv420p",
+            )
+
     def load_models(self, progress_callback: Optional[Callable] = None) -> str:
         logger.info("Loading models (CSP-safe)…")
         try:
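The filtering step works because dataclasses list their accepted keyword arguments in `__dataclass_fields__`. A self-contained toy version of the same idea (ToyConfig is illustrative, not part of the project):

from dataclasses import dataclass
from typing import Any, Dict

@dataclass
class ToyConfig:                      # stand-in for an older ProcessorConfig without the new knobs
    background_preset: str = "office"
    max_model_size: int = 1280

desired: Dict[str, Any] = {
    "background_preset": "office",
    "max_model_size": 1280,
    "temporal_ema_alpha": 0.75,       # unknown to ToyConfig -> silently dropped
}
fields = getattr(ToyConfig, "__dataclass_fields__", {})
filtered = {k: v for k, v in desired.items() if k in fields}
print(ToyConfig(**filtered))          # no TypeError: ToyConfig(background_preset='office', max_model_size=1280)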
@@ -156,17 +204,8 @@ def load_models(self, progress_callback: Optional[Callable] = None) -> str:
             sam2_model = getattr(sam2, "model", sam2) if sam2 else CSPSafeSAM2()
             matanyone_model = getattr(matanyone, "model", matanyone) if matanyone else CSPSafeMatAnyone()
 
-            cfg = ProcessorConfig(
-
-                write_fps=None,
-                max_model_size=1280,
-                use_nvenc=True,
-                nvenc_codec="h264",
-                nvenc_preset="p5",
-                nvenc_cq=18,
-                nvenc_tune_hq=True,
-                nvenc_pix_fmt="yuv420p",
-            )
+            cfg = self._build_processor_config_safely()
+
             self.core_processor = CoreVideoProcessor(config=cfg, models=None)
             self.core_processor.models = type('FakeModelManager', (), {
                 'get_sam2': lambda self_: sam2_model,
@@ -197,173 +236,37 @@ def preview_upload(self, file) -> Optional[Image.Image]:
 
     def preview_gradient(self, gtype: str, color1: str, color2: str, angle: int) -> Image.Image:
         spec = {
-            "type": (gtype or "linear").lower(),  # "linear" or "radial" (linear
+            "type": (gtype or "linear").lower(),  # "linear" or "radial" (preview uses linear with rotation)
             "start": _hex_to_rgb(color1 or "#222222"),
             "end": _hex_to_rgb(color2 or "#888888"),
             "angle_deg": float(angle or 0),
         }
-        bg =
+        bg = _create_gradient_preview(spec, PREVIEW_W, PREVIEW_H)
         return _np_to_pil(bg)
 
-    # ---- AI BG: lazy-load + reuse pipe ----
-    def _ensure_t2i(self):
-        """
-        Choose and load a text-to-image pipeline once, with memory-efficient settings.
-        Returns (pipe, model_id, msg)
-        """
-        if self.t2i_pipe is not None:
-            return self.t2i_pipe, self.t2i_model_id, "AI generator ready"
-
-        try:
-            import torch
-            from diffusers import StableDiffusionPipeline, AutoPipelineForText2Image
-        except Exception as e:
-            return None, None, f"AI generation unavailable (missing deps): {e}"
-
-        # Heuristic: prefer fast/light models when VRAM is small
-        token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
-        device = "cuda" if getattr(torch, "cuda", None) and torch.cuda.is_available() else "cpu"
-
-        vram_gb = None
-        try:
-            vram_gb = self.device_mgr.get_device_memory_gb()
-        except Exception:
-            pass
-
-        # Prefer SD-Turbo if GPU and small VRAM; SDXL-Turbo if large VRAM; fallback to SD 2.1 on CPU
-        if device == "cuda":
-            if vram_gb and vram_gb >= 12:
-                model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/sdxl-turbo")
-            else:
-                model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/sd-turbo")
-        else:
-            # CPU-friendly (still heavy): classic SD 2.1
-            model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")
-
-        logger.info(f"Loading text-to-image model: {model_id} (device={device}, vram={vram_gb} GB)")
-
-        dtype = torch.float16 if device == "cuda" else torch.float32
-
-        pipe = None
-        err = None
-        try:
-            # Newer unified API handles sd-turbo and sdxl-turbo too
-            pipe = AutoPipelineForText2Image.from_pretrained(
-                model_id,
-                torch_dtype=dtype,
-                use_safetensors=True,
-                token=token
-            )
-        except Exception as e1:
-            err = e1
-            try:
-                # Fallback to classic pipeline (works for sd/stable-diffusion-2-1)
-                pipe = StableDiffusionPipeline.from_pretrained(
-                    model_id,
-                    torch_dtype=dtype,
-                    use_safetensors=True,
-                    safety_checker=None,  # disable to avoid false positives for office backgrounds
-                    feature_extractor=None,
-                    use_auth_token=token  # legacy name
-                )
-            except Exception as e2:
-                return None, None, f"AI model load failed: {e1} / {e2}"
-
-        # Memory/perf knobs
-        try:
-            pipe.set_progress_bar_config(disable=True)
-        except Exception:
-            pass
-        try:
-            pipe.enable_attention_slicing()
-        except Exception:
-            pass
-        try:
-            pipe.enable_vae_slicing()
-        except Exception:
-            pass
-        if device == "cuda":
-            try:
-                pipe.enable_xformers_memory_efficient_attention()
-            except Exception:
-                pass
-            pipe = pipe.to(device)
-        else:
-            # If accelerate is present, offload module-wise to save RAM
-            try:
-                pipe.enable_sequential_cpu_offload()
-            except Exception:
-                pass
-
-        self.t2i_pipe = pipe
-        self.t2i_model_id = model_id
-        return pipe, model_id, f"AI model loaded: {model_id}"
-
     def ai_generate_background(self, prompt: str, seed: int, width: int, height: int) -> Tuple[Optional[Image.Image], Optional[str], str]:
         """
-
+        Try generating a background with diffusers; save to /tmp and return (img, path, status).
         """
-        pipe, model_id, msg = self._ensure_t2i()
-        if pipe is None:
-            logger.warning(msg)
-            return None, None, msg
-
-        # Ensure sane, divisible-by-8 sizes
-        w = _div8(int(width)) if width else PREVIEW_W
-        h = _div8(int(height)) if height else PREVIEW_H
-        w = max(256, min(w, 1536))
-        h = max(256, min(h, 1536))
-
-        # Reasonable defaults for office-like backgrounds
-        prompt = (prompt or "professional modern office background, neutral colors, soft depth of field, clean, minimal, photorealistic")
-        negative = "text, watermark, logo, people, person, artifact, noisy, blurry"
-
-        # Seed & inference
         try:
+            from diffusers import StableDiffusionPipeline
             import torch
-
-
-
-
-
-
-
-
-
-
-
-            with torch.inference_mode():
-                if device == "cuda":
-                    # autocast for fp16
-                    with torch.autocast("cuda"):
-                        out = pipe(
-                            prompt=prompt,
-                            negative_prompt=negative,
-                            height=h,
-                            width=w,
-                            guidance_scale=guidance,
-                            num_inference_steps=steps,
-                            generator=g
-                        )
-                else:
-                    out = pipe(
-                        prompt=prompt,
-                        negative_prompt=negative,
-                        height=h,
-                        width=w,
-                        guidance_scale=guidance,
-                        num_inference_steps=steps,
-                        generator=g
-                    )
-            img = out.images[0]
-
+            model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")
+            dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)
+            g = torch.Generator(device=device).manual_seed(int(seed)) if seed is not None else None
+            if device == "cuda":
+                with torch.autocast("cuda"):
+                    img = pipe(prompt, height=height, width=width, guidance_scale=7.0, num_inference_steps=25, generator=g).images[0]
+            else:
+                img = pipe(prompt, height=height, width=width, guidance_scale=7.0, num_inference_steps=25, generator=g).images[0]
             tmp_path = f"/tmp/ai_bg_{int(time.time())}.png"
             img.save(tmp_path)
-
-            return img.resize((PREVIEW_W, PREVIEW_H), Image.LANCZOS), tmp_path, f"{msg} • Generated {w}x{h}"
+            return img.resize((PREVIEW_W, PREVIEW_H), Image.LANCZOS), tmp_path, f"AI background generated ✓ ({os.path.basename(tmp_path)})"
         except Exception as e:
-            logger.
-            return None, None, f"AI generation
+            logger.warning("AI generation unavailable: %s", e)
+            return None, None, f"AI generation unavailable: {e}"
 
     # ---- PROCESS VIDEO ----
     def process_video(
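Since the rewritten `ai_generate_background` reads its checkpoint from the `BGFX_T2I_MODEL` environment variable (defaulting to `stabilityai/stable-diffusion-2-1`), the model can be swapped without touching the code. A hedged usage sketch, assuming `app` is the `VideoBackgroundApp` instance and that any substitute checkpoint is loadable by `StableDiffusionPipeline`:

import os

# Optional: point the generator at a different Stable Diffusion checkpoint
# before the first call (the default is stabilityai/stable-diffusion-2-1).
os.environ.setdefault("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")

img, path, status = app.ai_generate_background(
    prompt="professional modern office background, photorealistic",
    seed=42,
    width=640,      # SD pipelines expect dimensions divisible by 8
    height=360,
)
print(status, path)   # preview PIL image, /tmp/ai_bg_<timestamp>.png, status message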
@@ -391,11 +294,11 @@ def process_video(
 
         output_path = f"/tmp/output_{int(time.time())}.mp4"
 
-        # Validate input video
-        ok = validate_video_file(video)
+        # ✅ Validate input video (tuple: ok, reason)
+        ok, reason = validate_video_file(video)
         if not ok:
-            logger.warning("Invalid/unreadable video: %s", video)
-            return None, "Invalid or unreadable video file"
+            logger.warning("Invalid/unreadable video: %s (%s)", video, reason)
+            return None, f"Invalid or unreadable video file: {reason}"
 
         # Build bg_config based on source
         src = (bg_source or "Preset").lower()
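The call site now unpacks the documented `(ok, reason)` pair. The real validator lives in `utils.cv_processing` and is not part of this diff; a minimal hypothetical implementation with that contract, assuming OpenCV as the decoder, could look like:

import os
from typing import Tuple
import cv2

def validate_video_file(path: str) -> Tuple[bool, str]:
    """Hypothetical (ok, reason) validator matching the call site above."""
    if not path or not os.path.isfile(path):
        return False, "file does not exist"
    cap = cv2.VideoCapture(path)
    try:
        if not cap.isOpened():
            return False, "could not be opened by OpenCV"
        ok, _ = cap.read()
        if not ok:
            return False, "no decodable frames"
    finally:
        cap.release()
    return True, "ok"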
@@ -514,7 +417,7 @@ def on_source_toggle(src):
         )
 
         # ✅ Clear any previous AI image path when switching source (avoids stale AI background)
-        def _clear_ai_state(_):
+        def _clear_ai_state(_):
            return None
        bg_source.change(fn=_clear_ai_state, inputs=[bg_source], outputs=[ai_bg_path_state])
 
@@ -560,7 +463,9 @@ def ai_generate(prompt, seed, size):
         def safe_load():
             msg = app.load_models()
             logger.info("UI: models loaded")
-
+            # Set initial preview (preset default)
+            default_key = preset_key.value if hasattr(preset_key, "value") else "office"
+            return msg, app.preview_preset(default_key)
         btn_load.click(fn=safe_load, outputs=[status, bg_preview])
 
         def safe_process(vid, src, pkey, file, gtype, c1, c2, ang, ai_path):
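`safe_load` now returns two values because `btn_load.click` registers two outputs; Gradio assigns the returned tuple onto `outputs` positionally, as in this stripped-down sketch (component names mirror the ones used above):

import gradio as gr

with gr.Blocks() as demo:
    status = gr.Textbox(label="Status")
    bg_preview = gr.Image(label="Background preview")
    btn_load = gr.Button("Load Models")

    def safe_load():
        # first value -> status, second -> bg_preview (None leaves the image empty)
        return "Models loaded", None

    btn_load.click(fn=safe_load, outputs=[status, bg_preview])

# demo.launch()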