Spaces:
Paused
Paused
MacBook pro
committed on
Commit
·
b96043e
1
Parent(s):
8caf3b0
docs: document MIRAGE_PROC_MAX_DIM & MIRAGE_DEBUG_OVERLAY; add swap debug vars; capture active providers
Browse files- README.md +27 -2
- swap_pipeline.py +16 -1
- webrtc_server.py +11 -6
README.md
CHANGED
|
@@ -147,14 +147,39 @@ Pipeline stats (subset) from swap pipeline:
|
|
| 147 |
| `MIRAGE_TURN_TLS_ONLY` | Filter TURN to TLS/TCP | `1` |
|
| 148 |
| `MIRAGE_PREFER_H264` | Prefer H264 codec in SDP munging | `0` |
|
| 149 |
| `MIRAGE_VOICE_ENABLE` | Enable voice processor stub | `0` |
|
| 150 |
-
| `MIRAGE_PERSIST_MODELS` | Persist models
|
| 151 |
-
| `
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
CodeFormer fidelity example:
|
| 154 |
```bash
|
| 155 |
MIRAGE_CODEFORMER_FIDELITY=0.6
|
| 156 |
```
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
## 📋 Requirements
|
| 159 |
|
| 160 |
- **GPU**: NVIDIA (Ampere+ recommended). CPU-only will be extremely slow.
|
|
|
|
| 147 |
| `MIRAGE_TURN_TLS_ONLY` | Filter TURN to TLS/TCP | `1` |
|
| 148 |
| `MIRAGE_PREFER_H264` | Prefer H264 codec in SDP munging | `0` |
|
| 149 |
| `MIRAGE_VOICE_ENABLE` | Enable voice processor stub | `0` |
|
| 150 |
+
| `MIRAGE_PERSIST_MODELS` | Persist models in `/data/mirage_models` via symlink `/app/models` | `1` |
|
| 151 |
+
| `MIRAGE_PROVISION_FRESH` | Force re-download of required models (ignores sentinel) | `0` |
|
| 152 |
+
| `MIRAGE_PROC_MAX_DIM` | Max dimension (longest side) for processing downscale | `512` |
|
| 153 |
+
| `MIRAGE_DEBUG_OVERLAY` | Draw green bbox + SWAP label on swapped faces | `0` |
|
| 154 |
+
| `MIRAGE_SWAP_DEBUG` | Verbose per-frame swap decision logging | `0` |
|
| 155 |
|
| 156 |
CodeFormer fidelity example:
|
| 157 |
```bash
|
| 158 |
MIRAGE_CODEFORMER_FIDELITY=0.6
|
| 159 |
```
|
| 160 |
|
| 161 |
+
### Processing Resolution & Visual Debug Overlay
|
| 162 |
+
|
| 163 |
+
Two new controls help you verify that swapping is occurring and tune visual quality vs latency:
|
| 164 |
+
|
| 165 |
+
| Control | Effect | Guidance |
|
| 166 |
+
|---------|--------|----------|
|
| 167 |
+
| `MIRAGE_PROC_MAX_DIM` | Caps the longest side of a frame before inference. Frames larger than this are downscaled for detection/swap, then returned at original size. | Raise (e.g. 640, 720) for crisper facial detail if GPU headroom allows; lower (384–512) to reduce latency on weaker GPUs. Minimum enforced is 64. |
|
| 168 |
+
| `MIRAGE_DEBUG_OVERLAY` | When enabled (`1`), draws a green rectangle and the text `SWAP` over each face region that was swapped in the most recent frame. | Use temporarily to confirm active swapping; disable for production to avoid visual artifacts. |
|
| 169 |
+
|
| 170 |
+
Example (higher detail + overlay for confirmation):
|
| 171 |
+
```bash
|
| 172 |
+
MIRAGE_PROC_MAX_DIM=640
|
| 173 |
+
MIRAGE_DEBUG_OVERLAY=1
|
| 174 |
+
```
|
| 175 |
+
|
| 176 |
+
If you still perceive “no change” while counters show swaps:
|
| 177 |
+
1. Ensure your reference image is a clear, well-lit, frontal face (avoid extreme angles / occlusions).
|
| 178 |
+
2. Increase `MIRAGE_PROC_MAX_DIM` to 640 or 720 for sharper results.
|
| 179 |
+
3. Temporarily enable `MIRAGE_DEBUG_OVERLAY=1` to visualize the swapped region.
|
| 180 |
+
4. Check `/debug/pipeline` for `total_faces_swapped` and `swap_faces_last` > 0.
|
| 181 |
+
|
| 182 |
+
|
| 183 |
## 📋 Requirements
|
| 184 |
|
| 185 |
- **GPU**: NVIDIA (Ampere+ recommended). CPU-only will be extremely slow.
|
swap_pipeline.py
CHANGED
|
@@ -80,6 +80,11 @@ class FaceSwapPipeline:
|
|
| 80 |
raise ImportError("insightface (and its deps like onnxruntime) not available. Ensure onnxruntime, onnx, torch installed.")
|
| 81 |
self.app = FaceAnalysis(name='buffalo_l', providers=providers)
|
| 82 |
self.app.prepare(ctx_id=0, det_size=(640,640))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
# Load swapper
|
| 84 |
model_path = INSWAPPER_ONNX_PATH
|
| 85 |
if not os.path.isfile(model_path):
|
|
@@ -332,6 +337,15 @@ class FaceSwapPipeline:
|
|
| 332 |
except Exception as e:
|
| 333 |
logger.debug(f"Swap failed for face: {e}")
|
| 334 |
self._stats['total_faces_swapped'] += count
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
if self.swap_debug:
|
| 336 |
logger.debug(f'process_frame: detected={len(faces)} swapped={count} stride={self.codeformer_frame_stride} apply_cf={count>0 and (self._frame_index % self.codeformer_frame_stride == 0)}')
|
| 337 |
# CodeFormer stride / face-region logic
|
|
@@ -391,12 +405,13 @@ class FaceSwapPipeline:
|
|
| 391 |
codeformer_face_only=self.codeformer_face_only,
|
| 392 |
codeformer_avg_latency_ms=cf_avg,
|
| 393 |
max_faces=self.max_faces,
|
|
|
|
| 394 |
)
|
| 395 |
# Provider diagnostics (best-effort)
|
| 396 |
try: # pragma: no cover
|
| 397 |
import onnxruntime as ort # type: ignore
|
| 398 |
info['available_providers'] = ort.get_available_providers()
|
| 399 |
-
info['active_providers'] = getattr(self
|
| 400 |
except Exception:
|
| 401 |
pass
|
| 402 |
return info
|
|
|
|
| 80 |
raise ImportError("insightface (and its deps like onnxruntime) not available. Ensure onnxruntime, onnx, torch installed.")
|
| 81 |
self.app = FaceAnalysis(name='buffalo_l', providers=providers)
|
| 82 |
self.app.prepare(ctx_id=0, det_size=(640,640))
|
| 83 |
+
# Capture active providers after prepare (best effort)
|
| 84 |
+
try:
|
| 85 |
+
self._active_providers = getattr(self.app, 'providers', providers)
|
| 86 |
+
except Exception:
|
| 87 |
+
self._active_providers = providers
|
| 88 |
# Load swapper
|
| 89 |
model_path = INSWAPPER_ONNX_PATH
|
| 90 |
if not os.path.isfile(model_path):
|
|
|
|
| 337 |
except Exception as e:
|
| 338 |
logger.debug(f"Swap failed for face: {e}")
|
| 339 |
self._stats['total_faces_swapped'] += count
|
| 340 |
+
# Optional debug overlay for visual confirmation
|
| 341 |
+
if count > 0 and os.getenv('MIRAGE_DEBUG_OVERLAY', '0').lower() in ('1','true','yes','on'):
|
| 342 |
+
try:
|
| 343 |
+
for f in faces[:self.max_faces]:
|
| 344 |
+
x1,y1,x2,y2 = f.bbox.astype(int)
|
| 345 |
+
cv2.rectangle(out, (x1,y1), (x2,y2), (0,255,0), 2)
|
| 346 |
+
cv2.putText(out, 'SWAP', (x1, max(0,y1-5)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1, cv2.LINE_AA)
|
| 347 |
+
except Exception:
|
| 348 |
+
pass
|
| 349 |
if self.swap_debug:
|
| 350 |
logger.debug(f'process_frame: detected={len(faces)} swapped={count} stride={self.codeformer_frame_stride} apply_cf={count>0 and (self._frame_index % self.codeformer_frame_stride == 0)}')
|
| 351 |
# CodeFormer stride / face-region logic
|
|
|
|
| 405 |
codeformer_face_only=self.codeformer_face_only,
|
| 406 |
codeformer_avg_latency_ms=cf_avg,
|
| 407 |
max_faces=self.max_faces,
|
| 408 |
+
debug_overlay=os.getenv('MIRAGE_DEBUG_OVERLAY', '0'),
|
| 409 |
)
|
| 410 |
# Provider diagnostics (best-effort)
|
| 411 |
try: # pragma: no cover
|
| 412 |
import onnxruntime as ort # type: ignore
|
| 413 |
info['available_providers'] = ort.get_available_providers()
|
| 414 |
+
info['active_providers'] = getattr(self, '_active_providers', None)
|
| 415 |
except Exception:
|
| 416 |
pass
|
| 417 |
return info
|
webrtc_server.py
CHANGED
|
@@ -383,13 +383,18 @@ class IncomingVideoTrack(MediaStreamTrack):
|
|
| 383 |
img = frame.to_ndarray(format="bgr24")
|
| 384 |
h, w, _ = img.shape
|
| 385 |
proc_input = img
|
| 386 |
-
# Optionally downscale for processing to cap latency
|
| 387 |
try:
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
proc_input = cv2.resize(img, (max(1, scale_w), max(1, scale_h)))
|
| 394 |
except Exception as e:
|
| 395 |
logger.debug(f"Video downscale skip: {e}")
|
|
|
|
| 383 |
img = frame.to_ndarray(format="bgr24")
|
| 384 |
h, w, _ = img.shape
|
| 385 |
proc_input = img
|
| 386 |
+
# Optionally downscale for processing to cap latency (configurable)
|
| 387 |
try:
|
| 388 |
+
max_dim_cfg = int(os.getenv('MIRAGE_PROC_MAX_DIM', '512') or '512')
|
| 389 |
+
if max_dim_cfg < 64:
|
| 390 |
+
max_dim_cfg = 64
|
| 391 |
+
if max(h, w) > max_dim_cfg:
|
| 392 |
+
if w >= h:
|
| 393 |
+
scale_w = max_dim_cfg
|
| 394 |
+
scale_h = int(h * (max_dim_cfg / w))
|
| 395 |
+
else:
|
| 396 |
+
scale_h = max_dim_cfg
|
| 397 |
+
scale_w = int(w * (max_dim_cfg / h))
|
| 398 |
proc_input = cv2.resize(img, (max(1, scale_w), max(1, scale_h)))
|
| 399 |
except Exception as e:
|
| 400 |
logger.debug(f"Video downscale skip: {e}")
|