""" |
|
|
Memory Manager for BackgroundFX Pro |
|
|
- Safe on CPU/CUDA/MPS (mostly CUDA/T4 on Spaces) |
|
|
- Accepts `device` as str or torch.device |
|
|
- Optional per-process VRAM cap (env or method) |
|
|
- Detailed usage reporting (CPU/RAM + VRAM + torch allocator) |
|
|
- Light and aggressive cleanup paths |
|
|
- Background monitor (optional) |
|
|
|
|
|
Env switches: |
|
|
BFX_DISABLE_LIMIT=1 -> do not set VRAM fraction automatically |
|
|
BFX_CUDA_FRACTION=0.80 -> fraction to cap per-process VRAM (0.10..0.95) |
|
|
""" |
from __future__ import annotations

import gc
import os
import time
import logging
import threading
from typing import Dict, Any, Optional, Callable

# psutil and torch are optional; without them the manager degrades to
# best-effort reporting and no-op GPU cleanup.
try:
    import psutil
except Exception:
    psutil = None

try:
    import torch
except Exception:
    torch = None

logger = logging.getLogger(__name__)

class MemoryManagerError(Exception):
    pass


def _bytes_to_gb(x: int | float) -> float:
    try:
        return float(x) / (1024**3)
    except Exception:
        return 0.0


def _normalize_device(dev) -> "torch.device":
    if torch is None:
        # torch unavailable: return a minimal stand-in exposing .type / .index.
        class _Fake:
            type = "cpu"
            index = None

        return _Fake()

    if isinstance(dev, str):
        return torch.device(dev)
    if hasattr(dev, "type"):
        return dev
    return torch.device("cpu")


def _cuda_index(device) -> Optional[int]:
    if getattr(device, "type", "cpu") != "cuda":
        return None
    idx = getattr(device, "index", None)
    if idx is None:
        # A bare "cuda" device means the default device, index 0.
        return 0
    return int(idx)

class MemoryManager:
    """
    Comprehensive memory management with VRAM cap + cleanup utilities.
    """

    def __init__(self, device, memory_limit_gb: Optional[float] = None):
        self.device = _normalize_device(device)
        self.device_type = getattr(self.device, "type", "cpu")
        self.cuda_idx = _cuda_index(self.device)

        self.gpu_available = bool(
            torch and self.device_type == "cuda" and torch.cuda.is_available()
        )
        self.mps_available = bool(
            torch and self.device_type == "mps" and getattr(torch.backends, "mps", None)
            and torch.backends.mps.is_available()
        )

        self.memory_limit_gb = memory_limit_gb
        self.cleanup_callbacks: list[Callable] = []
        self.monitoring_active = False
        self.monitoring_thread: Optional[threading.Thread] = None
        self.stats = {
            "cleanup_count": 0,
            "peak_memory_usage": 0.0,
            "total_allocated": 0.0,
            "total_freed": 0.0,
        }
        self.applied_fraction: Optional[float] = None

        self._initialize_memory_limits()
        self._maybe_apply_vram_fraction()
        logger.info(f"MemoryManager initialized (device={self.device}, cuda={self.gpu_available})")

    def _initialize_memory_limits(self):
        try:
            if self.gpu_available:
                props = torch.cuda.get_device_properties(self.cuda_idx or 0)
                total_gb = _bytes_to_gb(props.total_memory)
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.80)
                logger.info(
                    f"CUDA memory limit baseline ~{self.memory_limit_gb:.1f}GB "
                    f"(device total {total_gb:.1f}GB)"
                )
            elif self.mps_available:
                vm = psutil.virtual_memory() if psutil else None
                total_gb = _bytes_to_gb(vm.total) if vm else 0.0
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.50)
                logger.info(f"MPS memory baseline ~{self.memory_limit_gb:.1f}GB (system {total_gb:.1f}GB)")
            else:
                vm = psutil.virtual_memory() if psutil else None
                total_gb = _bytes_to_gb(vm.total) if vm else 0.0
                if self.memory_limit_gb is None:
                    self.memory_limit_gb = max(0.5, total_gb * 0.60)
                logger.info(f"CPU memory baseline ~{self.memory_limit_gb:.1f}GB (system {total_gb:.1f}GB)")
        except Exception as e:
            logger.warning(f"Memory limit init failed: {e}")
            if self.memory_limit_gb is None:
                self.memory_limit_gb = 4.0

    def _maybe_apply_vram_fraction(self):
        if not self.gpu_available or torch is None:
            return
        if os.environ.get("BFX_DISABLE_LIMIT", ""):
            return
        frac_env = os.environ.get("BFX_CUDA_FRACTION", "").strip()
        try:
            fraction = float(frac_env) if frac_env else 0.80
        except Exception:
            fraction = 0.80
        applied = self.limit_cuda_memory(fraction=fraction)
        if applied:
            logger.info(f"Per-process CUDA memory fraction set to {applied:.2f} on device {self.cuda_idx or 0}")
    def get_memory_usage(self) -> Dict[str, Any]:
        usage: Dict[str, Any] = {
            "device_type": self.device_type,
            "memory_limit_gb": self.memory_limit_gb,
            "timestamp": time.time(),
        }

        # System RAM, swap and process footprint (requires psutil).
        if psutil:
            try:
                vm = psutil.virtual_memory()
                usage.update(
                    dict(
                        system_total_gb=round(_bytes_to_gb(vm.total), 3),
                        system_available_gb=round(_bytes_to_gb(vm.available), 3),
                        system_used_gb=round(_bytes_to_gb(vm.used), 3),
                        system_percent=float(vm.percent),
                    )
                )
                swap = psutil.swap_memory()
                usage.update(
                    dict(
                        swap_total_gb=round(_bytes_to_gb(swap.total), 3),
                        swap_used_gb=round(_bytes_to_gb(swap.used), 3),
                        swap_percent=float(swap.percent),
                    )
                )
                proc = psutil.Process()
                mi = proc.memory_info()
                usage.update(
                    dict(
                        process_rss_gb=round(_bytes_to_gb(mi.rss), 3),
                        process_vms_gb=round(_bytes_to_gb(mi.vms), 3),
                    )
                )
            except Exception as e:
                logger.debug(f"psutil stats error: {e}")

        # Device-level VRAM (driver view) plus torch allocator statistics.
        if self.gpu_available and torch is not None:
            try:
                free_b, total_b = torch.cuda.mem_get_info(self.cuda_idx or 0)
                used_b = total_b - free_b
                usage.update(
                    dict(
                        vram_total_gb=round(_bytes_to_gb(total_b), 3),
                        vram_used_gb=round(_bytes_to_gb(used_b), 3),
                        vram_free_gb=round(_bytes_to_gb(free_b), 3),
                        vram_used_percent=float(used_b / total_b * 100.0) if total_b else 0.0,
                    )
                )
            except Exception as e:
                logger.debug(f"mem_get_info failed: {e}")

            try:
                idx = self.cuda_idx or 0
                allocated = torch.cuda.memory_allocated(idx)
                reserved = torch.cuda.memory_reserved(idx)
                usage["torch_allocated_gb"] = round(_bytes_to_gb(allocated), 3)
                usage["torch_reserved_gb"] = round(_bytes_to_gb(reserved), 3)
                try:
                    inactive = torch.cuda.memory_stats(idx).get("inactive_split_bytes.all.current", 0)
                    usage["torch_inactive_split_gb"] = round(_bytes_to_gb(inactive), 3)
                except Exception:
                    pass
            except Exception as e:
                logger.debug(f"allocator stats failed: {e}")

        usage["applied_fraction"] = self.applied_fraction

        # Track the peak of whichever figure is most relevant for this device.
        current = usage.get("vram_used_gb", usage.get("system_used_gb", 0.0))
        try:
            if float(current) > float(self.stats["peak_memory_usage"]):
                self.stats["peak_memory_usage"] = float(current)
        except Exception:
            pass

        return usage

    def limit_cuda_memory(self, fraction: Optional[float] = None, max_gb: Optional[float] = None) -> Optional[float]:
        """Cap this process's VRAM use by fraction or absolute GB; returns the applied fraction."""
        if not self.gpu_available or torch is None:
            return None

        # Translate an absolute cap into a fraction of the device total.
        if max_gb is not None:
            try:
                _, total_b = torch.cuda.mem_get_info(self.cuda_idx or 0)
                total_gb = _bytes_to_gb(total_b)
                if total_gb <= 0:
                    return None
                fraction = min(max(0.10, max_gb / total_gb), 0.95)
            except Exception as e:
                logger.debug(f"fraction from max_gb failed: {e}")
                return None

        if fraction is None:
            fraction = 0.80
        fraction = float(max(0.10, min(0.95, fraction)))

        try:
            torch.cuda.set_per_process_memory_fraction(fraction, device=self.cuda_idx or 0)
            self.applied_fraction = fraction
            return fraction
        except Exception as e:
            logger.debug(f"set_per_process_memory_fraction failed: {e}")
            return None
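
    # Worked example (illustrative figures): limit_cuda_memory(max_gb=12.0) on a
    # 16 GB card resolves to fraction = 12 / 16 = 0.75, which is then clamped to
    # [0.10, 0.95] before torch.cuda.set_per_process_memory_fraction() is applied.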
    def cleanup(self) -> None:
        """Light cleanup used frequently between steps."""
        try:
            gc.collect()
        except Exception:
            pass
        if self.gpu_available and torch is not None:
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass
        self.stats["cleanup_count"] += 1

    def cleanup_basic(self) -> None:
        """Alias kept for compatibility."""
        self.cleanup()

    def cleanup_aggressive(self) -> None:
        """Aggressive cleanup for OOM recovery or big scene switches."""
        if self.gpu_available and torch is not None:
            try:
                torch.cuda.synchronize(self.cuda_idx or 0)
            except Exception:
                pass
            try:
                torch.cuda.empty_cache()
            except Exception:
                pass
            try:
                torch.cuda.reset_peak_memory_stats(self.cuda_idx or 0)
            except Exception:
                pass
            try:
                if hasattr(torch.cuda, "ipc_collect"):
                    torch.cuda.ipc_collect()
            except Exception:
                pass
        try:
            gc.collect()
            gc.collect()
        except Exception:
            pass
        self.stats["cleanup_count"] += 1

    def register_cleanup_callback(self, callback: Callable):
        self.cleanup_callbacks.append(callback)

    def start_monitoring(self, interval_seconds: float = 30.0, pressure_callback: Optional[Callable] = None):
        if self.monitoring_active:
            logger.warning("Memory monitoring already active")
            return
        self.monitoring_active = True

        def loop():
            while self.monitoring_active:
                try:
                    pressure = self.check_memory_pressure()
                    if pressure["under_pressure"]:
                        logger.warning(
                            f"Memory pressure: {pressure['pressure_level']} "
                            f"({pressure['usage_percent']:.1f}%)"
                        )
                        if pressure_callback:
                            try:
                                pressure_callback(pressure)
                            except Exception as e:
                                logger.error(f"Pressure callback failed: {e}")
                        if pressure["pressure_level"] == "critical":
                            self.cleanup_aggressive()
                except Exception as e:
                    logger.error(f"Memory monitoring error: {e}")
                time.sleep(interval_seconds)

        self.monitoring_thread = threading.Thread(target=loop, daemon=True)
        self.monitoring_thread.start()
        logger.info(f"Memory monitoring started (interval: {interval_seconds}s)")
    def stop_monitoring(self):
        if self.monitoring_active:
            self.monitoring_active = False
            if self.monitoring_thread and self.monitoring_thread.is_alive():
                self.monitoring_thread.join(timeout=5.0)
            logger.info("Memory monitoring stopped")

    def check_memory_pressure(self, threshold_percent: float = 85.0) -> Dict[str, Any]:
        usage = self.get_memory_usage()
        info = {
            "under_pressure": False,
            "pressure_level": "normal",
            "usage_percent": 0.0,
            "recommendations": [],
        }

        if self.gpu_available:
            percent = usage.get("vram_used_percent", 0.0)
            info["usage_percent"] = percent
            if percent >= threshold_percent:
                info["under_pressure"] = True
                if percent >= 95:
                    info["pressure_level"] = "critical"
                    info["recommendations"] += [
                        "Run aggressive memory cleanup",
                        "Reduce frame cache / chunk size",
                        "Lower resolution or disable previews",
                    ]
                else:
                    info["pressure_level"] = "warning"
                    info["recommendations"] += [
                        "Run cleanup",
                        "Monitor memory usage",
                        "Reduce keyframe interval",
                    ]
        else:
            percent = usage.get("system_percent", 0.0)
            info["usage_percent"] = percent
            if percent >= threshold_percent:
                info["under_pressure"] = True
                if percent >= 95:
                    info["pressure_level"] = "critical"
                    info["recommendations"] += [
                        "Close other processes",
                        "Reduce resolution",
                        "Split video into chunks",
                    ]
                else:
                    info["pressure_level"] = "warning"
                    info["recommendations"] += [
                        "Run cleanup",
                        "Monitor usage",
                        "Reduce processing footprint",
                    ]
        return info

    def estimate_memory_requirement(self, video_width: int, video_height: int, frames_in_memory: int = 5) -> Dict[str, float]:
        # Rough estimate: 8-bit RGB frames, with a 3x allowance for intermediates.
        bytes_per_frame = video_width * video_height * 3
        overhead_multiplier = 3.0
        frames_gb = _bytes_to_gb(bytes_per_frame * frames_in_memory * overhead_multiplier)
        estimate = {
            "frames_memory_gb": round(frames_gb, 3),
            "model_memory_gb": 4.0,
            "system_overhead_gb": 2.0,
        }
        estimate["total_estimated_gb"] = round(
            estimate["frames_memory_gb"] + estimate["model_memory_gb"] + estimate["system_overhead_gb"], 3
        )
        return estimate
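
    # Worked example (illustrative): one 1920x1080 RGB frame is 1920*1080*3 bytes
    # (~6 MB), so 5 frames with the 3x overhead come to ~0.09 GB; adding the fixed
    # 4.0 GB model and 2.0 GB system allowances gives total_estimated_gb ~ 6.09.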
    def can_process_video(self, video_width: int, video_height: int, frames_in_memory: int = 5) -> Dict[str, Any]:
        estimate = self.estimate_memory_requirement(video_width, video_height, frames_in_memory)
        usage = self.get_memory_usage()
        if self.gpu_available:
            available = usage.get("vram_free_gb", 0.0)
        else:
            available = usage.get("system_available_gb", 0.0)

        can = estimate["total_estimated_gb"] <= available
        return {
            "can_process": can,
            "estimated_memory_gb": estimate["total_estimated_gb"],
            "available_memory_gb": available,
            "memory_margin_gb": round(available - estimate["total_estimated_gb"], 3),
            "recommendations": [] if can else [
                "Reduce resolution or duration",
                "Process in smaller chunks",
                "Run aggressive cleanup before start",
            ],
        }

    def get_stats(self) -> Dict[str, Any]:
        return {
            "cleanup_count": self.stats["cleanup_count"],
            "peak_memory_usage_gb": self.stats["peak_memory_usage"],
            "device_type": self.device_type,
            "memory_limit_gb": self.memory_limit_gb,
            "applied_fraction": self.applied_fraction,
            "monitoring_active": self.monitoring_active,
            "callbacks_registered": len(self.cleanup_callbacks),
        }

    def __del__(self):
        try:
            self.stop_monitoring()
            self.cleanup_aggressive()
        except Exception:
            pass
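

# Minimal smoke-test sketch (illustrative; not part of the library API). It runs a
# quick report/cleanup cycle on whatever device is available when this module is
# executed directly, and degrades safely if torch or psutil is missing.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    dev = "cuda" if (torch is not None and torch.cuda.is_available()) else "cpu"
    mm = MemoryManager(dev)
    print("usage:", mm.get_memory_usage())
    print("pressure:", mm.check_memory_pressure())
    print("can process 1080p:", mm.can_process_video(1920, 1080))
    mm.cleanup_aggressive()
    print("stats:", mm.get_stats())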