# MogensR's picture
# Create utils/device.py
# d6467ce
"""
Device and Hardware Management Module
====================================
Handles device detection, CUDA compatibility, memory management,
and threading configuration for BackgroundFX Pro.
Fixes:
- CUDA multiprocessor_count compatibility error
- OpenMP threading issues (OMP_NUM_THREADS)
- GPU memory optimization
- Automatic device selection
Author: BackgroundFX Pro Team
License: MIT
"""
import os
import logging
import warnings
from typing import Dict, Optional, List, Tuple
import platform
import psutil
# Thread-count defaults are applied at import time, ahead of the optional
# torch / cv2 imports below. Values already present in the environment win.
for _thread_var in ('OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS'):
    os.environ.setdefault(_thread_var, '4')
del _thread_var

# PyTorch is optional: record whether it could be imported.
try:
    import torch
except ImportError:
    TORCH_AVAILABLE = False
    warnings.warn("PyTorch not available - using CPU-only processing")
else:
    TORCH_AVAILABLE = True

# OpenCV is optional as well.
try:
    import cv2
except ImportError:
    OPENCV_AVAILABLE = False
    warnings.warn("OpenCV not available")
else:
    OPENCV_AVAILABLE = True

logger = logging.getLogger(__name__)
class DeviceManager:
    """Manages device detection, selection and optimization.

    Typical use is ``DeviceManager().initialize()`` followed by
    ``get_device()``. Hardware probing is best-effort: the private helpers
    catch their own exceptions, log them, and the manager falls back to
    ``'cpu'`` when no suitable GPU is found.
    """

    # Thread count used for the env-var defaults, PyTorch and OpenCV.
    DEFAULT_NUM_THREADS = 4
    # Minimum total GPU memory (bytes / 1024**3) required to select a GPU.
    MIN_GPU_MEMORY_GB = 2.0
    # Environment variables that receive DEFAULT_NUM_THREADS when unset.
    _THREAD_ENV_VARS = ('OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS')

    def __init__(self):
        # Selected device string ('cpu' or 'cuda:N'); None until initialize().
        self.device = None
        # Per-GPU property dicts keyed by 'cuda:N'.
        self.device_info = {}
        # Result of torch.cuda.is_available() (False without PyTorch).
        self.cuda_available = False
        # Number of CUDA devices reported by PyTorch.
        self.gpu_count = 0
        # Not written by any method of this class; kept for callers.
        self.memory_info = {}
        # True once _configure_threading() has completed without error.
        self.threading_configured = False

    def initialize(self) -> bool:
        """Initialize device manager and configure optimal settings.

        Runs threading setup, device detection, CUDA tuning (when CUDA is
        available), device selection and system-info logging, in that order.

        Returns:
            True on success. If any step raises, the error is logged,
            ``self.device`` is set to ``'cpu'`` and False is returned.
        """
        try:
            logger.info("πŸ”§ Initializing Device Manager...")
            # Fix threading first
            self._configure_threading()
            # Detect available devices
            self._detect_devices()
            # Configure CUDA if available
            if self.cuda_available:
                self._configure_cuda()
            # Select optimal device
            self.device = self._select_optimal_device()
            # Log system information
            self._log_system_info()
            logger.info(f"βœ… Device Manager initialized - Using: {self.device}")
            return True
        except Exception as e:
            logger.error(f"❌ Device Manager initialization failed: {e}")
            self.device = 'cpu'
            return False

    def _configure_threading(self):
        """Configure threading for optimal performance.

        Sets the OpenMP/MKL/NumExpr env vars when they are unset, then
        applies ``DEFAULT_NUM_THREADS`` to PyTorch and OpenCV if present.
        ``threading_configured`` is set only when every step succeeded;
        failures are logged as warnings and never raised.
        """
        try:
            threads = self.DEFAULT_NUM_THREADS
            for var in self._THREAD_ENV_VARS:
                os.environ.setdefault(var, str(threads))
            # Configure PyTorch threads
            if TORCH_AVAILABLE:
                torch.set_num_threads(threads)
                try:
                    torch.set_num_interop_threads(threads)
                except RuntimeError as e:
                    # PyTorch only allows this call once and before any
                    # inter-op parallel work; a repeat call must not abort
                    # the rest of the threading setup.
                    logger.debug("Inter-op thread count left unchanged: %s", e)
            # Configure OpenCV threads
            if OPENCV_AVAILABLE:
                cv2.setNumThreads(threads)
            self.threading_configured = True
            logger.info(f"βœ… Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")
        except Exception as e:
            logger.warning(f"⚠️ Threading configuration warning: {e}")

    def _detect_devices(self):
        """Detect available computing devices.

        Populates ``cuda_available``, ``gpu_count`` and, for every GPU whose
        properties can be read, ``device_info['cuda:N']``. Any unexpected
        error resets the manager to "no CUDA".
        """
        try:
            if not TORCH_AVAILABLE:
                self.cuda_available = False
                self.gpu_count = 0
                return
            # Check CUDA availability
            self.cuda_available = torch.cuda.is_available()
            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0
            if self.cuda_available:
                logger.info(f"βœ… CUDA available: {self.gpu_count} GPU(s)")
                # Get device properties for each GPU
                for i in range(self.gpu_count):
                    try:
                        props = self._get_cuda_properties_safe(i)
                        self.device_info[f'cuda:{i}'] = props
                        logger.info(f" GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
                    except Exception as e:
                        logger.warning(f" GPU {i}: Properties unavailable ({e})")
            else:
                logger.info("ℹ️ CUDA not available - using CPU")
        except Exception as e:
            logger.error(f"❌ Device detection failed: {e}")
            self.cuda_available = False
            self.gpu_count = 0

    @staticmethod
    def _unknown_gpu_properties(error) -> Dict:
        """Build the placeholder property dict used when a GPU query fails.

        Carries the same keys as a successful query plus ``'error'``, so
        callers can read any property without a KeyError.
        """
        return {
            'name': 'Unknown GPU',
            'memory_gb': 0.0,
            'memory_bytes': 0,
            'multiprocessor_count': 'Unknown',
            'major': 0,
            'minor': 0,
            'compute_capability': 'Unknown',
            'error': str(error)
        }

    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Safely get CUDA device properties with compatibility handling.

        Args:
            device_id: CUDA device index.

        Returns:
            A dict with name, memory, multiprocessor count and compute
            capability; ``{}`` when PyTorch/CUDA is unavailable; or the
            placeholder from ``_unknown_gpu_properties`` if the query raises.
        """
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}
            props = torch.cuda.get_device_properties(device_id)
            # Handle different PyTorch versions for multiprocessor count
            if hasattr(props, 'multi_processor_count'):
                sm_count = props.multi_processor_count
            elif hasattr(props, 'multiprocessor_count'):
                sm_count = props.multiprocessor_count
            else:
                # Fallback calculation for older PyTorch versions
                try:
                    major, minor = torch.cuda.get_device_capability(device_id)
                    # Rough estimation based on compute capability
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:
                    sm_count = 'Unknown'
            return {
                'name': props.name,
                'memory_gb': props.total_memory / (1024**3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}"
            }
        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            return self._unknown_gpu_properties(e)

    def _configure_cuda(self):
        """Configure CUDA for optimal performance.

        Turns on cuDNN benchmark mode, turns off cuDNN determinism, empties
        the CUDA cache and logs whether ``torch.cuda.amp`` can be imported.
        """
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return
            # cuDNN autotuning: benchmark mode on, deterministic mode off
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False
            # Set memory management
            torch.cuda.empty_cache()
            # Enable mixed precision if supported
            try:
                # Import is only a probe for Automatic Mixed Precision support
                from torch.cuda.amp import autocast  # noqa: F401
                logger.info("βœ… Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")
            logger.info("βœ… CUDA optimization configured")
        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    def _select_optimal_device(self) -> str:
        """Select the optimal device for processing.

        Returns:
            The ``'cuda:N'`` key with the most memory in ``device_info``, or
            ``'cpu'`` when PyTorch/CUDA is unavailable, when the best GPU has
            less than ``MIN_GPU_MEMORY_GB``, or when selection fails.
        """
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'
            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'
            # Select GPU with most memory (first one wins on ties)
            best_device = 'cuda:0'
            best_memory = 0
            for device_name, props in self.device_info.items():
                if device_name.startswith('cuda:'):
                    memory = props.get('memory_gb', 0)
                    if memory > best_memory:
                        best_memory = memory
                        best_device = device_name
            # Minimum memory check
            if best_memory < self.MIN_GPU_MEMORY_GB:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'
            return best_device
        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log OS, CPU, RAM, Python and (when present) PyTorch/CUDA/OpenCV versions."""
        try:
            # System information
            logger.info(f"πŸ“Š System: {platform.system()} {platform.release()}")
            logger.info(f"πŸ’Ύ CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")
            # Python and package versions
            logger.info(f"🐍 Python: {platform.python_version()}")
            if TORCH_AVAILABLE:
                logger.info(f"πŸ”₯ PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚑ CUDA: {torch.version.cuda}")
            if OPENCV_AVAILABLE:
                logger.info(f"πŸ“· OpenCV: {cv2.__version__}")
        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Get the selected device, or ``'cpu'`` if none has been selected."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Get device information as a dict snapshot of the manager's state."""
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured
        }

    @staticmethod
    def _usage_percent(used_gb: float, total_gb: float) -> float:
        """Return ``used_gb`` as a percentage of ``total_gb``.

        Returns 0.0 when the total is unknown (zero or negative) instead of
        dividing by a substitute value, which would report a bogus figure.
        """
        if not total_gb or total_gb <= 0:
            return 0.0
        return (used_gb / total_gb) * 100

    def get_memory_usage(self) -> Dict:
        """Get current memory usage.

        Returns:
            System RAM totals (from one psutil snapshot) and, when CUDA is
            available, ``gpu_{i}_allocated_gb`` / ``_reserved_gb`` /
            ``_total_gb`` / ``_percent`` for each GPU. GPU query failures
            are logged and leave the GPU keys incomplete.
        """
        # One snapshot keeps total / used / percent mutually consistent.
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024**3),
            'system_memory_used_gb': vm.used / (1024**3),
            'system_memory_percent': vm.percent
        }
        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)
                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    memory_info[f'gpu_{i}_percent'] = self._usage_percent(allocated, total)
            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")
        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Optimize device settings for specific model.

        Args:
            model_name: Model identifier; ``'sam2'`` and ``'matanyone'``
                (case-insensitive) have dedicated settings.

        Returns:
            A dict with ``device``, ``mixed_precision``,
            ``gradient_checkpointing`` and ``batch_size``. Defaults are
            returned unchanged for other names or if tuning fails.
        """
        optimizations = {
            'device': self.device,
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1
        }
        try:
            # Model-specific optimizations
            if model_name.lower() == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2
                    })
            elif model_name.lower() == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True
                    })
            logger.info(f"βš™οΈ Optimizations for {model_name}: {optimizations}")
        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")
        return optimizations

    def _get_gpu_memory_gb(self) -> float:
        """Get GPU memory in GB for the selected device.

        Falls back to the ``'cuda:0'`` entry when the selected device has no
        entry in ``device_info``; returns 0.0 without CUDA or device info.
        """
        if not self.cuda_available or not self.device_info:
            return 0.0
        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Cleanup device resources by emptying the CUDA cache when CUDA is in use."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("βœ… GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")
# Shared DeviceManager instance handed out by get_device_manager()
_device_manager = None


def get_device_manager() -> DeviceManager:
    """Return the shared DeviceManager, creating and initializing it if it is missing."""
    global _device_manager
    if _device_manager is not None:
        return _device_manager
    # Publish the instance first, then initialize it.
    _device_manager = DeviceManager()
    _device_manager.initialize()
    return _device_manager
def get_optimal_device() -> str:
    """Return the device string selected by the shared device manager."""
    manager = get_device_manager()
    return manager.get_device()
def fix_cuda_compatibility():
    """Obtain the shared device manager and return its device-info dict.

    If anything raises, the error is logged and a dict naming the CPU
    device together with the error text is returned instead.
    """
    try:
        manager = get_device_manager()
        logger.info("βœ… CUDA compatibility checked and fixed")
        info = manager.get_device_info()
        return info
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        fallback = {'device': 'cpu', 'error': str(e)}
        return fallback
def setup_optimal_threading():
    """Setup optimal threading configuration.

    Returns:
        True when threading is configured (already, or after this call);
        False when configuration did not succeed or an error was raised.
    """
    try:
        dm = get_device_manager()
        if dm.threading_configured:
            logger.info("βœ… Threading already configured optimally")
            return True
        dm._configure_threading()
        # _configure_threading() logs and swallows its own errors, so the
        # flag it sets is the only reliable success signal.
        return bool(dm.threading_configured)
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False
def get_system_diagnostics() -> Dict:
    """Collect device info, memory usage and a readiness flag from the shared manager.

    ``system_ready`` is True when the manager has a device selected.
    """
    manager = get_device_manager()
    device_info = manager.get_device_info()
    memory_usage = manager.get_memory_usage()
    is_ready = manager.device is not None
    return {
        'device_info': device_info,
        'memory_usage': memory_usage,
        'system_ready': is_ready
    }
# Initialize on module import
try:
    _device_manager = DeviceManager()
    # initialize() reports failure through its return value rather than by
    # raising, so success is only logged when it actually returned True.
    if _device_manager.initialize():
        logger.info("βœ… Device manager initialized on import")
    else:
        logger.warning("⚠️ Device manager initialization warning: falling back to CPU")
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")