"""
Device and Hardware Management Module
====================================
Handles device detection, CUDA compatibility, memory management,
and threading configuration for BackgroundFX Pro.
Fixes:
- CUDA multiprocessor_count compatibility error
- OpenMP threading issues (OMP_NUM_THREADS)
- GPU memory optimization
- Automatic device selection
Author: BackgroundFX Pro Team
License: MIT
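
Typical usage (an illustrative sketch; assumes this file is importable
as ``device_manager``):

    from device_manager import get_optimal_device, get_system_diagnostics

    device = get_optimal_device()          # e.g. 'cuda:0' or 'cpu'
    diagnostics = get_system_diagnostics()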
"""

import os
import logging
import platform
import warnings
from typing import Dict, Optional, List, Tuple

import psutil

# Fix threading issues immediately at module import
os.environ.setdefault('OMP_NUM_THREADS', '4')
os.environ.setdefault('MKL_NUM_THREADS', '4')
os.environ.setdefault('NUMEXPR_NUM_THREADS', '4')

try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    warnings.warn("PyTorch not available - using CPU-only processing")

try:
    import cv2
    OPENCV_AVAILABLE = True
except ImportError:
    OPENCV_AVAILABLE = False
    warnings.warn("OpenCV not available")

logger = logging.getLogger(__name__)


class DeviceManager:
    """Manages device detection, selection, and optimization."""

    def __init__(self):
        self.device = None
        self.device_info = {}
        self.cuda_available = False
        self.gpu_count = 0
        self.memory_info = {}
        self.threading_configured = False

    def initialize(self) -> bool:
        """Initialize device manager and configure optimal settings."""
        try:
            logger.info("🔧 Initializing Device Manager...")

            # Fix threading first
            self._configure_threading()

            # Detect available devices
            self._detect_devices()

            # Configure CUDA if available
            if self.cuda_available:
                self._configure_cuda()

            # Select optimal device
            self.device = self._select_optimal_device()

            # Log system information
            self._log_system_info()

            logger.info(f"✅ Device Manager initialized - Using: {self.device}")
            return True
        except Exception as e:
            logger.error(f"❌ Device Manager initialization failed: {e}")
            self.device = 'cpu'
            return False

    def _configure_threading(self):
        """Configure threading for optimal performance."""
        try:
            # Set OpenMP threads
            if 'OMP_NUM_THREADS' not in os.environ:
                os.environ['OMP_NUM_THREADS'] = '4'

            # Set MKL threads
            if 'MKL_NUM_THREADS' not in os.environ:
                os.environ['MKL_NUM_THREADS'] = '4'

            # Set NumExpr threads
            if 'NUMEXPR_NUM_THREADS' not in os.environ:
                os.environ['NUMEXPR_NUM_THREADS'] = '4'

            # Configure PyTorch threads. Note: set_num_interop_threads()
            # raises a RuntimeError if called after inter-op parallel work
            # has started, so it must run early (as it does here).
            if TORCH_AVAILABLE:
                torch.set_num_threads(4)
                torch.set_num_interop_threads(4)

            # Configure OpenCV threads
            if OPENCV_AVAILABLE:
                cv2.setNumThreads(4)

            self.threading_configured = True
            logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")
        except Exception as e:
            logger.warning(f"⚠️ Threading configuration warning: {e}")

    def _detect_devices(self):
        """Detect available computing devices."""
        try:
            if not TORCH_AVAILABLE:
                self.cuda_available = False
                self.gpu_count = 0
                return

            # Check CUDA availability
            self.cuda_available = torch.cuda.is_available()
            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0

            if self.cuda_available:
                logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)")

                # Get device properties for each GPU
                for i in range(self.gpu_count):
                    try:
                        props = self._get_cuda_properties_safe(i)
                        self.device_info[f'cuda:{i}'] = props
                        logger.info(f"   GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
                    except Exception as e:
                        logger.warning(f"   GPU {i}: Properties unavailable ({e})")
            else:
                logger.info("ℹ️ CUDA not available - using CPU")
        except Exception as e:
            logger.error(f"❌ Device detection failed: {e}")
            self.cuda_available = False
            self.gpu_count = 0

    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Safely get CUDA device properties with compatibility handling."""
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}

            props = torch.cuda.get_device_properties(device_id)

            # Handle different PyTorch versions for the multiprocessor count
            # (current PyTorch exposes it as `multi_processor_count`)
            if hasattr(props, 'multi_processor_count'):
                sm_count = props.multi_processor_count
            elif hasattr(props, 'multiprocessor_count'):
                sm_count = props.multiprocessor_count
            else:
                # Fallback for older PyTorch versions: a crude guess from the
                # compute capability, not an exact SM count
                try:
                    major, minor = torch.cuda.get_device_capability(device_id)
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:
                    sm_count = 'Unknown'

            device_props = {
                'name': props.name,
                'memory_gb': props.total_memory / (1024**3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}"
            }
            return device_props
        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            return {
                'name': 'Unknown GPU',
                'memory_gb': 0.0,
                'memory_bytes': 0,
                'multiprocessor_count': 'Unknown',
                'error': str(e)
            }

    def _configure_cuda(self):
        """Configure CUDA for optimal performance."""
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return

            # Enable the cuDNN autotuner (benchmark mode); trades
            # determinism for speed on fixed-size inputs
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            # Release any cached GPU memory
            torch.cuda.empty_cache()

            # Check whether Automatic Mixed Precision is available
            try:
                from torch.cuda.amp import autocast  # noqa: F401
                logger.info("✅ Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")

            logger.info("✅ CUDA optimization configured")
        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    def _select_optimal_device(self) -> str:
        """Select the optimal device for processing."""
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'
            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'

            # Select the GPU with the most memory
            best_device = 'cuda:0'
            best_memory = 0
            for device_name, props in self.device_info.items():
                if device_name.startswith('cuda:'):
                    memory = props.get('memory_gb', 0)
                    if memory > best_memory:
                        best_memory = memory
                        best_device = device_name

            # Minimum memory check: require at least 2 GB
            if best_memory < 2.0:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'

            return best_device
        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log comprehensive system information."""
        try:
            # System information
            logger.info(f"🖥️ System: {platform.system()} {platform.release()}")
            logger.info(f"💾 CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")

            # Python and package versions
            logger.info(f"🐍 Python: {platform.python_version()}")
            if TORCH_AVAILABLE:
                logger.info(f"🔥 PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚡ CUDA: {torch.version.cuda}")
            if OPENCV_AVAILABLE:
                logger.info(f"📷 OpenCV: {cv2.__version__}")
        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Get the selected device."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Get device information."""
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured
        }

    def get_memory_usage(self) -> Dict:
        """Get current memory usage."""
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024**3),
            'system_memory_used_gb': vm.used / (1024**3),
            'system_memory_percent': vm.percent
        }

        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)
                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100
            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")

        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Optimize device settings for a specific model."""
        optimizations = {
            'device': self.device,
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1
        }

        try:
            # Model-specific optimizations
            if model_name.lower() == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2
                    })
            elif model_name.lower() == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True
                    })

            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")
        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")

        return optimizations

    def _get_gpu_memory_gb(self) -> float:
        """Get GPU memory of the selected device in GB."""
        if not self.cuda_available or not self.device_info:
            return 0.0
        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Clean up device resources."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("✅ GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")


# Global device manager instance
_device_manager = None


def get_device_manager() -> DeviceManager:
    """Get the global device manager instance."""
    global _device_manager
    if _device_manager is None:
        _device_manager = DeviceManager()
        _device_manager.initialize()
    return _device_manager


def get_optimal_device() -> str:
    """Get the optimal device for processing."""
    return get_device_manager().get_device()


def fix_cuda_compatibility():
    """Check and work around CUDA compatibility issues."""
    try:
        dm = get_device_manager()
        logger.info("✅ CUDA compatibility checked and fixed")
        return dm.get_device_info()
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        return {'device': 'cpu', 'error': str(e)}


def setup_optimal_threading():
    """Set up optimal threading configuration."""
    try:
        dm = get_device_manager()
        if dm.threading_configured:
            logger.info("✅ Threading already configured optimally")
        else:
            dm._configure_threading()
        return True
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False


def get_system_diagnostics() -> Dict:
    """Get comprehensive system diagnostics."""
    dm = get_device_manager()
    return {
        'device_info': dm.get_device_info(),
        'memory_usage': dm.get_memory_usage(),
        'system_ready': dm.device is not None
    }


# Initialize on module import
try:
    _device_manager = DeviceManager()
    _device_manager.initialize()
    logger.info("✅ Device manager initialized on import")
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")