"""
Device and Hardware Management Module
======================================

Handles device detection, CUDA compatibility, memory management,
and threading configuration for BackgroundFX Pro.

Fixes:
- CUDA multiprocessor_count compatibility error
- OpenMP threading issues (OMP_NUM_THREADS)
- GPU memory optimization
- Automatic device selection

Author: BackgroundFX Pro Team
License: MIT
"""

import os
import logging
import warnings
import platform
from typing import Dict

import psutil

# Cap math-library thread pools before torch/NumPy/OpenCV are imported;
# these variables are only honored if set before those libraries initialize.
os.environ.setdefault('OMP_NUM_THREADS', '4')
os.environ.setdefault('MKL_NUM_THREADS', '4')
os.environ.setdefault('NUMEXPR_NUM_THREADS', '4')

# Optional heavy dependencies -- degrade gracefully when they are missing.
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    warnings.warn("PyTorch not available - using CPU-only processing")

try:
    import cv2
    OPENCV_AVAILABLE = True
except ImportError:
    OPENCV_AVAILABLE = False
    warnings.warn("OpenCV not available")

logger = logging.getLogger(__name__)


class DeviceManager:
    """Manages device detection, selection, and optimization."""

    def __init__(self):
        self.device = None
        self.device_info = {}
        self.cuda_available = False
        self.gpu_count = 0
        self.memory_info = {}
        self.threading_configured = False

    def initialize(self) -> bool:
        """Initialize the device manager and configure optimal settings."""
        try:
            logger.info("🔧 Initializing Device Manager...")

            # 1) Tame thread pools before any heavy computation starts.
            self._configure_threading()

            # 2) Discover CUDA devices and their properties.
            self._detect_devices()

            # 3) Apply CUDA-specific settings when a GPU is present.
            if self.cuda_available:
                self._configure_cuda()

            # 4) Pick the best device and report the environment.
            self.device = self._select_optimal_device()
            self._log_system_info()

            logger.info(f"✅ Device Manager initialized - Using: {self.device}")
            return True

        except Exception as e:
            logger.error(f"❌ Device Manager initialization failed: {e}")
            self.device = 'cpu'
            return False

    def _configure_threading(self):
        """Configure threading for optimal performance."""
        try:
            # Defensive re-check: these are normally set at import time above,
            # but respect any values the caller exported in the meantime.
            for var in ('OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS'):
                if var not in os.environ:
                    os.environ[var] = '4'

            if TORCH_AVAILABLE:
                # set_num_interop_threads raises if parallel work has already
                # started; the enclosing try/except absorbs that case.
                torch.set_num_threads(4)
                torch.set_num_interop_threads(4)

            if OPENCV_AVAILABLE:
                cv2.setNumThreads(4)

            self.threading_configured = True
            logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")

        except Exception as e:
            logger.warning(f"⚠️ Threading configuration warning: {e}")

    def _detect_devices(self):
        """Detect available computing devices."""
        try:
            if not TORCH_AVAILABLE:
                self.cuda_available = False
                self.gpu_count = 0
                return

            self.cuda_available = torch.cuda.is_available()
            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0

            if self.cuda_available:
                logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)")

                for i in range(self.gpu_count):
                    try:
                        props = self._get_cuda_properties_safe(i)
                        self.device_info[f'cuda:{i}'] = props
                        logger.info(f"   GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
                    except Exception as e:
                        logger.warning(f"   GPU {i}: Properties unavailable ({e})")
            else:
                logger.info("ℹ️ CUDA not available - using CPU")

        except Exception as e:
            logger.error(f"❌ Device detection failed: {e}")
            self.cuda_available = False
            self.gpu_count = 0

    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Safely get CUDA device properties with compatibility handling."""
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}

            props = torch.cuda.get_device_properties(device_id)

            # PyTorch versions disagree on this attribute's name, which is the
            # source of the multiprocessor_count compatibility error.
            if hasattr(props, 'multi_processor_count'):
                sm_count = props.multi_processor_count
            elif hasattr(props, 'multiprocessor_count'):
                sm_count = props.multiprocessor_count
            else:
                # Last resort: derive a rough estimate from compute capability.
                try:
                    major, minor = torch.cuda.get_device_capability(device_id)
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:
                    sm_count = 'Unknown'

            return {
                'name': props.name,
                'memory_gb': props.total_memory / (1024**3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}",
            }

        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            return {
                'name': 'Unknown GPU',
                'memory_gb': 0.0,
                'memory_bytes': 0,
                'multiprocessor_count': 'Unknown',
                'error': str(e),
            }

    def _configure_cuda(self):
        """Configure CUDA for optimal performance."""
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return

            # Let cuDNN benchmark kernels for the observed input sizes;
            # we trade determinism for throughput.
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            # Release any cached allocations from earlier runs.
            torch.cuda.empty_cache()

            # Probe for automatic mixed precision support.
            try:
                from torch.cuda.amp import autocast  # noqa: F401
                logger.info("✅ Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")

            logger.info("✅ CUDA optimization configured")

        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    def _select_optimal_device(self) -> str:
        """Select the optimal device for processing."""
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'

            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'

            # Prefer the GPU with the most total memory.
            best_device = 'cuda:0'
            best_memory = 0

            for device_name, props in self.device_info.items():
                if device_name.startswith('cuda:'):
                    memory = props.get('memory_gb', 0)
                    if memory > best_memory:
                        best_memory = memory
                        best_device = device_name

            # Fall back to CPU when even the best GPU is too small.
            if best_memory < 2.0:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'

            return best_device

        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log comprehensive system information."""
        try:
            logger.info(f"🖥️ System: {platform.system()} {platform.release()}")
            logger.info(f"💾 CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")
            logger.info(f"🐍 Python: {platform.python_version()}")

            if TORCH_AVAILABLE:
                logger.info(f"🔥 PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚡ CUDA: {torch.version.cuda}")

            if OPENCV_AVAILABLE:
                logger.info(f"📷 OpenCV: {cv2.__version__}")

        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Get the selected device."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Get device information."""
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured,
        }

    def get_memory_usage(self) -> Dict:
        """Get current memory usage."""
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024**3),
            'system_memory_used_gb': vm.used / (1024**3),
            'system_memory_percent': vm.percent,
        }

        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)

                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100

            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")

        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Optimize device settings for a specific model."""
        optimizations = {
            'device': self.device,
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1,
        }

        try:
            # Model-specific tuning, gated on available GPU memory.
            if model_name.lower() == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2,
                    })

            elif model_name.lower() == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True,
                    })

            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")

        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")

        return optimizations
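
    # Illustrative result (hypothetical hardware): on a single 8 GB GPU,
    # optimize_for_model('sam2') would return
    #   {'device': 'cuda:0', 'mixed_precision': True,
    #    'gradient_checkpointing': False, 'batch_size': 2}
    # while on a CPU-only machine every model keeps the conservative defaults.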

    def _get_gpu_memory_gb(self) -> float:
        """Get the selected GPU's total memory in GB."""
        if not self.cuda_available or not self.device_info:
            return 0.0

        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Clean up device resources."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("✅ GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")


# Global singleton, created lazily by get_device_manager() and eagerly at
# import time below.
_device_manager = None


def get_device_manager() -> DeviceManager:
    """Get the global device manager instance."""
    global _device_manager
    if _device_manager is None:
        _device_manager = DeviceManager()
        _device_manager.initialize()
    return _device_manager


def get_optimal_device() -> str:
    """Get the optimal device for processing."""
    return get_device_manager().get_device()


def fix_cuda_compatibility():
    """Run the device manager's CUDA compatibility checks and fixes."""
    try:
        dm = get_device_manager()
        logger.info("✅ CUDA compatibility checked and fixed")
        return dm.get_device_info()
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        return {'device': 'cpu', 'error': str(e)}


def setup_optimal_threading():
    """Set up the optimal threading configuration."""
    try:
        dm = get_device_manager()
        if dm.threading_configured:
            logger.info("✅ Threading already configured optimally")
        else:
            dm._configure_threading()
        return True
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False


def get_system_diagnostics() -> Dict:
    """Get comprehensive system diagnostics."""
    dm = get_device_manager()
    return {
        'device_info': dm.get_device_info(),
        'memory_usage': dm.get_memory_usage(),
        'system_ready': dm.device is not None,
    }


# Initialize the global device manager eagerly on import so that threading and
# CUDA settings are applied before any model code runs.
try:
    _device_manager = DeviceManager()
    _device_manager.initialize()
    logger.info("✅ Device manager initialized on import")
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")
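

# A minimal smoke test (a sketch; run this file directly with `python`).
# It exercises only the public helpers defined above.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Report which device was selected and the current memory picture.
    print("Optimal device:", get_optimal_device())

    diagnostics = get_system_diagnostics()
    print("System ready:", diagnostics['system_ready'])
    print("Memory usage:", diagnostics['memory_usage'])

    # Release any cached GPU memory before exiting.
    get_device_manager().cleanup()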