""" |
|
|
Device Manager for BackgroundFX Pro |
|
|
Handles device detection, optimization, and hardware compatibility |
|
|
""" |

import os

# Set conservative threading defaults before heavyweight numeric libraries
# (OpenMP/MKL runtimes, PyTorch) are imported and read these variables.
if 'OMP_NUM_THREADS' not in os.environ:
    os.environ['OMP_NUM_THREADS'] = '4'
    os.environ['MKL_NUM_THREADS'] = '4'

import sys
import platform
import logging
from typing import Any, Dict, List, Optional, Tuple
from dataclasses import dataclass
from enum import Enum

import torch
import psutil
import cpuinfo

logger = logging.getLogger(__name__)


class DeviceType(Enum):
    """Enumeration of supported device types."""
    CUDA = "cuda"
    MPS = "mps"
    CPU = "cpu"


@dataclass
class DeviceInfo:
    """Information about a compute device."""
    type: DeviceType
    index: int
    name: str
    memory_total: int      # bytes
    memory_available: int  # bytes
    compute_capability: Optional[Tuple[int, int]] = None  # CUDA devices only
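
# A detected entry looks like this (illustrative values only):
#
#     DeviceInfo(type=DeviceType.CUDA, index=0, name="NVIDIA GeForce RTX 3090",
#                memory_total=25_769_803_776, memory_available=24_000_000_000,
#                compute_capability=(8, 6))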


class DeviceManager:
    """Manages compute devices and system optimization."""

    def __init__(self):
        """Initialize the manager and probe available hardware."""
        self.devices: List[DeviceInfo] = []
        self.optimal_device: Optional[DeviceInfo] = None
        self.cpu_info: Optional[Dict[str, Any]] = None
        self.system_info: Dict[str, Any] = {}

        self._detect_devices()
        self._gather_system_info()
        self._determine_optimal_device()

    def _detect_devices(self):
        """Detect available compute devices."""
        self.devices = []

        # CUDA GPUs: one entry per visible device.
        if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                props = torch.cuda.get_device_properties(i)
                self.devices.append(DeviceInfo(
                    type=DeviceType.CUDA,
                    index=i,
                    name=props.name,
                    memory_total=props.total_memory,
                    memory_available=props.total_memory - torch.cuda.memory_allocated(i),
                    compute_capability=(props.major, props.minor),
                ))

        # Apple Silicon GPU (MPS): unified memory, so system RAM figures are
        # the closest available proxy for GPU memory.
        if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
            self.devices.append(DeviceInfo(
                type=DeviceType.MPS,
                index=0,
                name="Apple Silicon GPU",
                memory_total=psutil.virtual_memory().total,
                memory_available=psutil.virtual_memory().available,
            ))

        # CPU is always present as a fallback.
        try:
            cpu_name = cpuinfo.get_cpu_info().get('brand_raw', 'Unknown CPU')
        except Exception:
            cpu_name = platform.processor() or "Unknown CPU"

        self.devices.append(DeviceInfo(
            type=DeviceType.CPU,
            index=0,
            name=cpu_name,
            memory_total=psutil.virtual_memory().total,
            memory_available=psutil.virtual_memory().available,
        ))

    def _gather_system_info(self):
        """Gather static system information."""
        try:
            self.cpu_info = cpuinfo.get_cpu_info()
        except Exception:
            self.cpu_info = {}

        self.system_info = {
            'platform': platform.system(),
            'platform_release': platform.release(),
            'platform_version': platform.version(),
            'architecture': platform.machine(),
            'processor': platform.processor(),
            'cpu_count': psutil.cpu_count(logical=False),
            'cpu_count_logical': psutil.cpu_count(logical=True),
            'ram_total': psutil.virtual_memory().total,
            'ram_available': psutil.virtual_memory().available,
            'python_version': sys.version,
            'torch_version': torch.__version__,
        }

    def _determine_optimal_device(self):
        """Pick the best device: CUDA first, then MPS, then CPU."""
        cuda_devices = [d for d in self.devices if d.type == DeviceType.CUDA]
        mps_devices = [d for d in self.devices if d.type == DeviceType.MPS]
        cpu_devices = [d for d in self.devices if d.type == DeviceType.CPU]

        if cuda_devices:
            # Among CUDA devices, prefer the one with the most free memory.
            self.optimal_device = max(cuda_devices, key=lambda d: d.memory_available)
        elif mps_devices:
            self.optimal_device = mps_devices[0]
        else:
            self.optimal_device = cpu_devices[0]

        logger.info(f"Optimal device: {self.optimal_device.name} ({self.optimal_device.type.value})")

    def get_optimal_device(self) -> str:
        """Get the optimal device string for PyTorch (e.g. 'cuda:0', 'mps', 'cpu')."""
        if self.optimal_device.type == DeviceType.CUDA:
            return f"cuda:{self.optimal_device.index}"
        elif self.optimal_device.type == DeviceType.MPS:
            return "mps"
        return "cpu"

    def fix_cuda_compatibility(self):
        """Apply CUDA compatibility fixes."""
        if not torch.cuda.is_available():
            logger.info("CUDA not available, skipping compatibility fixes")
            return

        try:
            # Synchronous kernel launches surface CUDA errors at the call
            # site instead of a later sync point, at some performance cost.
            os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

            # Allow TF32 on Ampere and newer GPUs for faster matmuls and
            # convolutions at slightly reduced precision.
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True

            # Cap the caching allocator's split size to reduce fragmentation
            # in long-running workloads, unless the user already configured it.
            if 'PYTORCH_CUDA_ALLOC_CONF' not in os.environ:
                os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'

            logger.info("CUDA compatibility settings applied")
        except Exception as e:
            logger.warning(f"Error applying CUDA compatibility fixes: {e}")

    def setup_optimal_threading(self):
        """Configure optimal threading for the system."""
        try:
            # Respect an explicit user/environment setting if one is present.
            current_omp = os.environ.get('OMP_NUM_THREADS')
            if current_omp and current_omp.isdigit() and int(current_omp) > 0:
                logger.info(f"Threading already configured: OMP_NUM_THREADS={current_omp}")
                torch.set_num_threads(int(current_omp))
                if 'MKL_NUM_THREADS' not in os.environ:
                    os.environ['MKL_NUM_THREADS'] = current_omp
                return

            # Otherwise derive a default from the physical core count, capped
            # at 8 to avoid oversubscription on many-core machines.
            physical_cores = psutil.cpu_count(logical=False) or 4
            os.environ['OMP_NUM_THREADS'] = str(min(physical_cores, 8))

            # MKL threading only matters on Intel CPUs; mirror the OMP value.
            if 'intel' in self.system_info.get('processor', '').lower():
                os.environ['MKL_NUM_THREADS'] = os.environ['OMP_NUM_THREADS']

            torch.set_num_threads(int(os.environ['OMP_NUM_THREADS']))

            # With a GPU doing the heavy lifting, keep inter-op parallelism low.
            if torch.cuda.is_available():
                torch.set_num_interop_threads(2)

            logger.info(f"Threading configured: OMP_NUM_THREADS={os.environ.get('OMP_NUM_THREADS')}")
        except Exception as e:
            logger.warning(f"Error setting up threading: {e}")
            # Fall back to safe defaults so downstream code always sees a value.
            if 'OMP_NUM_THREADS' not in os.environ:
                os.environ['OMP_NUM_THREADS'] = '4'
            if 'MKL_NUM_THREADS' not in os.environ:
                os.environ['MKL_NUM_THREADS'] = '4'

    def get_system_diagnostics(self) -> Dict[str, Any]:
        """Get comprehensive system diagnostics."""
        diagnostics = {
            'system': self.system_info.copy(),
            'devices': [],
            'optimal_device': None,
            'threading': {
                'omp_num_threads': os.environ.get('OMP_NUM_THREADS', 'not set'),
                'mkl_num_threads': os.environ.get('MKL_NUM_THREADS', 'not set'),
                'torch_num_threads': torch.get_num_threads(),
            },
        }

        # Per-device details, with memory normalized to GiB.
        for device in self.devices:
            device_info = {
                'type': device.type.value,
                'index': device.index,
                'name': device.name,
                'memory_total_gb': device.memory_total / (1024**3),
                'memory_available_gb': device.memory_available / (1024**3),
            }
            if device.compute_capability:
                device_info['compute_capability'] = f"{device.compute_capability[0]}.{device.compute_capability[1]}"
            diagnostics['devices'].append(device_info)

        if self.optimal_device:
            diagnostics['optimal_device'] = {
                'type': self.optimal_device.type.value,
                'name': self.optimal_device.name,
                'pytorch_device': self.get_optimal_device(),
            }

        # CUDA runtime details.
        if torch.cuda.is_available():
            diagnostics['cuda'] = {
                'available': True,
                'version': torch.version.cuda,
                'device_count': torch.cuda.device_count(),
                'current_device': torch.cuda.current_device() if torch.cuda.is_initialized() else None,
            }
        else:
            diagnostics['cuda'] = {'available': False}

        # MPS (Apple Silicon) details.
        if hasattr(torch.backends, 'mps'):
            diagnostics['mps'] = {
                'available': torch.backends.mps.is_available(),
                'built': torch.backends.mps.is_built(),
            }
        else:
            diagnostics['mps'] = {'available': False}

        return diagnostics

    def get_device_for_model(self, model_size_gb: float = 2.0) -> str:
        """Get an appropriate device based on model size requirements."""
        # Require 1.5x the model size, leaving headroom for activations and
        # intermediate buffers.
        required_memory = model_size_gb * 1024**3 * 1.5

        # Prefer the first CUDA device with enough free memory.
        cuda_devices = [d for d in self.devices if d.type == DeviceType.CUDA]
        for device in cuda_devices:
            if device.memory_available > required_memory:
                return f"cuda:{device.index}"

        # Then MPS, if its (unified) memory headroom suffices.
        mps_devices = [d for d in self.devices if d.type == DeviceType.MPS]
        if mps_devices and mps_devices[0].memory_available > required_memory:
            return "mps"

        # CPU always works, if slowly.
        return "cpu"

# Module-level singleton, created lazily by get_device_manager().
_device_manager_instance: Optional[DeviceManager] = None


def get_device_manager() -> DeviceManager:
    """Get or create the singleton DeviceManager instance."""
    global _device_manager_instance
    if _device_manager_instance is None:
        _device_manager_instance = DeviceManager()
    return _device_manager_instance
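
# Repeated calls return the same instance, so hardware detection runs only
# once per process (a sketch):
#
#     assert get_device_manager() is get_device_manager()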


def get_optimal_device() -> str:
    """
    Get the optimal device string for PyTorch operations.

    Returns:
        str: Device string like 'cuda:0', 'mps', or 'cpu'
    """
    return get_device_manager().get_optimal_device()
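
# Typical call site (a sketch; MyModel stands in for your own nn.Module):
#
#     device = get_optimal_device()          # "cuda:0", "mps", or "cpu"
#     x = torch.zeros(3, 3, device=device)
#     model = MyModel().to(device)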


def fix_cuda_compatibility():
    """
    Apply CUDA compatibility settings for stable operation.

    Sets environment variables and PyTorch settings for CUDA compatibility.
    """
    get_device_manager().fix_cuda_compatibility()


def setup_optimal_threading():
    """
    Configure optimal threading settings for the current system.

    Sets OMP_NUM_THREADS, MKL_NUM_THREADS, and PyTorch thread counts.
    """
    get_device_manager().setup_optimal_threading()


def get_system_diagnostics() -> Dict[str, Any]:
    """
    Get comprehensive system diagnostics information.

    Returns:
        Dict containing system info, device info, and configuration details
    """
    return get_device_manager().get_system_diagnostics()
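
# The returned mapping looks roughly like this (illustrative, trimmed):
#
#     {
#         'system': {'platform': 'Linux', 'cpu_count': 8, ...},
#         'devices': [{'type': 'cuda', 'name': '...', 'memory_total_gb': 24.0, ...}],
#         'optimal_device': {'type': 'cuda', 'pytorch_device': 'cuda:0', ...},
#         'threading': {'omp_num_threads': '4', ...},
#         'cuda': {'available': True, 'version': '12.1', ...},
#         'mps': {'available': False},
#     }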


# Auto-configure on import: when loaded as a module (rather than run as a
# script), eagerly create the singleton and apply threading settings.
if __name__ != "__main__":
    try:
        manager = get_device_manager()
        manager.setup_optimal_threading()
    except Exception as e:
        logger.warning(f"Error during device manager initialization: {e}")