MogensR committed
Commit d6467ce · 1 Parent(s): 46d1c50

Create utils/device.py

Files changed (1): utils/device.py +410 -0
utils/device.py ADDED
@@ -0,0 +1,410 @@
+"""
+Device and Hardware Management Module
+=====================================
+
+Handles device detection, CUDA compatibility, memory management,
+and threading configuration for BackgroundFX Pro.
+
+Fixes:
+- CUDA multiprocessor_count compatibility error
+- OpenMP threading issues (OMP_NUM_THREADS)
+- GPU memory optimization
+- Automatic device selection
+
+Author: BackgroundFX Pro Team
+License: MIT
+"""
+
+import os
+import logging
+import warnings
+from typing import Dict, Optional, List, Tuple
+import platform
+import psutil
+
+# Fix threading issues immediately at module import
+os.environ.setdefault('OMP_NUM_THREADS', '4')
+os.environ.setdefault('MKL_NUM_THREADS', '4')
+os.environ.setdefault('NUMEXPR_NUM_THREADS', '4')
+
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+    warnings.warn("PyTorch not available - using CPU-only processing")
+
+try:
+    import cv2
+    OPENCV_AVAILABLE = True
+except ImportError:
+    OPENCV_AVAILABLE = False
+    warnings.warn("OpenCV not available")
+
+logger = logging.getLogger(__name__)
+
+class DeviceManager:
+    """Manages device detection, selection and optimization"""
+
+    def __init__(self):
+        self.device = None
+        self.device_info = {}
+        self.cuda_available = False
+        self.gpu_count = 0
+        self.memory_info = {}
+        self.threading_configured = False
+
+    def initialize(self) -> bool:
+        """Initialize device manager and configure optimal settings"""
+        try:
+            logger.info("🔧 Initializing Device Manager...")
+
+            # Fix threading first
+            self._configure_threading()
+
+            # Detect available devices
+            self._detect_devices()
+
+            # Configure CUDA if available
+            if self.cuda_available:
+                self._configure_cuda()
+
+            # Select optimal device
+            self.device = self._select_optimal_device()
+
+            # Log system information
+            self._log_system_info()
+
+            logger.info(f"✅ Device Manager initialized - Using: {self.device}")
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ Device Manager initialization failed: {e}")
+            self.device = 'cpu'
+            return False
+
+    def _configure_threading(self):
+        """Configure threading for optimal performance"""
+        try:
+            # Set OpenMP threads
+            if 'OMP_NUM_THREADS' not in os.environ:
+                os.environ['OMP_NUM_THREADS'] = '4'
+
+            # Set MKL threads
+            if 'MKL_NUM_THREADS' not in os.environ:
+                os.environ['MKL_NUM_THREADS'] = '4'
+
+            # Set NumExpr threads
+            if 'NUMEXPR_NUM_THREADS' not in os.environ:
+                os.environ['NUMEXPR_NUM_THREADS'] = '4'
+
+            # Configure PyTorch threads
+            if TORCH_AVAILABLE:
+                torch.set_num_threads(4)
+                torch.set_num_interop_threads(4)
+
+            # Configure OpenCV threads
+            if OPENCV_AVAILABLE:
+                cv2.setNumThreads(4)
+
+            self.threading_configured = True
+            logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")
+
+        except Exception as e:
+            logger.warning(f"⚠️ Threading configuration warning: {e}")
+
+    def _detect_devices(self):
+        """Detect available computing devices"""
+        try:
+            if not TORCH_AVAILABLE:
+                self.cuda_available = False
+                self.gpu_count = 0
+                return
+
+            # Check CUDA availability
+            self.cuda_available = torch.cuda.is_available()
+            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0
+
+            if self.cuda_available:
+                logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)")
+
+                # Get device properties for each GPU
+                for i in range(self.gpu_count):
+                    try:
+                        props = self._get_cuda_properties_safe(i)
+                        self.device_info[f'cuda:{i}'] = props
+                        logger.info(f"  GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
+                    except Exception as e:
+                        logger.warning(f"  GPU {i}: Properties unavailable ({e})")
+            else:
+                logger.info("ℹ️ CUDA not available - using CPU")
+
+        except Exception as e:
+            logger.error(f"❌ Device detection failed: {e}")
+            self.cuda_available = False
+            self.gpu_count = 0
+
+    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
+        """Safely get CUDA device properties with compatibility handling"""
+        try:
+            if not TORCH_AVAILABLE or not torch.cuda.is_available():
+                return {}
+
+            props = torch.cuda.get_device_properties(device_id)
+
+            # Handle different PyTorch versions for multiprocessor count
+            if hasattr(props, 'multi_processor_count'):
+                sm_count = props.multi_processor_count
+            elif hasattr(props, 'multiprocessor_count'):
+                sm_count = props.multiprocessor_count
+            else:
+                # Fallback calculation for older PyTorch versions
+                try:
+                    major, minor = torch.cuda.get_device_capability(device_id)
+                    # Rough estimation based on compute capability
+                    sm_count = major * 8 if major >= 6 else major * 4
+                except Exception:
+                    sm_count = 'Unknown'
+
+            device_props = {
+                'name': props.name,
+                'memory_gb': props.total_memory / (1024**3),
+                'memory_bytes': props.total_memory,
+                'multiprocessor_count': sm_count,
+                'major': props.major,
+                'minor': props.minor,
+                'compute_capability': f"{props.major}.{props.minor}"
+            }
+
+            return device_props
+
+        except Exception as e:
+            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
+            return {
+                'name': 'Unknown GPU',
+                'memory_gb': 0.0,
+                'memory_bytes': 0,
+                'multiprocessor_count': 'Unknown',
+                'error': str(e)
+            }
+
+    def _configure_cuda(self):
+        """Configure CUDA for optimal performance"""
+        try:
+            if not self.cuda_available or not TORCH_AVAILABLE:
+                return
+
+            # Enable cuDNN benchmark mode (autotunes kernels for fixed input sizes)
+            torch.backends.cudnn.benchmark = True
+            torch.backends.cudnn.deterministic = False
+
+            # Release any cached GPU memory
+            torch.cuda.empty_cache()
+
+            # Enable mixed precision if supported
+            try:
+                # Check if Automatic Mixed Precision is available
+                from torch.cuda.amp import autocast
+                logger.info("✅ Mixed precision available")
+            except ImportError:
+                logger.info("ℹ️ Mixed precision not available")
+
+            logger.info("✅ CUDA optimization configured")
+
+        except Exception as e:
+            logger.warning(f"⚠️ CUDA configuration warning: {e}")
+
+    def _select_optimal_device(self) -> str:
+        """Select the optimal device for processing"""
+        try:
+            if not TORCH_AVAILABLE:
+                return 'cpu'
+
+            if not self.cuda_available or self.gpu_count == 0:
+                return 'cpu'
+
+            # Select GPU with most memory
+            best_device = 'cuda:0'
+            best_memory = 0
+
+            for device_name, props in self.device_info.items():
+                if device_name.startswith('cuda:'):
+                    memory = props.get('memory_gb', 0)
+                    if memory > best_memory:
+                        best_memory = memory
+                        best_device = device_name
+
+            # Minimum memory check
+            if best_memory < 2.0:  # Require at least 2 GB
+                logger.warning(f"⚠️ GPU memory ({best_memory:.1f} GB) may be insufficient, using CPU")
+                return 'cpu'
+
+            return best_device
+
+        except Exception as e:
+            logger.error(f"❌ Device selection failed: {e}")
+            return 'cpu'
+
+    def _log_system_info(self):
+        """Log comprehensive system information"""
+        try:
+            # System information
+            logger.info(f"📊 System: {platform.system()} {platform.release()}")
+            logger.info(f"💾 CPU: {platform.processor()}")
+            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")
+
+            # Python and package versions
+            logger.info(f"🐍 Python: {platform.python_version()}")
+
+            if TORCH_AVAILABLE:
+                logger.info(f"🔥 PyTorch: {torch.__version__}")
+                if torch.cuda.is_available():
+                    logger.info(f"⚡ CUDA: {torch.version.cuda}")
+
+            if OPENCV_AVAILABLE:
+                logger.info(f"📷 OpenCV: {cv2.__version__}")
+
+        except Exception as e:
+            logger.warning(f"⚠️ System info logging failed: {e}")
+
+    def get_device(self) -> str:
+        """Get the selected device"""
+        return self.device or 'cpu'
+
+    def get_device_info(self) -> Dict:
+        """Get device information"""
+        return {
+            'device': self.device,
+            'cuda_available': self.cuda_available,
+            'gpu_count': self.gpu_count,
+            'device_info': self.device_info,
+            'threading_configured': self.threading_configured
+        }
+
+    def get_memory_usage(self) -> Dict:
+        """Get current memory usage"""
+        memory_info = {
+            'system_memory_gb': psutil.virtual_memory().total / (1024**3),
+            'system_memory_used_gb': psutil.virtual_memory().used / (1024**3),
+            'system_memory_percent': psutil.virtual_memory().percent
+        }
+
+        if self.cuda_available and TORCH_AVAILABLE:
+            try:
+                for i in range(self.gpu_count):
+                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
+                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
+                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)
+
+                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
+                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
+                    memory_info[f'gpu_{i}_total_gb'] = total
+                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100
+
+            except Exception as e:
+                logger.warning(f"⚠️ GPU memory info failed: {e}")
+
+        return memory_info
+
+    def optimize_for_model(self, model_name: str) -> Dict:
+        """Optimize device settings for a specific model"""
+        optimizations = {
+            'device': self.device,
+            'mixed_precision': False,
+            'gradient_checkpointing': False,
+            'batch_size': 1
+        }
+
+        try:
+            # Model-specific optimizations
+            if model_name.lower() == 'sam2':
+                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
+                    optimizations.update({
+                        'mixed_precision': True,
+                        'batch_size': 2
+                    })
+
+            elif model_name.lower() == 'matanyone':
+                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
+                    optimizations.update({
+                        'mixed_precision': True
+                    })
+
+            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")
+
+        except Exception as e:
+            logger.warning(f"⚠️ Model optimization failed: {e}")
+
+        return optimizations
+
+    def _get_gpu_memory_gb(self) -> float:
+        """Get GPU memory in GB"""
+        if not self.cuda_available or not self.device_info:
+            return 0.0
+
+        device_key = self.device if self.device in self.device_info else 'cuda:0'
+        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)
+
+    def cleanup(self):
+        """Cleanup device resources"""
+        try:
+            if self.cuda_available and TORCH_AVAILABLE:
+                torch.cuda.empty_cache()
+                logger.info("✅ GPU cache cleared")
+        except Exception as e:
+            logger.warning(f"⚠️ Cleanup warning: {e}")
+
+# Global device manager instance
+_device_manager = None
+
+def get_device_manager() -> DeviceManager:
+    """Get the global device manager instance"""
+    global _device_manager
+    if _device_manager is None:
+        _device_manager = DeviceManager()
+        _device_manager.initialize()
+    return _device_manager
+
+def get_optimal_device() -> str:
+    """Get the optimal device for processing"""
+    return get_device_manager().get_device()
+
+def fix_cuda_compatibility():
+    """Fix CUDA compatibility issues"""
+    try:
+        dm = get_device_manager()
+        logger.info("✅ CUDA compatibility checked and fixed")
+        return dm.get_device_info()
+    except Exception as e:
+        logger.error(f"❌ CUDA compatibility fix failed: {e}")
+        return {'device': 'cpu', 'error': str(e)}
+
+def setup_optimal_threading():
+    """Setup optimal threading configuration"""
+    try:
+        dm = get_device_manager()
+        if dm.threading_configured:
+            logger.info("✅ Threading already configured optimally")
+        else:
+            dm._configure_threading()
+        return True
+    except Exception as e:
+        logger.error(f"❌ Threading setup failed: {e}")
+        return False
+
+def get_system_diagnostics() -> Dict:
+    """Get comprehensive system diagnostics"""
+    dm = get_device_manager()
+    return {
+        'device_info': dm.get_device_info(),
+        'memory_usage': dm.get_memory_usage(),
+        'system_ready': dm.device is not None
+    }
+
+# Initialize on module import
+try:
+    _device_manager = DeviceManager()
+    _device_manager.initialize()
+    logger.info("✅ Device manager initialized on import")
+except Exception as e:
+    logger.warning(f"⚠️ Device manager initialization warning: {e}")
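
For reference, a minimal usage sketch of the new module. It assumes the file is importable as utils.device on the project path; every name used below is defined in this commit:

    from utils.device import (
        get_device_manager,
        get_optimal_device,
        get_system_diagnostics,
    )

    device = get_optimal_device()             # e.g. 'cuda:0' on a capable GPU, else 'cpu'
    dm = get_device_manager()                 # lazily-initialized global DeviceManager
    settings = dm.optimize_for_model('sam2')  # enables mixed precision on GPUs with >= 8 GB
    print(get_system_diagnostics())           # device info plus current memory usage
    dm.cleanup()                              # clears the CUDA cache when finished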