MogensR committed
Commit d6467ce · 1 Parent(s): 46d1c50

Create utils/device.py

Files changed (1): utils/device.py +410 -0
utils/device.py ADDED
@@ -0,0 +1,410 @@
+"""
+Device and Hardware Management Module
+=====================================
+
+Handles device detection, CUDA compatibility, memory management,
+and threading configuration for BackgroundFX Pro.
+
+Fixes:
+- CUDA multiprocessor_count compatibility error
+- OpenMP threading issues (OMP_NUM_THREADS)
+- GPU memory optimization
+- Automatic device selection
+
+Author: BackgroundFX Pro Team
+License: MIT
+"""
+
+import os
+import logging
+import warnings
+from typing import Dict, Optional, List, Tuple
+import platform
+import psutil
+
+# Fix threading issues immediately at module import
+os.environ.setdefault('OMP_NUM_THREADS', '4')
+os.environ.setdefault('MKL_NUM_THREADS', '4')
+os.environ.setdefault('NUMEXPR_NUM_THREADS', '4')
+
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+    warnings.warn("PyTorch not available - using CPU-only processing")
+
+try:
+    import cv2
+    OPENCV_AVAILABLE = True
+except ImportError:
+    OPENCV_AVAILABLE = False
+    warnings.warn("OpenCV not available")
+
+logger = logging.getLogger(__name__)
+
+class DeviceManager:
+    """Manages device detection, selection and optimization"""
+
+    def __init__(self):
+        self.device = None
+        self.device_info = {}
+        self.cuda_available = False
+        self.gpu_count = 0
+        self.memory_info = {}
+        self.threading_configured = False
+
+    def initialize(self) -> bool:
+        """Initialize device manager and configure optimal settings"""
+        try:
+            logger.info("🔧 Initializing Device Manager...")
+
+            # Fix threading first
+            self._configure_threading()
+
+            # Detect available devices
+            self._detect_devices()
+
+            # Configure CUDA if available
+            if self.cuda_available:
+                self._configure_cuda()
+
+            # Select optimal device
+            self.device = self._select_optimal_device()
+
+            # Log system information
+            self._log_system_info()
+
+            logger.info(f"✅ Device Manager initialized - Using: {self.device}")
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ Device Manager initialization failed: {e}")
+            self.device = 'cpu'
+            return False
+
+    def _configure_threading(self):
+        """Configure threading for optimal performance"""
+        try:
+            # Set OpenMP threads
+            if 'OMP_NUM_THREADS' not in os.environ:
+                os.environ['OMP_NUM_THREADS'] = '4'
+
+            # Set MKL threads
+            if 'MKL_NUM_THREADS' not in os.environ:
+                os.environ['MKL_NUM_THREADS'] = '4'
+
+            # Set NumExpr threads
+            if 'NUMEXPR_NUM_THREADS' not in os.environ:
+                os.environ['NUMEXPR_NUM_THREADS'] = '4'
+
+            # Configure PyTorch threads
+            if TORCH_AVAILABLE:
+                torch.set_num_threads(4)
+                torch.set_num_interop_threads(4)
+
+            # Configure OpenCV threads
+            if OPENCV_AVAILABLE:
+                cv2.setNumThreads(4)
+
+            self.threading_configured = True
+            logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")
+
+        except Exception as e:
+            logger.warning(f"⚠️ Threading configuration warning: {e}")
+
+    def _detect_devices(self):
+        """Detect available computing devices"""
+        try:
+            if not TORCH_AVAILABLE:
+                self.cuda_available = False
+                self.gpu_count = 0
+                return
+
+            # Check CUDA availability
+            self.cuda_available = torch.cuda.is_available()
+            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0
+
+            if self.cuda_available:
+                logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)")
+
+                # Get device properties for each GPU
+                for i in range(self.gpu_count):
+                    try:
+                        props = self._get_cuda_properties_safe(i)
+                        self.device_info[f'cuda:{i}'] = props
+                        logger.info(f"  GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
+                    except Exception as e:
+                        logger.warning(f"  GPU {i}: Properties unavailable ({e})")
+            else:
+                logger.info("ℹ️ CUDA not available - using CPU")
+
+        except Exception as e:
+            logger.error(f"❌ Device detection failed: {e}")
+            self.cuda_available = False
+            self.gpu_count = 0
+
+    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
+        """Safely get CUDA device properties with compatibility handling"""
+        try:
+            if not TORCH_AVAILABLE or not torch.cuda.is_available():
+                return {}
+
+            props = torch.cuda.get_device_properties(device_id)
+
+            # Handle different PyTorch versions for multiprocessor count
+            if hasattr(props, 'multi_processor_count'):
+                sm_count = props.multi_processor_count
+            elif hasattr(props, 'multiprocessor_count'):
+                sm_count = props.multiprocessor_count
+            else:
+                # Fallback calculation for older PyTorch versions
+                try:
+                    major, minor = torch.cuda.get_device_capability(device_id)
+                    # Rough estimation based on compute capability
+                    sm_count = major * 8 if major >= 6 else major * 4
+                except Exception:
+                    sm_count = 'Unknown'
+
+            device_props = {
+                'name': props.name,
+                'memory_gb': props.total_memory / (1024**3),
+                'memory_bytes': props.total_memory,
+                'multiprocessor_count': sm_count,
+                'major': props.major,
+                'minor': props.minor,
+                'compute_capability': f"{props.major}.{props.minor}"
+            }
+
+            return device_props
+
+        except Exception as e:
+            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
+            return {
+                'name': 'Unknown GPU',
+                'memory_gb': 0.0,
+                'memory_bytes': 0,
+                'multiprocessor_count': 'Unknown',
+                'error': str(e)
+            }
+
+    def _configure_cuda(self):
+        """Configure CUDA for optimal performance"""
+        try:
+            if not self.cuda_available or not TORCH_AVAILABLE:
+                return
+
+            # Enable cuDNN benchmark mode (autotunes kernels for fixed input sizes)
+            torch.backends.cudnn.benchmark = True
+            torch.backends.cudnn.deterministic = False
+
+            # Release any cached GPU memory
+            torch.cuda.empty_cache()
+
+            # Enable mixed precision if supported
+            try:
+                # Check if Automatic Mixed Precision is available
+                from torch.cuda.amp import autocast
+                logger.info("✅ Mixed precision available")
+            except ImportError:
+                logger.info("ℹ️ Mixed precision not available")
+
+            logger.info("✅ CUDA optimization configured")
+
+        except Exception as e:
+            logger.warning(f"⚠️ CUDA configuration warning: {e}")
+
+    def _select_optimal_device(self) -> str:
+        """Select the optimal device for processing"""
+        try:
+            if not TORCH_AVAILABLE:
+                return 'cpu'
+
+            if not self.cuda_available or self.gpu_count == 0:
+                return 'cpu'
+
+            # Select GPU with most memory
+            best_device = 'cuda:0'
+            best_memory = 0
+
+            for device_name, props in self.device_info.items():
+                if device_name.startswith('cuda:'):
+                    memory = props.get('memory_gb', 0)
+                    if memory > best_memory:
+                        best_memory = memory
+                        best_device = device_name
+
+            # Minimum memory check
+            if best_memory < 2.0:  # Require at least 2 GB
+                logger.warning(f"⚠️ GPU memory ({best_memory:.1f} GB) may be insufficient, using CPU")
+                return 'cpu'
+
+            return best_device
+
+        except Exception as e:
+            logger.error(f"❌ Device selection failed: {e}")
+            return 'cpu'
+
+    def _log_system_info(self):
+        """Log comprehensive system information"""
+        try:
+            # System information
+            logger.info(f"📊 System: {platform.system()} {platform.release()}")
+            logger.info(f"💾 CPU: {platform.processor()}")
+            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")
+
+            # Python and package versions
+            logger.info(f"🐍 Python: {platform.python_version()}")
+
+            if TORCH_AVAILABLE:
+                logger.info(f"🔥 PyTorch: {torch.__version__}")
+                if torch.cuda.is_available():
+                    logger.info(f"⚡ CUDA: {torch.version.cuda}")
+
+            if OPENCV_AVAILABLE:
+                logger.info(f"📷 OpenCV: {cv2.__version__}")
+
+        except Exception as e:
+            logger.warning(f"⚠️ System info logging failed: {e}")
+
+    def get_device(self) -> str:
+        """Get the selected device"""
+        return self.device or 'cpu'
+
+    def get_device_info(self) -> Dict:
+        """Get device information"""
+        return {
+            'device': self.device,
+            'cuda_available': self.cuda_available,
+            'gpu_count': self.gpu_count,
+            'device_info': self.device_info,
+            'threading_configured': self.threading_configured
+        }
+
+    def get_memory_usage(self) -> Dict:
+        """Get current memory usage"""
+        memory_info = {
+            'system_memory_gb': psutil.virtual_memory().total / (1024**3),
+            'system_memory_used_gb': psutil.virtual_memory().used / (1024**3),
+            'system_memory_percent': psutil.virtual_memory().percent
+        }
+
+        if self.cuda_available and TORCH_AVAILABLE:
+            try:
+                for i in range(self.gpu_count):
+                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
+                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
+                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)
+
+                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
+                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
+                    memory_info[f'gpu_{i}_total_gb'] = total
+                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100
+
+            except Exception as e:
+                logger.warning(f"⚠️ GPU memory info failed: {e}")
+
+        return memory_info
+
+    def optimize_for_model(self, model_name: str) -> Dict:
+        """Optimize device settings for a specific model"""
+        optimizations = {
+            'device': self.device,
+            'mixed_precision': False,
+            'gradient_checkpointing': False,
+            'batch_size': 1
+        }
+
+        try:
+            # Model-specific optimizations
+            if model_name.lower() == 'sam2':
+                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
+                    optimizations.update({
+                        'mixed_precision': True,
+                        'batch_size': 2
+                    })
+
+            elif model_name.lower() == 'matanyone':
+                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
+                    optimizations.update({
+                        'mixed_precision': True
+                    })
+
+            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")
+
+        except Exception as e:
+            logger.warning(f"⚠️ Model optimization failed: {e}")
+
+        return optimizations
+
+    def _get_gpu_memory_gb(self) -> float:
+        """Get GPU memory in GB"""
+        if not self.cuda_available or not self.device_info:
+            return 0.0
+
+        device_key = self.device if self.device in self.device_info else 'cuda:0'
+        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)
+
+    def cleanup(self):
+        """Cleanup device resources"""
+        try:
+            if self.cuda_available and TORCH_AVAILABLE:
+                torch.cuda.empty_cache()
+                logger.info("✅ GPU cache cleared")
+        except Exception as e:
+            logger.warning(f"⚠️ Cleanup warning: {e}")
+
+# Global device manager instance
+_device_manager = None
+
+def get_device_manager() -> DeviceManager:
+    """Get the global device manager instance"""
+    global _device_manager
+    if _device_manager is None:
+        _device_manager = DeviceManager()
+        _device_manager.initialize()
+    return _device_manager
+
+def get_optimal_device() -> str:
+    """Get the optimal device for processing"""
+    return get_device_manager().get_device()
+
+def fix_cuda_compatibility():
+    """Fix CUDA compatibility issues"""
+    try:
+        dm = get_device_manager()
+        logger.info("✅ CUDA compatibility checked and fixed")
+        return dm.get_device_info()
+    except Exception as e:
+        logger.error(f"❌ CUDA compatibility fix failed: {e}")
+        return {'device': 'cpu', 'error': str(e)}
+
+def setup_optimal_threading():
+    """Setup optimal threading configuration"""
+    try:
+        dm = get_device_manager()
+        if dm.threading_configured:
+            logger.info("✅ Threading already configured optimally")
+        else:
+            dm._configure_threading()
+        return True
+    except Exception as e:
+        logger.error(f"❌ Threading setup failed: {e}")
+        return False
+
+def get_system_diagnostics() -> Dict:
+    """Get comprehensive system diagnostics"""
+    dm = get_device_manager()
+    return {
+        'device_info': dm.get_device_info(),
+        'memory_usage': dm.get_memory_usage(),
+        'system_ready': dm.device is not None
+    }
+
+# Initialize on module import
+try:
+    _device_manager = DeviceManager()
+    _device_manager.initialize()
+    logger.info("✅ Device manager initialized on import")
+except Exception as e:
+    logger.warning(f"⚠️ Device manager initialization warning: {e}")
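
For reference, a minimal usage sketch of the new module. It assumes the file is importable as utils.device on the project path; every name used below is defined in this commit:

    from utils.device import (
        get_device_manager,
        get_optimal_device,
        get_system_diagnostics,
    )

    device = get_optimal_device()             # e.g. 'cuda:0' on a capable GPU, else 'cpu'
    dm = get_device_manager()                 # lazily-initialized global DeviceManager
    settings = dm.optimize_for_model('sam2')  # enables mixed precision on GPUs with >= 8 GB
    print(get_system_diagnostics())           # device info plus current memory usage
    dm.cleanup()                              # clears the CUDA cache when finished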