"""
Device and Hardware Management Module
=====================================

Handles device detection, CUDA compatibility, memory management,
and threading configuration for BackgroundFX Pro.

Fixes:
- CUDA multiprocessor_count compatibility error
- OpenMP threading issues (OMP_NUM_THREADS)
- GPU memory optimization
- Automatic device selection

Author: BackgroundFX Pro Team
License: MIT
"""
|
|
| import os |
| import logging |
| import warnings |
| from typing import Dict, Optional, List, Tuple |
| import platform |
| import psutil |
|
|
| |
# Default the native thread pools to 4 threads unless the caller already chose
# a value. These lines run before the torch/cv2 imports below, presumably so
# the limits are in place when those libraries load; keep this ordering.
os.environ.setdefault('OMP_NUM_THREADS', '4')
os.environ.setdefault('MKL_NUM_THREADS', '4')
os.environ.setdefault('NUMEXPR_NUM_THREADS', '4')


# PyTorch is optional: TORCH_AVAILABLE gates every torch call in this module.
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    warnings.warn("PyTorch not available - using CPU-only processing")


# OpenCV is optional: OPENCV_AVAILABLE gates every cv2 call in this module.
try:
    import cv2
    OPENCV_AVAILABLE = True
except ImportError:
    OPENCV_AVAILABLE = False
    warnings.warn("OpenCV not available")


# Module-level logger shared by DeviceManager and the helper functions.
logger = logging.getLogger(__name__)
|
|
class DeviceManager:
    """Detect compute devices, select one, and tune threading/CUDA settings.

    Call :meth:`initialize` once, then read the result with
    :meth:`get_device`. Every step is best-effort: failures are logged and the
    manager falls back to ``'cpu'`` rather than raising.
    """

    # Thread count applied to the env defaults, PyTorch and OpenCV.
    DEFAULT_THREADS = 4

    # Environment variables that receive DEFAULT_THREADS when they are unset.
    _THREAD_ENV_VARS = ('OMP_NUM_THREADS', 'MKL_NUM_THREADS', 'NUMEXPR_NUM_THREADS')

    # If the largest GPU reports less memory than this (in GB), use the CPU.
    MIN_GPU_MEMORY_GB = 2.0

    def __init__(self):
        # Selected device string ('cpu' or 'cuda:N'); None until initialize().
        self.device = None
        # Maps 'cuda:N' to the property dict from _get_cuda_properties_safe().
        self.device_info = {}
        self.cuda_available = False
        self.gpu_count = 0
        # Initialized here but not written by any method in this class.
        self.memory_info = {}
        # True once _configure_threading() has run to completion.
        self.threading_configured = False

    def initialize(self) -> bool:
        """Run threading setup, device detection, CUDA tuning and selection.

        Returns:
            True on success. On an unexpected exception the device is set to
            ``'cpu'`` and False is returned.
        """
        try:
            logger.info("🔧 Initializing Device Manager...")

            self._configure_threading()
            self._detect_devices()

            if self.cuda_available:
                self._configure_cuda()

            self.device = self._select_optimal_device()
            self._log_system_info()

            logger.info(f"✅ Device Manager initialized - Using: {self.device}")
            return True

        except Exception as e:
            logger.error(f"❌ Device Manager initialization failed: {e}")
            self.device = 'cpu'
            return False

    def _configure_threading(self):
        """Apply the default thread count to env vars, PyTorch and OpenCV.

        Environment variables that are already set are left untouched. Sets
        ``threading_configured`` to True when the method completes; any
        unexpected error is logged as a warning and the flag is left as is.
        """
        try:
            thread_count = self.DEFAULT_THREADS

            for var_name in self._THREAD_ENV_VARS:
                os.environ.setdefault(var_name, str(thread_count))

            if TORCH_AVAILABLE:
                torch.set_num_threads(thread_count)
                try:
                    torch.set_num_interop_threads(thread_count)
                except RuntimeError as e:
                    # PyTorch only accepts this call once, and only before
                    # inter-op parallel work has started. A rejection here
                    # must not abort the rest of the configuration.
                    logger.debug(f"Inter-op thread count left unchanged: {e}")

            if OPENCV_AVAILABLE:
                cv2.setNumThreads(thread_count)

            self.threading_configured = True
            logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}")

        except Exception as e:
            logger.warning(f"⚠️ Threading configuration warning: {e}")

    def _detect_devices(self):
        """Populate ``cuda_available``, ``gpu_count`` and ``device_info``.

        Without PyTorch, or on any detection error, CUDA is reported as
        unavailable with zero GPUs.
        """
        try:
            # Start from a clean slate so a re-run cannot keep stale entries.
            self.device_info = {}

            if not TORCH_AVAILABLE:
                self.cuda_available = False
                self.gpu_count = 0
                return

            self.cuda_available = torch.cuda.is_available()
            self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0

            if self.cuda_available:
                logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)")

                for i in range(self.gpu_count):
                    try:
                        props = self._get_cuda_properties_safe(i)
                        self.device_info[f'cuda:{i}'] = props
                        logger.info(f" GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)")
                    except Exception as e:
                        logger.warning(f" GPU {i}: Properties unavailable ({e})")
            else:
                logger.info("ℹ️ CUDA not available - using CPU")

        except Exception as e:
            logger.error(f"❌ Device detection failed: {e}")
            self.cuda_available = False
            self.gpu_count = 0

    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Return a property dict for one CUDA device without raising.

        Args:
            device_id: CUDA device index.

        Returns:
            A dict with ``name``, ``memory_gb``, ``memory_bytes``,
            ``multiprocessor_count``, ``major``, ``minor`` and
            ``compute_capability``. An empty dict is returned when
            PyTorch/CUDA is unavailable. On error a placeholder dict with the
            same keys plus ``error`` is returned.
        """
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}

            props = torch.cuda.get_device_properties(device_id)

            # The attribute name is probed under both spellings.
            if hasattr(props, 'multi_processor_count'):
                sm_count = props.multi_processor_count
            elif hasattr(props, 'multiprocessor_count'):
                sm_count = props.multiprocessor_count
            else:
                try:
                    major, _minor = torch.cuda.get_device_capability(device_id)
                    # Rough placeholder derived from the compute capability;
                    # this is NOT a real SM count.
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:
                    sm_count = 'Unknown'

            return {
                'name': props.name,
                'memory_gb': props.total_memory / (1024**3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}",
            }

        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            # Same key set as the success path so consumers can index it.
            return {
                'name': 'Unknown GPU',
                'memory_gb': 0.0,
                'memory_bytes': 0,
                'multiprocessor_count': 'Unknown',
                'major': None,
                'minor': None,
                'compute_capability': 'Unknown',
                'error': str(e),
            }

    def _configure_cuda(self):
        """Enable cuDNN autotuning, clear the CUDA cache, probe for AMP.

        Does nothing when CUDA or PyTorch is unavailable. Errors are logged
        as warnings.
        """
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return

            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            torch.cuda.empty_cache()

            try:
                # Import is only an availability probe; the name is unused.
                from torch.cuda.amp import autocast  # noqa: F401
                logger.info("✅ Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")

            logger.info("✅ CUDA optimization configured")

        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    @staticmethod
    def _largest_gpu(device_info: Dict) -> Tuple[str, float]:
        """Return ``(device_name, memory_gb)`` of the largest CUDA entry.

        Defaults to ``('cuda:0', 0)`` when no entry reports more than 0 GB.
        Ties keep the first entry encountered.
        """
        best_device = 'cuda:0'
        best_memory = 0
        for device_name, props in device_info.items():
            if not device_name.startswith('cuda:'):
                continue
            memory = props.get('memory_gb', 0)
            if memory > best_memory:
                best_memory = memory
                best_device = device_name
        return best_device, best_memory

    def _select_optimal_device(self) -> str:
        """Pick the CUDA device with the most memory, or ``'cpu'``.

        Returns ``'cpu'`` when PyTorch or CUDA is unavailable, when the
        largest GPU reports less than ``MIN_GPU_MEMORY_GB``, or on error.
        """
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'

            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'

            best_device, best_memory = self._largest_gpu(self.device_info)

            if best_memory < self.MIN_GPU_MEMORY_GB:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'

            return best_device

        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log OS, CPU, RAM, Python and library versions at INFO level."""
        try:
            logger.info(f"📊 System: {platform.system()} {platform.release()}")
            logger.info(f"💾 CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024**3):.1f} GB")

            logger.info(f"🐍 Python: {platform.python_version()}")

            if TORCH_AVAILABLE:
                logger.info(f"🔥 PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚡ CUDA: {torch.version.cuda}")

            if OPENCV_AVAILABLE:
                logger.info(f"📷 OpenCV: {cv2.__version__}")

        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Return the selected device, or ``'cpu'`` if none was selected."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Return a snapshot of the manager's detection state.

        ``device`` is None until :meth:`initialize` has run.
        """
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured,
        }

    @staticmethod
    def _percent_of(used: float, total: float) -> float:
        """Return ``used`` as a percentage of ``total``; 0.0 if total <= 0."""
        if total <= 0:
            return 0.0
        return (used / total) * 100

    def get_memory_usage(self) -> Dict:
        """Return system RAM usage plus per-GPU usage when CUDA is active.

        Returns:
            A dict with ``system_memory_gb``, ``system_memory_used_gb`` and
            ``system_memory_percent``, plus ``gpu_{i}_allocated_gb``,
            ``gpu_{i}_reserved_gb``, ``gpu_{i}_total_gb`` and
            ``gpu_{i}_percent`` for each GPU. ``gpu_{i}_percent`` is 0.0 when
            the GPU's total memory is unknown.
        """
        # One snapshot so total/used/percent describe the same instant.
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024**3),
            'system_memory_used_gb': vm.used / (1024**3),
            'system_memory_percent': vm.percent,
        }

        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024**3)
                    reserved = torch.cuda.memory_reserved(i) / (1024**3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)

                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    memory_info[f'gpu_{i}_percent'] = self._percent_of(allocated, total)

            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")

        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Return recommended runtime settings for a named model.

        Args:
            model_name: Case-insensitive model name. ``'sam2'`` and
                ``'matanyone'`` get tuned settings; others get the defaults.

        Returns:
            A dict with ``device``, ``mixed_precision``,
            ``gradient_checkpointing`` and ``batch_size``. ``device`` falls
            back to ``'cpu'`` when no device has been selected yet.
        """
        optimizations = {
            'device': self.get_device(),
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1,
        }

        try:
            name = model_name.lower()

            if name == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2,
                    })

            elif name == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True,
                    })

            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")

        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")

        return optimizations

    def _get_gpu_memory_gb(self) -> float:
        """Return memory (GB) of the selected GPU, else of ``cuda:0``.

        Returns 0.0 when CUDA is unavailable or no properties were recorded.
        """
        if not self.cuda_available or not self.device_info:
            return 0.0

        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Release cached CUDA memory; a no-op without CUDA/PyTorch."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("✅ GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")
|
|
| |
# Process-wide DeviceManager singleton; created on first use by
# get_device_manager() if nothing has assigned it yet.
_device_manager = None


def get_device_manager() -> DeviceManager:
    """Return the shared DeviceManager, creating and initializing it once.

    The instance is cached even when ``initialize()`` reports failure; in
    that case the manager has already fallen back to the CPU.
    """
    global _device_manager
    if _device_manager is None:
        _device_manager = DeviceManager()
        _device_manager.initialize()
    return _device_manager
|
|
def get_optimal_device() -> str:
    """Return the device string chosen by the shared DeviceManager."""
    return get_device_manager().get_device()
|
|
def fix_cuda_compatibility():
    """Ensure the shared DeviceManager exists and return its device info.

    No separate fix is applied here: the function obtains the shared manager
    and returns ``get_device_info()``.

    Returns:
        The manager's device-info dict, or ``{'device': 'cpu', 'error': ...}``
        if obtaining it raised.
    """
    try:
        dm = get_device_manager()
        logger.info("✅ CUDA compatibility checked and fixed")
        return dm.get_device_info()
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        return {'device': 'cpu', 'error': str(e)}
|
|
def setup_optimal_threading():
    """Make sure the shared DeviceManager has configured threading.

    Returns:
        True when threading is configured, either already or after this
        call. False when configuration did not complete or an error was
        raised.
    """
    try:
        dm = get_device_manager()
        if dm.threading_configured:
            logger.info("✅ Threading already configured optimally")
            return True

        # _configure_threading() logs and swallows its own errors, so the
        # flag is the only reliable success signal.
        dm._configure_threading()
        return bool(dm.threading_configured)
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False
|
|
def get_system_diagnostics() -> Dict:
    """Return device info, memory usage and a readiness flag.

    Returns:
        A dict with ``device_info`` (from ``get_device_info()``),
        ``memory_usage`` (from ``get_memory_usage()``) and ``system_ready``,
        which is True when the manager has a device assigned.
    """
    dm = get_device_manager()
    return {
        'device_info': dm.get_device_info(),
        'memory_usage': dm.get_memory_usage(),
        'system_ready': dm.device is not None,
    }
|
|
| |
# Eagerly create the shared manager at import time so later calls to
# get_device_manager() reuse it. initialize() reports failure through its
# return value, so the result is checked before logging success.
try:
    _device_manager = DeviceManager()
    if _device_manager.initialize():
        logger.info("✅ Device manager initialized on import")
    else:
        logger.warning(
            "⚠️ Device manager initialization warning: "
            f"initialize() reported failure, using {_device_manager.get_device()}"
        )
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")