# FILE: managers/gpu_manager.py
# DESCRIPTION: A hardware-aware, service-agnostic GPU allocator for the ADUC-SDR suite.
# This module inspects available GPUs and partitions them among services
# according to a predefined allocation strategy.

import logging
import math
from typing import List

import torch
class GPUManager:
    """
    Manages and allocates available GPUs among different services.
    It operates agnostically, providing device information without knowing
    the specifics of the services that will use them.
    """

    def __init__(self):
        """Initializes the manager, detects GPUs, and runs the allocation logic."""
        self.total_gpus = torch.cuda.device_count()
        self.ltx_main_gpus: List[int] = []  # GPUs for the LTX Transformer pipeline
        self.ltx_vae_gpu: List[int] = []    # Single-element list holding the dedicated LTX VAE GPU
        self.seedvr_gpus: List[int] = []    # GPUs for the SeedVR service
        self.vincie_gpus: List[int] = []    # GPUs for the VINCIE service
        self._allocate_gpus()
    def _allocate_gpus(self):
        """
        Implements the GPU allocation strategy based on the total number of
        detected GPUs.
        """
        logging.info("=" * 60)
        logging.info("🤖 Initializing GPU Manager (LTX, SeedVR, VINCIE)")
        logging.info(f" > Total GPUs detected: {self.total_gpus}")

        all_indices = list(range(self.total_gpus))

        if self.total_gpus == 0:
            logging.warning(" > No GPUs detected. All services will operate in CPU mode.")
        elif self.total_gpus == 1:
            logging.warning(" > 1 GPU detected. All services will share GPU 0. Memory swapping will be active.")
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [0]  # Shares with the main LTX pipeline
            self.seedvr_gpus = [0]
            self.vincie_gpus = [0]
        elif self.total_gpus == 2:
            logging.info(" > 2 GPUs detected. LTX will use a dedicated VAE device.")
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [1]  # The VAE gets the second GPU
            self.seedvr_gpus = [0]  # Shares with main LTX
            self.vincie_gpus = [0]  # Shares with main LTX
        else:  # 3 or more GPUs
            logging.info(f" > {self.total_gpus} GPUs detected. Distributing allocation.")
            # LTX always gets the first two GPUs for optimal performance.
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [1]
            remaining_gpus = all_indices[2:]

            # The rest are divided between SeedVR and VINCIE. VINCIE gets
            # priority (the larger half when the count is odd), as it scales
            # well with more GPUs.
            vincie_count = max(1, math.ceil(len(remaining_gpus) / 2))
            seedvr_count = len(remaining_gpus) - vincie_count
            self.vincie_gpus = remaining_gpus[:vincie_count]
            if seedvr_count > 0:
                self.seedvr_gpus = remaining_gpus[vincie_count:]
            else:
                # No GPU left over for SeedVR: it shares the main LTX GPU.
                self.seedvr_gpus = [0]

        logging.info(" > Final Allocation:")
        logging.info(f"   - LTX (Transformer): GPUs {self.ltx_main_gpus}")
        logging.info(f"   - LTX (VAE): GPU {self.ltx_vae_gpu[0] if self.ltx_vae_gpu else 'N/A'}")
        logging.info(f"   - SeedVR: GPUs {self.seedvr_gpus}")
        logging.info(f"   - VINCIE: GPUs {self.vincie_gpus}")
        logging.info("=" * 60)
    def get_ltx_device(self) -> torch.device:
        """Returns the primary device for the LTX Transformer pipeline."""
        if not self.ltx_main_gpus:
            return torch.device("cpu")
        return torch.device(f"cuda:{self.ltx_main_gpus[0]}")

    def get_ltx_vae_device(self) -> torch.device:
        """Returns the dedicated device for the LTX VAE."""
        if not self.ltx_vae_gpu:
            return torch.device("cpu")
        return torch.device(f"cuda:{self.ltx_vae_gpu[0]}")

    def get_seedvr_devices(self) -> List[int]:
        """Returns the list of GPU indices for the SeedVR service."""
        return self.seedvr_gpus

    def get_vincie_devices(self) -> List[int]:
        """Returns the list of GPU indices for the VINCIE service."""
        return self.vincie_gpus
    def requires_memory_swap(self) -> bool:
        """
        Determines whether memory swapping is necessary, i.e. whether multiple
        services share the same primary GPU. The dedicated VAE GPU is not
        considered in this check. For example, with 2 GPUs the main
        allocations are [0], [0] and [0], so GPU 0 is shared and swapping
        is required.
        """
        # Collect the GPUs used by the main, memory-intensive part of each service.
        all_main_allocations = self.ltx_main_gpus + self.seedvr_gpus + self.vincie_gpus

        # Count how many services are allocated to each unique GPU.
        gpu_usage_count = {}
        for gpu_idx in all_main_allocations:
            gpu_usage_count[gpu_idx] = gpu_usage_count.get(gpu_idx, 0) + 1

        # Swapping is required if any GPU is used by more than one service.
        for gpu_idx, count in gpu_usage_count.items():
            if count > 1:
                logging.warning(f"Memory swapping is ACTIVE because GPU {gpu_idx} is shared by multiple services.")
                return True

        logging.info("Memory swapping is INACTIVE. Each service has dedicated primary GPUs.")
        return False
# --- Singleton Instantiation ---
# This global instance is created once at import time and imported by all
# other modules.
gpu_manager = GPUManager()
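
# --- Usage sketch (illustrative) ---
# A minimal example of how a consumer might use the singleton; the print
# statements and the swap-handling comment are illustrative only, while the
# gpu_manager methods are this module's actual API. Note that the singleton
# (and its logging) is created at import time, so in real use configure
# logging before importing this module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    print(f"LTX transformer device: {gpu_manager.get_ltx_device()}")
    print(f"LTX VAE device:         {gpu_manager.get_ltx_vae_device()}")
    print(f"SeedVR GPU indices:     {gpu_manager.get_seedvr_devices()}")
    print(f"VINCIE GPU indices:     {gpu_manager.get_vincie_devices()}")

    if gpu_manager.requires_memory_swap():
        # A sharing service would offload its weights between runs,
        # e.g. model.to("cpu"), before another service claims the GPU.
        print("Shared primary GPU detected: services must swap weights in and out.")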