# FILE: managers/gpu_manager.py
# DESCRIPTION: A hardware-aware, service-agnostic GPU allocator for the ADUC-SDR suite.
# This module inspects available GPUs and partitions them according to a predefined
# allocation strategy that depends on how many devices are detected.
import logging
import math
from typing import List

import torch

class GPUManager:
    """
    Manages and allocates available GPUs among different services.
    It operates agnostically, providing device information without knowing
    the specifics of the services that will use them.
    """

    def __init__(self):
        """Initializes the manager, detects GPUs, and runs the allocation logic."""
        self.total_gpus = torch.cuda.device_count()
        self.ltx_main_gpus = []
        self.ltx_vae_gpu = []
        self.seedvr_gpus = []
        self.vincie_gpus = []
        self._allocate_gpus()

    def _allocate_gpus(self):
        """
        Implements the GPU allocation strategy based on the total number of detected GPUs.
        """
        logging.info("=" * 60)
        logging.info("🤖 Initializing GPU Manager (LTX, SeedVR, VINCIE)")
        logging.info(f" > Total GPUs detected: {self.total_gpus}")
        all_indices = list(range(self.total_gpus))

        if self.total_gpus == 0:
            logging.warning(" > No GPUs detected. All services will operate in CPU mode.")
        elif self.total_gpus == 1:
            logging.warning(" > 1 GPU detected. All services will share GPU 0. Memory swapping will be active.")
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [0]  # Shares with the main LTX pipeline
            self.seedvr_gpus = [0]
            self.vincie_gpus = [0]
        elif self.total_gpus == 2:
            logging.info(" > 2 GPUs detected. LTX will use a dedicated VAE device.")
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [1]  # VAE gets the second GPU
            self.seedvr_gpus = [0]  # Shares with main LTX
            self.vincie_gpus = [0]  # Shares with main LTX
        else:  # 3 or more GPUs
            logging.info(f" > {self.total_gpus} GPUs detected. Distributing allocation.")
            # For optimal performance, LTX always gets the first two GPUs:
            # GPU 0 for the transformer, GPU 1 for the VAE.
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [1]
            remaining_gpus = all_indices[2:]
            # The rest are divided between SeedVR and VINCIE.
            # VINCIE gets priority, as it scales well with more GPUs.
            vincie_count = max(1, math.ceil(len(remaining_gpus) / 2))
            seedvr_count = len(remaining_gpus) - vincie_count
            self.vincie_gpus = remaining_gpus[:vincie_count]
            if seedvr_count > 0:
                # Any GPUs left over go to SeedVR.
                self.seedvr_gpus = remaining_gpus[vincie_count:]
            else:
                # If no GPUs are left for SeedVR, it shares the main LTX GPU.
                self.seedvr_gpus = [0]
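        # Illustrative outcomes of the split above (a sketch; this branch only
        # runs with 3+ GPUs, so indices 0 and 1 are already taken by LTX):
        #   3 GPUs -> remaining=[2]:       VINCIE=[2],    SeedVR shares GPU 0
        #   4 GPUs -> remaining=[2, 3]:    VINCIE=[2],    SeedVR=[3]
        #   5 GPUs -> remaining=[2, 3, 4]: VINCIE=[2, 3], SeedVR=[4]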
logging.info(f" > Final Allocation:")
logging.info(f" - LTX (Transformer): GPUs {self.ltx_main_gpus}")
logging.info(f" - LTX (VAE): GPU {self.ltx_vae_gpu[0] if self.ltx_vae_gpu else 'N/A'}")
logging.info(f" - SeedVR: GPUs {self.seedvr_gpus}")
logging.info(f" - VINCIE: GPUs {self.vincie_gpus}")
logging.info("="*60)

    def get_ltx_device(self) -> torch.device:
        """Returns the primary device for the LTX Transformer pipeline."""
        if not self.ltx_main_gpus:
            return torch.device("cpu")
        return torch.device(f"cuda:{self.ltx_main_gpus[0]}")

    def get_ltx_vae_device(self) -> torch.device:
        """Returns the dedicated device for the LTX VAE."""
        if not self.ltx_vae_gpu:
            return torch.device("cpu")
        return torch.device(f"cuda:{self.ltx_vae_gpu[0]}")

    def get_seedvr_devices(self) -> List[int]:
        """Returns the list of GPU indices for the SeedVR service."""
        return self.seedvr_gpus

    def get_vincie_devices(self) -> List[int]:
        """Returns the list of GPU indices for the VINCIE service."""
        return self.vincie_gpus

    def requires_memory_swap(self) -> bool:
        """
        Determines if memory swapping is necessary because multiple services
        are sharing the same primary GPU.
        The dedicated VAE GPU is not considered for swapping logic.
        """
        # Collect all GPUs used by the main, memory-intensive parts of the services.
        all_main_allocations = self.ltx_main_gpus + self.seedvr_gpus + self.vincie_gpus
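        # Worked example (the 2-GPU case from _allocate_gpus above):
        #   ltx_main_gpus=[0], seedvr_gpus=[0], vincie_gpus=[0]
        #   -> all_main_allocations == [0, 0, 0], so GPU 0 is shared and swapping
        #      is required, even though the VAE sits alone on GPU 1.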
        # Count how many services are allocated to each unique GPU.
        gpu_usage_count = {}
        for gpu_idx in all_main_allocations:
            gpu_usage_count[gpu_idx] = gpu_usage_count.get(gpu_idx, 0) + 1
        # Swapping is required if any GPU is used by more than one service.
        for gpu_idx, count in gpu_usage_count.items():
            if count > 1:
                logging.warning(f"Memory swapping is ACTIVE because GPU {gpu_idx} is shared by multiple services.")
                return True
        logging.info("Memory swapping is INACTIVE. Each service has dedicated primary GPUs.")
        return False

# --- Singleton Instantiation ---
# This global instance is created once and imported by all other modules.
gpu_manager = GPUManager()
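
# --- Usage sketch (illustrative; not part of the module's API) ---
# A minimal demonstration of the public surface above. Running this file
# directly prints the allocation chosen for the current machine; the
# basicConfig call is only here so the allocation logs are visible.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # A fresh instance, so the allocation log runs after logging is configured;
    # importing modules would normally just use the `gpu_manager` singleton.
    manager = GPUManager()
    print(f"LTX transformer device: {manager.get_ltx_device()}")
    print(f"LTX VAE device:         {manager.get_ltx_vae_device()}")
    print(f"SeedVR GPU indices:     {manager.get_seedvr_devices()}")
    print(f"VINCIE GPU indices:     {manager.get_vincie_devices()}")
    print(f"Memory swap required:   {manager.requires_memory_swap()}")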