File size: 5,426 Bytes
dfbfabf
60bd16c
 
8fd9bdd
 
 
99dd76a
60bd16c
90c54c2
8fd9bdd
 
 
60bd16c
 
 
8fd9bdd
 
60bd16c
8fd9bdd
99dd76a
 
8fd9bdd
99dd76a
8fd9bdd
 
 
60bd16c
 
 
 
 
 
8fd9bdd
99dd76a
 
8fd9bdd
60bd16c
8fd9bdd
60bd16c
99dd76a
60bd16c
8fd9bdd
99dd76a
 
60bd16c
99dd76a
60bd16c
 
 
 
 
 
99dd76a
 
 
 
 
60bd16c
 
 
 
 
 
 
 
99dd76a
60bd16c
 
 
8fd9bdd
60bd16c
 
 
 
 
 
99dd76a
 
60bd16c
99dd76a
 
 
8fd9bdd
99dd76a
60bd16c
99dd76a
8fd9bdd
99dd76a
8fd9bdd
60bd16c
 
8fd9bdd
99dd76a
60bd16c
 
99dd76a
8fd9bdd
 
60bd16c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fd9bdd
60bd16c
 
8fd9bdd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# FILE: managers/gpu_manager.py
# DESCRIPTION: A hardware-aware, service-agnostic GPU allocator for the ADUC-SDR suite.
# This module inspects available GPUs and partitions them according to a predefined
# allocation strategy based on the total number of devices detected.

import logging
import math
import os
from collections import Counter
from typing import List

import torch

class GPUManager:
    """
    Manages and allocates available GPUs among different services.

    It operates agnostically, providing device information without knowing
    the specifics of the services that will use them. The allocation is
    computed once at construction time from ``torch.cuda.device_count()``.

    Attributes:
        total_gpus: Number of CUDA devices detected by torch.
        ltx_main_gpus: GPU indices for the LTX transformer pipeline.
        ltx_vae_gpu: GPU index for the LTX VAE, kept as a list for a
            uniform API with the other allocations (0 or 1 element).
        seedvr_gpus: GPU indices for the SeedVR service.
        vincie_gpus: GPU indices for the VINCIE service.
    """
    def __init__(self) -> None:
        """Initializes the manager, detects GPUs, and runs the allocation logic."""
        self.total_gpus: int = torch.cuda.device_count()
        self.ltx_main_gpus: List[int] = []
        self.ltx_vae_gpu: List[int] = []
        self.seedvr_gpus: List[int] = []
        self.vincie_gpus: List[int] = []
        self._allocate_gpus()

    def _allocate_gpus(self) -> None:
        """
        Implements the GPU allocation strategy based on the total number of detected GPUs.

        Strategy:
            * 0 GPUs  -> every service falls back to CPU (all lists stay empty).
            * 1 GPU   -> all services share GPU 0; memory swapping will be needed.
            * 2 GPUs  -> LTX main on 0, LTX VAE dedicated on 1; SeedVR and
              VINCIE share GPU 0 with the LTX transformer.
            * 3+ GPUs -> LTX main on 0, VAE on 1; the remaining devices are
              split between VINCIE (priority: ceiling half) and SeedVR (the
              rest, or GPU 0 again when nothing is left over).
        """
        logging.info("=" * 60)
        logging.info("🤖 Initializing GPU Manager (LTX, SeedVR, VINCIE)")
        logging.info(f"   > Total GPUs detected: {self.total_gpus}")

        all_indices = list(range(self.total_gpus))

        if self.total_gpus == 0:
            logging.warning("   > No GPUs detected. All services will operate in CPU mode.")
        elif self.total_gpus == 1:
            logging.warning("   > 1 GPU detected. All services will share GPU 0. Memory swapping will be active.")
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [0]  # Shares with the main LTX pipeline
            self.seedvr_gpus = [0]
            self.vincie_gpus = [0]
        elif self.total_gpus == 2:
            logging.info("   > 2 GPUs detected. LTX will use a dedicated VAE device.")
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [1]  # VAE gets the second GPU
            self.seedvr_gpus = [0]  # Shares with main LTX
            self.vincie_gpus = [0]  # Shares with main LTX
        else:  # 3 or more GPUs
            logging.info(f"   > {self.total_gpus} GPUs detected. Distributing allocation.")
            # LTX always gets the first two GPUs for optimal performance.
            self.ltx_main_gpus = [0]
            self.ltx_vae_gpu = [1]

            remaining_gpus = all_indices[2:]

            # The rest are divided between SeedVR and VINCIE.
            # VINCIE gets priority (the ceiling half) as it can scale well
            # with more GPUs.
            vincie_count = max(1, math.ceil(len(remaining_gpus) / 2))
            seedvr_count = len(remaining_gpus) - vincie_count

            self.vincie_gpus = remaining_gpus[:vincie_count]
            if seedvr_count > 0:
                self.seedvr_gpus = remaining_gpus[vincie_count:]
            else:
                # No GPUs are left for SeedVR: it shares the main LTX GPU.
                self.seedvr_gpus = [0]

        # F541 fix: this line previously used an f-string with no placeholders.
        logging.info("   > Final Allocation:")
        logging.info(f"     - LTX (Transformer): GPUs {self.ltx_main_gpus}")
        logging.info(f"     - LTX (VAE):         GPU  {self.ltx_vae_gpu[0] if self.ltx_vae_gpu else 'N/A'}")
        logging.info(f"     - SeedVR:            GPUs {self.seedvr_gpus}")
        logging.info(f"     - VINCIE:            GPUs {self.vincie_gpus}")
        logging.info("=" * 60)

    def get_ltx_device(self) -> torch.device:
        """Returns the primary device for the LTX Transformer pipeline (CPU fallback)."""
        if not self.ltx_main_gpus:
            return torch.device("cpu")
        return torch.device(f"cuda:{self.ltx_main_gpus[0]}")

    def get_ltx_vae_device(self) -> torch.device:
        """Returns the dedicated device for the LTX VAE (CPU fallback)."""
        if not self.ltx_vae_gpu:
            return torch.device("cpu")
        return torch.device(f"cuda:{self.ltx_vae_gpu[0]}")

    def get_seedvr_devices(self) -> List[int]:
        """Returns the list of GPU indices for the SeedVR service."""
        return self.seedvr_gpus

    def get_vincie_devices(self) -> List[int]:
        """Returns the list of GPU indices for the VINCIE service."""
        return self.vincie_gpus

    def requires_memory_swap(self) -> bool:
        """
        Determines if memory swapping is necessary because multiple services
        are sharing the same primary GPU.

        The dedicated VAE GPU is deliberately excluded from this check.

        Returns:
            True when at least one GPU index appears in more than one of the
            main allocations (LTX transformer, SeedVR, VINCIE).
        """
        # Collect all GPUs used by the main, memory-intensive parts of the services
        all_main_allocations = self.ltx_main_gpus + self.seedvr_gpus + self.vincie_gpus

        # Counter replaces the previous hand-rolled dict-based frequency count.
        gpu_usage_count = Counter(all_main_allocations)

        # Swapping is required if any GPU is used by more than one service.
        for gpu_idx, count in gpu_usage_count.items():
            if count > 1:
                logging.warning(f"Memory swapping is ACTIVE because GPU {gpu_idx} is shared by multiple services.")
                return True

        logging.info("Memory swapping is INACTIVE. Each service has dedicated primary GPUs.")
        return False

# --- Singleton Instantiation ---
# This global instance is created once and imported by all other modules.
# NOTE(review): this runs at import time — GPUManager.__init__ calls
# torch.cuda.device_count() and _allocate_gpus(), which emits the allocation
# log banner. Importing this module therefore has logging side effects and
# touches the CUDA runtime; confirm this is acceptable for all importers.
gpu_manager = GPUManager()