File size: 10,905 Bytes
550dd1d
98b590e
550dd1d
 
 
 
98b590e
 
 
 
 
 
 
 
 
 
3526526
98b590e
 
 
 
40b10a0
98b590e
 
 
 
550dd1d
 
 
 
98b590e
 
 
 
 
 
 
 
 
550dd1d
98b590e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550dd1d
98b590e
 
 
550dd1d
98b590e
 
 
 
 
 
 
 
550dd1d
98b590e
 
 
 
 
 
 
550dd1d
98b590e
 
 
550dd1d
 
 
 
98b590e
 
 
 
 
550dd1d
98b590e
 
 
 
 
 
 
 
587a0e1
 
 
 
 
3526526
 
 
 
4553f61
3526526
 
 
4553f61
 
 
3526526
 
 
4553f61
 
 
 
 
 
 
 
 
3526526
 
 
 
 
 
 
 
4553f61
3526526
 
587a0e1
 
98b590e
4553f61
587a0e1
 
 
4553f61
587a0e1
3526526
 
4553f61
 
02b4ee7
 
 
 
 
98b590e
 
587a0e1
98b590e
 
 
 
587a0e1
98b590e
587a0e1
 
 
4553f61
 
 
 
 
 
 
 
3526526
 
 
98b590e
3526526
587a0e1
4553f61
40b10a0
02b4ee7
3526526
 
 
 
 
 
 
 
587a0e1
02b4ee7
587a0e1
 
 
 
 
98b590e
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# ltx_manager_helpers.py
# Copyright (C) 4 de Agosto de 2025  Carlos Rodrigues dos Santos
#
# Este programa é software livre: você pode redistribuí-lo e/ou modificá-lo
# sob os termos da Licença Pública Geral Affero GNU...
# AVISO DE PATENTE PENDENTE: Consulte NOTICE.md.

import torch
import gc
import os
import yaml
import logging
import huggingface_hub
import time
import threading
import json
from typing import Optional, List

from optimization import optimize_ltx_worker, can_optimize_fp8
from hardware_manager import hardware_manager
from inference import create_ltx_video_pipeline, calculate_padding
from ltx_video.pipelines.pipeline_ltx_video import LatentConditioningItem, LTXMultiScalePipeline

logger = logging.getLogger(__name__)

class LtxWorker:
    """
    One LTX-Video pipeline instance tied to a single target device.

    The pipeline is always built on the CPU; `to_gpu` / `to_cpu` move it
    between the CPU and the device assigned to this worker.
    """
    def __init__(self, device_id, ltx_config_file):
        """
        Load the YAML config and build the pipeline on the CPU.

        Args:
            device_id: Device identifier handed to `torch.device`; ignored
                (replaced by 'cpu') when CUDA is not available.
            ltx_config_file: Path to the YAML file describing the model.
        """
        self.cpu_device = torch.device('cpu')
        target_device = device_id if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(target_device)
        logger.info(f"LTX Worker ({self.device}): Inicializando com config '{ltx_config_file}'...")

        with open(ltx_config_file, "r") as config_stream:
            self.config = yaml.safe_load(config_stream)

        # A checkpoint is treated as distilled when its path contains "distilled".
        self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")

        logger.info(f"LTX Worker ({self.device}): Carregando modelo para a CPU...")
        models_dir = "downloaded_models_gradio"
        checkpoint_name = self.config["checkpoint_path"]
        model_path = os.path.join(models_dir, checkpoint_name)
        # Download the checkpoint only when it is not already in the local folder.
        if not os.path.exists(model_path):
            model_path = huggingface_hub.hf_hub_download(
                repo_id="Lightricks/LTX-Video",
                filename=checkpoint_name,
                local_dir=models_dir,
                local_dir_use_symlinks=False,
            )

        self.pipeline = create_ltx_video_pipeline(
            ckpt_path=model_path,
            precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"],
            device='cpu',
        )
        logger.info(f"LTX Worker ({self.device}): Modelo pronto na CPU. É um modelo destilado? {self.is_distilled}")

    def to_gpu(self):
        """Move the pipeline to the assigned device and apply the FP8 optimization when available."""
        if self.device.type == 'cpu':
            return

        logger.info(f"LTX Worker: Movendo pipeline para a GPU {self.device}...")
        self.pipeline.to(self.device)

        # The FP8 path (and its log messages) only applies to CUDA devices.
        if self.device.type != 'cuda':
            return

        if can_optimize_fp8():
            logger.info(f"LTX Worker ({self.device}): GPU com suporte a FP8 detectada. Iniciando otimização...")
            optimize_ltx_worker(self)
            logger.info(f"LTX Worker ({self.device}): Otimização concluída.")
        else:
            logger.info(f"LTX Worker ({self.device}): Otimização FP8 não suportada ou desativada.")

    def to_cpu(self):
        """Move the pipeline back to the CPU, then collect garbage and empty the CUDA cache."""
        if self.device.type == 'cpu':
            return

        logger.info(f"LTX Worker: Descarregando pipeline da GPU {self.device}...")
        self.pipeline.to('cpu')
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def generate_video_fragment_internal(self, **kwargs):
        """Call the pipeline with `kwargs` and return the `.images` attribute of its output."""
        pipeline_output = self.pipeline(**kwargs)
        return pipeline_output.images

class LtxPoolManager:
    """
    Manage a pool of LtxWorkers so work can be spread across several GPUs.

    "HOT START" mode: when every worker targets a CUDA device, all pipelines
    are moved to their GPU at construction time and left there between calls.
    Workers are handed out round-robin; only the selection itself is guarded
    by a lock.
    """
    def __init__(self, device_ids, ltx_config_file):
        """
        Create one worker per device and pre-warm the GPUs when possible.

        Args:
            device_ids: Iterable of device identifiers, one worker is created
                for each.
            ltx_config_file: Path to the LTX YAML config passed to each worker.
        """
        logger.info(f"LTX POOL MANAGER: Criando workers para os dispositivos: {device_ids}")
        self.workers = [LtxWorker(dev_id, ltx_config_file) for dev_id in device_ids]
        self.current_worker_index = 0
        self.lock = threading.Lock()

        # NOTE: all() over an empty worker list is True, so an empty
        # `device_ids` also takes this branch (and warms nothing).
        if all(w.device.type == 'cuda' for w in self.workers):
            logger.info("LTX POOL MANAGER: MODO HOT START ATIVADO. Pré-aquecendo todas as GPUs...")
            for worker in self.workers:
                worker.to_gpu()
            logger.info("LTX POOL MANAGER: Todas as GPUs estão quentes e prontas.")
        else:
            logger.info("LTX POOL MANAGER: Operando em modo CPU ou misto. O pré-aquecimento de GPU foi ignorado.")

    def _get_next_worker(self):
        """Return the next worker in round-robin order and advance the cursor under the lock."""
        with self.lock:
            worker = self.workers[self.current_worker_index]
            self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
            return worker

    @staticmethod
    def _release_vram(worker):
        """Collect garbage and empty the CUDA cache for a CUDA worker; do nothing otherwise."""
        if worker and worker.device.type == 'cuda':
            with torch.cuda.device(worker.device):
                gc.collect()
                torch.cuda.empty_cache()

    @staticmethod
    def _run_pipeline(worker, pipeline_params):
        """Run the worker's pipeline (its inner `video_pipeline` for multi-scale pipelines) and return `.images`."""
        if isinstance(worker.pipeline, LTXMultiScalePipeline):
            return worker.pipeline.video_pipeline(**pipeline_params).images
        return worker.generate_video_fragment_internal(**pipeline_params)

    def _prepare_pipeline_params(self, worker: "LtxWorker", **kwargs) -> dict:
        """
        Build the keyword arguments for a pipeline call.

        Required kwargs: `height`, `width`, `video_total_frames`.
        Optional kwargs (with the defaults visible below): `video_fps`,
        `current_fragment_index`, `motion_prompt`, `negative_prompt`,
        `guidance_scale`, `stg_scale`, `rescaling_scale`,
        `num_inference_steps`, `latents`, `strength`,
        `conditioning_items_data`.

        Returns:
            The dict of parameters to pass to the pipeline.

        Raises:
            KeyError: if one of the required kwargs is missing.
        """
        pipeline_params = {
            "height": kwargs['height'], "width": kwargs['width'], "num_frames": kwargs['video_total_frames'],
            "frame_rate": kwargs.get('video_fps', 24),
            # Seed = current wall-clock second + fragment index, so results are not reproducible across runs.
            "generator": torch.Generator(device=worker.device).manual_seed(int(time.time()) + kwargs.get('current_fragment_index', 0)),
            "is_video": True, "vae_per_channel_normalize": True,
            "prompt": kwargs.get('motion_prompt', ""), "negative_prompt": kwargs.get('negative_prompt', "blurry, distorted, static, bad quality"),
            "guidance_scale": kwargs.get('guidance_scale', 1.0), "stg_scale": kwargs.get('stg_scale', 0.0),
            "rescaling_scale": kwargs.get('rescaling_scale', 0.15), "num_inference_steps": kwargs.get('num_inference_steps', 20),
            "output_type": "latent"
        }

        if 'latents' in kwargs:
            # Match the device and dtype of the worker's transformer.
            pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
        if 'strength' in kwargs:
            pipeline_params["strength"] = kwargs['strength']
        if 'conditioning_items_data' in kwargs:
            final_conditioning_items = []
            for item in kwargs['conditioning_items_data']:
                # NOTE: the caller's items are updated in place (their tensor is moved to the worker device).
                item.latent_tensor = item.latent_tensor.to(worker.device)
                final_conditioning_items.append(item)
            pipeline_params["conditioning_items"] = final_conditioning_items

        if worker.is_distilled:
            logger.info(f"Worker {worker.device} está usando um modelo destilado. Usando timesteps fixos.")
            # Distilled models take their schedule from config["first_pass"]["timesteps"] (None when absent).
            fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
            pipeline_params["timesteps"] = fixed_timesteps
            if fixed_timesteps:
                pipeline_params["num_inference_steps"] = len(fixed_timesteps)

        return pipeline_params

    def generate_latent_fragment(self, **kwargs) -> "tuple[torch.Tensor, tuple]":
        """
        Generate a latent fragment from scratch on the next worker.

        `height` and `width` are rounded up to the next multiple of 32 before
        the pipeline is called. Remaining kwargs are forwarded to
        `_prepare_pipeline_params`.

        Returns:
            `(result, padding_vals)`: the pipeline's `.images` output and the
            value returned by `calculate_padding` for the original vs padded size.

        Raises:
            Exception: any error raised during preparation or generation is
                logged and re-raised unchanged.
        """
        worker_to_use = self._get_next_worker()
        try:
            # Padding logic is specific to generation from scratch.
            height, width = kwargs['height'], kwargs['width']
            padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
            padding_vals = calculate_padding(height, width, padded_h, padded_w)
            kwargs['height'], kwargs['width'] = padded_h, padded_w

            pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)

            logger.info(f"Iniciando GERAÇÃO em {worker_to_use.device} com shape {padded_w}x{padded_h}")

            result = self._run_pipeline(worker_to_use, pipeline_params)
            return result, padding_vals
        except Exception as e:
            logger.error(f"LTX POOL MANAGER: Erro durante a geração em {worker_to_use.device}: {e}", exc_info=True)
            raise
        finally:
            self._release_vram(worker_to_use)

    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> "tuple[Optional[torch.Tensor], None]":
        """
        Refine existing latents on the next worker.

        `height` and `width` are derived from the latent's last two dimensions
        times the pipeline's `vae_scale_factor`, overriding any values passed in.
        Recognised kwargs: `video_total_frames` (default: latent frame count
        times `video_scale_factor`), `denoise_strength` (default 0.4),
        `refine_steps` (default 10), plus everything accepted by
        `_prepare_pipeline_params`.

        Args:
            latents_to_refine: 5-dimensional tensor; the last three dimensions
                are read as frames, height and width.

        Returns:
            `(result, None)` on success, or `(None, None)` when the run fails
            with a CUDA out-of-memory error.

        Raises:
            Exception: any other error is logged and re-raised unchanged.
        """
        worker_to_use = self._get_next_worker()
        try:
            # Sizing for refinement is derived from the latent's shape.
            _b, _c, _f, latent_h, latent_w = latents_to_refine.shape
            vae_scale_factor = worker_to_use.pipeline.vae_scale_factor

            # Make the pixel dimensions match the provided latent exactly.
            kwargs['height'] = latent_h * vae_scale_factor
            kwargs['width'] = latent_w * vae_scale_factor
            kwargs.setdefault('video_total_frames', _f * worker_to_use.pipeline.video_scale_factor)
            kwargs['latents'] = latents_to_refine
            kwargs['strength'] = kwargs.get('denoise_strength', 0.4)
            kwargs['num_inference_steps'] = int(kwargs.get('refine_steps', 10))

            pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)

            logger.info(f"Iniciando REFINAMENTO em {worker_to_use.device} com shape {kwargs['width']}x{kwargs['height']}")

            result = self._run_pipeline(worker_to_use, pipeline_params)
            return result, None

        except torch.cuda.OutOfMemoryError as e:
            logger.error(f"FALHA DE MEMÓRIA DURANTE O REFINAMENTO em {worker_to_use.device}: {e}")
            logger.warning("Limpando VRAM e retornando None para sinalizar a falha.")
            # The actual cleanup happens in the `finally` clause below.
            return None, None
        except Exception as e:
            logger.error(f"LTX POOL MANAGER: Erro inesperado durante o refinamento em {worker_to_use.device}: {e}", exc_info=True)
            raise
        finally:
            self._release_vram(worker_to_use)

# --- Singleton instantiation ---
# Runs at import time: reads config.yaml, asks the hardware manager for the
# LTX devices, and builds the shared pool manager exposed as
# `ltx_manager_singleton`.
logger.info("Lendo config.yaml para inicializar o LTX Pool Manager...")
with open("config.yaml", 'r') as f:
    config = yaml.safe_load(f)

# Settings for the LTX specialist live under config['specialists']['ltx'].
_ltx_settings = config['specialists']['ltx']
ltx_gpus_required = _ltx_settings['gpus_required']
ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
ltx_config_path = _ltx_settings['config_file']
ltx_manager_singleton = LtxPoolManager(
    device_ids=ltx_device_ids,
    ltx_config_file=ltx_config_path,
)
logger.info("Especialista de Vídeo (LTX) pronto.")