# ltx_manager_helpers.py (with specialized refinement logic)
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
import torch
import gc
import os
import yaml
import logging
import huggingface_hub
import time
import threading
import json
from optimization import optimize_ltx_worker, can_optimize_fp8
from hardware_manager import hardware_manager
from inference import create_ltx_video_pipeline, calculate_padding
from ltx_video.pipelines.pipeline_ltx_video import LatentConditioningItem
logger = logging.getLogger(__name__)
class LtxWorker:
    """Wraps a single LTX-Video pipeline bound to one device (GPU or CPU)."""

    def __init__(self, device_id, ltx_config_file):
        self.cpu_device = torch.device('cpu')
        self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
        logger.info(f"LTX Worker ({self.device}): Initializing with config '{ltx_config_file}'...")
        with open(ltx_config_file, "r") as file:
            self.config = yaml.safe_load(file)
        self.is_distilled = "distilled" in self.config.get("checkpoint_path", "")
        models_dir = "downloaded_models_gradio"
        logger.info(f"LTX Worker ({self.device}): Loading model onto the CPU...")
        model_path = os.path.join(models_dir, self.config["checkpoint_path"])
        if not os.path.exists(model_path):
            model_path = huggingface_hub.hf_hub_download(
                repo_id="Lightricks/LTX-Video", filename=self.config["checkpoint_path"],
                local_dir=models_dir, local_dir_use_symlinks=False
            )
        self.pipeline = create_ltx_video_pipeline(
            ckpt_path=model_path, precision=self.config["precision"],
            text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
            sampler=self.config["sampler"], device='cpu'
        )
        logger.info(f"LTX Worker ({self.device}): Model ready on CPU. Distilled model? {self.is_distilled}")
    def to_gpu(self):
        if self.device.type == 'cpu': return
        logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
        self.pipeline.to(self.device)
        if self.device.type == 'cuda' and can_optimize_fp8():
            logger.info(f"LTX Worker ({self.device}): GPU with FP8 support detected. Starting optimization...")
            optimize_ltx_worker(self)
            logger.info(f"LTX Worker ({self.device}): Optimization complete.")
        elif self.device.type == 'cuda':
            logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled.")
    def to_cpu(self):
        if self.device.type == 'cpu': return
        logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
        self.pipeline.to('cpu')
        gc.collect()
        if torch.cuda.is_available(): torch.cuda.empty_cache()
    def generate_video_fragment_internal(self, **kwargs):
        return self.pipeline(**kwargs).images
class LtxPoolManager:
    """Round-robin pool of LtxWorker instances, one per allocated device."""

    def __init__(self, device_ids, ltx_config_file):
        logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
        self.workers = [LtxWorker(dev_id, ltx_config_file) for dev_id in device_ids]
        self.current_worker_index = 0
        self.lock = threading.Lock()
        if all(w.device.type == 'cuda' for w in self.workers):
            logger.info("LTX POOL MANAGER: HOT START MODE ENABLED. Warming up all GPUs...")
            for worker in self.workers:
                worker.to_gpu()
            logger.info("LTX POOL MANAGER: All GPUs are warm and ready.")
        else:
            logger.info("LTX POOL MANAGER: Running in CPU or mixed mode. GPU warm-up skipped.")
    def _get_next_worker(self):
        with self.lock:
            worker = self.workers[self.current_worker_index]
            self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
            return worker
    def generate_latent_fragment(self, **kwargs) -> tuple[torch.Tensor, tuple]:
        worker_to_use = self._get_next_worker()
        try:
            height, width = kwargs['height'], kwargs['width']
            # Round the requested resolution up to the next multiple of 32.
            padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
            padding_vals = calculate_padding(height, width, padded_h, padded_w)
            conditioning_items = [item.to(worker_to_use.device) for item in kwargs.get('conditioning_items_data', [])]
            pipeline_params = {
                "height": padded_h, "width": padded_w, "num_frames": kwargs['video_total_frames'],
                "frame_rate": kwargs['video_fps'],
                "generator": torch.Generator(device=worker_to_use.device).manual_seed(int(time.time()) + kwargs['current_fragment_index']),
                "conditioning_items": conditioning_items, "is_video": True, "vae_per_channel_normalize": True,
                "prompt": kwargs['motion_prompt'], "negative_prompt": "blurry, distorted, static, bad quality",
                "guidance_scale": kwargs['guidance_scale'], "stg_scale": kwargs['stg_scale'],
                "rescaling_scale": kwargs['rescaling_scale'], "num_inference_steps": kwargs['num_inference_steps']
            }
            if worker_to_use.is_distilled:
                # Distilled checkpoints ship a fixed timestep schedule in their config.
                pipeline_params["timesteps"] = worker_to_use.config.get("first_pass", {}).get("timesteps")
                pipeline_params["num_inference_steps"] = len(pipeline_params["timesteps"]) if pipeline_params["timesteps"] else 20
            result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
            return result, padding_vals
        except Exception as e:
            logger.error(f"LTX POOL MANAGER: Error during generation on {worker_to_use.device}: {e}", exc_info=True)
            raise
        finally:
            if worker_to_use and worker_to_use.device.type == 'cuda':
                with torch.cuda.device(worker_to_use.device):
                    gc.collect()
                    torch.cuda.empty_cache()
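    # For reference, a worked example of the 32-pixel rounding used above (values are
    # illustrative only, not taken from any real request):
    #   height=720  -> ((720 - 1) // 32 + 1) * 32 = 736
    #   width=1280  -> ((1280 - 1) // 32 + 1) * 32 = 1280 (already a multiple of 32)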
    def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> tuple[torch.Tensor, None]:
        worker_to_use = self._get_next_worker()
        try:
            # --- [START OF FIX] ---
            # For refinement, the dimensions are derived DIRECTLY from the latent tensor.
            # No padding is applied. The pixel resolution is passed along, but the latent shape is the source of truth.
            height, width, num_frames = kwargs['height'], kwargs['width'], kwargs['video_total_frames']
            pipeline_params = {
                "latents": latents_to_refine.to(worker_to_use.device, dtype=worker_to_use.pipeline.transformer.dtype),
                "height": height, "width": width, "num_frames": num_frames, "frame_rate": kwargs['video_fps'],
                "generator": torch.Generator(device=worker_to_use.device).manual_seed(int(time.time()) + kwargs['current_fragment_index']),
                "is_video": True, "vae_per_channel_normalize": True,
                "prompt": kwargs['motion_prompt'], "negative_prompt": "blurry, distorted, static, bad quality",
                "guidance_scale": kwargs.get('guidance_scale', 1.0),  # defaults to 1.0 for unconditional refinement when not specified
                "num_inference_steps": int(kwargs.get('refine_steps', 10)),
                "strength": kwargs.get('denoise_strength', 0.4),
                "output_type": "latent"
            }
            # --- [END OF FIX] ---
            logger.info("LTX POOL MANAGER: Starting refinement (denoise) pass...")
            result = worker_to_use.generate_video_fragment_internal(**pipeline_params)
            return result, None  # no padding is applied during refinement
        except Exception as e:
            logger.error(f"LTX POOL MANAGER: Error during refinement on {worker_to_use.device}: {e}", exc_info=True)
            raise
        finally:
            if worker_to_use and worker_to_use.device.type == 'cuda':
                with torch.cuda.device(worker_to_use.device):
                    gc.collect()
                    torch.cuda.empty_cache()
# --- Singleton instantiation ---
logger.info("Reading config.yaml to initialize the LTX Pool Manager...")
with open("config.yaml", 'r') as f:
    config = yaml.safe_load(f)
ltx_gpus_required = config['specialists']['ltx']['gpus_required']
ltx_device_ids = hardware_manager.allocate_gpus('LTX', ltx_gpus_required)
ltx_config_path = config['specialists']['ltx']['config_file']
ltx_manager_singleton = LtxPoolManager(device_ids=ltx_device_ids, ltx_config_file=ltx_config_path)
logger.info("Video specialist (LTX) ready.")