euiia committed on
Commit a0d2dcb · verified · 1 Parent(s): 795e89c

Update ltx_manager_helpers.py

Files changed (1)
  1. ltx_manager_helpers.py +66 -16
ltx_manager_helpers.py CHANGED
@@ -25,11 +25,14 @@ from optimization import optimize_ltx_worker, can_optimize_fp8
 from hardware_manager import hardware_manager
 from inference import create_ltx_video_pipeline, calculate_padding
 from ltx_video.pipelines.pipeline_ltx_video import LatentConditioningItem
-from ltx_video.models.autoencoders.vae_encode import vae_decode
 
 logger = logging.getLogger(__name__)
 
 class LtxWorker:
+    """
+    Represents a single instance of the LTX-Video pipeline on a specific device.
+    Manages loading the model on the CPU and moving it to/from the GPU.
+    """
     def __init__(self, device_id, ltx_config_file):
         self.cpu_device = torch.device('cpu')
         self.device = torch.device(device_id if torch.cuda.is_available() else 'cpu')
@@ -67,11 +70,13 @@
         logger.info(f"LTX Worker ({self.device}): FP8 optimization not supported or disabled. Using the default model.")
 
     def to_gpu(self):
+        """Moves the pipeline to its designated GPU."""
         if self.device.type == 'cpu': return
         logger.info(f"LTX Worker: Moving pipeline to GPU {self.device}...")
         self.pipeline.to(self.device)
 
     def to_cpu(self):
+        """Moves the pipeline back to the CPU and frees GPU memory."""
         if self.device.type == 'cpu': return
         logger.info(f"LTX Worker: Unloading pipeline from GPU {self.device}...")
         self.pipeline.to('cpu')
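
Note: the to_gpu/to_cpu pair above is the worker's whole device lifecycle. A minimal usage sketch (the config filename here is illustrative, not from this repo):

    # Hypothetical usage of the LtxWorker lifecycle; the config path is made up.
    worker = LtxWorker("cuda:0", "ltx_config.yaml")  # weights are loaded on the CPU first
    worker.to_gpu()    # move the pipeline onto cuda:0 before generating
    # ... run generation ...
    worker.to_cpu()    # hand the VRAM back when another worker takes over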
@@ -79,9 +84,14 @@
         if torch.cuda.is_available(): torch.cuda.empty_cache()
 
     def generate_video_fragment_internal(self, **kwargs):
+        """Invokes the generation pipeline."""
        return self.pipeline(**kwargs).images
 
 class LtxPoolManager:
+    """
+    Manages a pool of LtxWorkers to make the best use of multiple GPUs,
+    rotating the active worker so the previous one can unload from VRAM in the background.
+    """
     def __init__(self, device_ids, ltx_config_file):
         logger.info(f"LTX POOL MANAGER: Creating workers for devices: {device_ids}")
         self.workers = [LtxWorker(dev_id, ltx_config_file) for dev_id in device_ids]
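
Note: the new class docstring describes the pool's rotation scheme: while one worker generates, the previously used worker is unloaded from VRAM on a background thread. A self-contained sketch of that pattern, with a hypothetical Worker stand-in rather than the repo's LtxWorker:

    import threading

    class Worker:
        """Hypothetical stand-in for LtxWorker: only tracks where it 'lives'."""
        def __init__(self, name):
            self.name = name
            self.on_gpu = False
        def to_gpu(self):
            self.on_gpu = True
        def to_cpu(self):
            self.on_gpu = False

    workers = [Worker("cuda:0"), Worker("cuda:1")]
    state = {"current": 0, "last_cleanup": None}
    lock = threading.Lock()

    def acquire_worker():
        """Rotate to the next worker; unload the previous one in the background."""
        with lock:
            if state["last_cleanup"] and state["last_cleanup"].is_alive():
                state["last_cleanup"].join()          # never let two cleanups overlap
            active = workers[state["current"]]
            previous = workers[(state["current"] - 1) % len(workers)]
            cleanup = threading.Thread(target=previous.to_cpu)
            cleanup.start()                           # VRAM frees while we generate
            state["last_cleanup"] = cleanup
            active.to_gpu()
            state["current"] = (state["current"] + 1) % len(workers)
        return active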
@@ -90,19 +100,20 @@ class LtxPoolManager:
         self.last_cleanup_thread = None
 
     def _cleanup_worker_thread(self, worker):
+        """Background thread target that unloads a worker from the GPU."""
         logger.info(f"LTX CLEANUP THREAD: Starting cleanup of {worker.device} in the background...")
         worker.to_cpu()
 
     def _prepare_and_log_params(self, worker_to_use, **kwargs):
+        """Prepares and logs the parameters for the LTX pipeline call."""
         target_device = worker_to_use.device
         height, width = kwargs['height'], kwargs['width']
 
         conditioning_data = kwargs.get('conditioning_items_data', [])
         final_conditioning_items = []
-
-        # --- ADDED LOG: details of the conditioning tensors ---
         conditioning_log_details = []
         for i, item in enumerate(conditioning_data):
+            # Handles both LatentConditioningItem and ConditioningItem (if used in the future)
             if hasattr(item, 'latent_tensor'):
                 item.latent_tensor = item.latent_tensor.to(target_device)
                 final_conditioning_items.append(item)
@@ -121,23 +132,21 @@
             "conditioning_items": final_conditioning_items,
             "is_video": True, "vae_per_channel_normalize": True,
             "decode_timestep": float(kwargs.get('decode_timestep', worker_to_use.config.get("decode_timestep", 0.05))),
-            "decode_noise_scale": float(kwargs.get('decode_noise_scale', worker_to_use.config.get("decode_noise_scale", 0.025))),
             "image_cond_noise_scale": float(kwargs.get('image_cond_noise_scale', 0.0)),
-            "stochastic_sampling": bool(kwargs.get('stochastic_sampling', worker_to_use.config.get("stochastic_sampling", False))),
             "prompt": kwargs['motion_prompt'],
             "negative_prompt": kwargs.get('negative_prompt', "blurry, distorted, static, bad quality, artifacts"),
-            "guidance_scale": float(kwargs.get('guidance_scale', 1.0)),
-            "stg_scale": float(kwargs.get('stg_scale', 0.0)),
-            "rescaling_scale": float(kwargs.get('rescaling_scale', 1.0)),
+            "guidance_scale": float(kwargs.get('guidance_scale', 2.0)),
+            "stg_scale": float(kwargs.get('stg_scale', 0.025)),
+            "rescaling_scale": float(kwargs.get('rescaling_scale', 0.15)),
         }
 
         if worker_to_use.is_distilled:
             pipeline_params["timesteps"] = first_pass_config.get("timesteps")
-            pipeline_params["num_inference_steps"] = len(pipeline_params["timesteps"]) if "timesteps" in first_pass_config else 8
+            pipeline_params["num_inference_steps"] = len(pipeline_params["timesteps"]) if "timesteps" in first_pass_config else 20
         else:
-            pipeline_params["num_inference_steps"] = int(kwargs.get('num_inference_steps', 7))
+            pipeline_params["num_inference_steps"] = int(kwargs.get('num_inference_steps', 20))
 
-        # --- ADDED LOG: full dump of the pipeline parameters ---
+        # Detailed parameter log for debugging.
         log_friendly_params = pipeline_params.copy()
         log_friendly_params.pop('generator', None)
         log_friendly_params.pop('conditioning_items', None)
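
Note: every entry in the parameter block resolves through the same three-level chain: explicit kwarg, then the worker's config, then a hard-coded fallback. A sketch of how that could be factored out (the _resolve helper is a name invented here, not in the repo):

    def _resolve(kwargs, config, key, fallback, cast=float):
        """Resolve a pipeline parameter: explicit kwarg > worker config > fallback."""
        return cast(kwargs.get(key, config.get(key, fallback)))

    # Equivalent to the inline expression used for decode_timestep above:
    # _resolve(kwargs, worker_to_use.config, 'decode_timestep', 0.05)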
@@ -148,15 +157,16 @@
         logger.info("-" * 20 + " PIPELINE PARAMETERS " + "-" * 20)
         logger.info(json.dumps(log_friendly_params, indent=2))
         logger.info("-" * 20 + " CONDITIONING ITEMS " + "-" * 19)
-        logger.info("\n".join(conditioning_log_details))
+        logger.info("\n".join(conditioning_log_details) if conditioning_log_details else " - None")
         logger.info("="*60)
-        # --- END OF ADDED LOG ---
 
         return pipeline_params, padding_vals
 
     def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
+        """
+        Orchestrates the generation of a new video fragment from scratch (noise).
+        """
         worker_to_use = None
-        progress = kwargs.get('progress')
         try:
             with self.lock:
                 if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
@@ -173,8 +183,6 @@
             pipeline_params, padding_vals = self._prepare_and_log_params(worker_to_use, **kwargs)
             pipeline_params['output_type'] = "latent"
 
-            if progress: progress(0.1, desc=f"[LTX specialist on {worker_to_use.device}] Generating latents...")
-
             with torch.no_grad():
                 result_tensor = worker_to_use.generate_video_fragment_internal(**pipeline_params)
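
Note: a hedged usage sketch for generate_latent_fragment; 'ltx_manager' stands in for the module-level singleton (its actual variable name is cut off at the bottom of this diff), and the argument names follow _prepare_and_log_params:

    # Hypothetical caller of the pool manager's from-scratch generation.
    latents, padding = ltx_manager.generate_latent_fragment(
        height=512, width=768,                  # read by _prepare_and_log_params
        motion_prompt="a red kite climbing over dunes",
        conditioning_items_data=[],             # optional list of LatentConditioningItem
        num_inference_steps=20,                 # non-distilled default after this commit
    )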
@@ -187,7 +195,49 @@
                 logger.info(f"LTX POOL MANAGER: Running final cleanup for {worker_to_use.device}...")
                 worker_to_use.to_cpu()
 
+    def refine_latents(self, upscaled_latents: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
+        """
+        Orchestrates a short diffusion pass over already-existing latents to refine textures.
+        Used in the upscale post-production step.
+        """
+        worker_to_use = None
+        try:
+            with self.lock:
+                if self.last_cleanup_thread and self.last_cleanup_thread.is_alive():
+                    self.last_cleanup_thread.join()
+                worker_to_use = self.workers[self.current_worker_index]
+                previous_worker_index = (self.current_worker_index - 1 + len(self.workers)) % len(self.workers)
+                worker_to_cleanup = self.workers[previous_worker_index]
+                cleanup_thread = threading.Thread(target=self._cleanup_worker_thread, args=(worker_to_cleanup,))
+                cleanup_thread.start()
+                self.last_cleanup_thread = cleanup_thread
+                worker_to_use.to_gpu()
+                self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
+
+            pipeline_params, padding_vals = self._prepare_and_log_params(worker_to_use, **kwargs)
+
+            # Parameters specific to the refinement (denoise) pass
+            pipeline_params['latents'] = upscaled_latents.to(worker_to_use.device, dtype=worker_to_use.pipeline.transformer.dtype)
+            pipeline_params['strength'] = kwargs.get('denoise_strength', 0.4)
+            pipeline_params['num_inference_steps'] = int(kwargs.get('refine_steps', 10))
+            pipeline_params['output_type'] = "latent"
+
+            logger.info("LTX POOL MANAGER: Starting refinement (denoise) pass on high-resolution latents.")
+
+            with torch.no_grad():
+                refined_tensor = worker_to_use.generate_video_fragment_internal(**pipeline_params)
+
+            return refined_tensor, padding_vals
+
+        except Exception as e:
+            logger.error(f"LTX POOL MANAGER: Error while refining latents: {e}", exc_info=True)
+            raise e
+        finally:
+            if worker_to_use:
+                logger.info(f"LTX POOL MANAGER: Running final cleanup for {worker_to_use.device}...")
+                worker_to_use.to_cpu()
 
+# --- Singleton instantiation ---
 logger.info("Reading config.yaml to initialize the LTX Pool Manager...")
 with open("config.yaml", 'r') as f:
     config = yaml.safe_load(f)
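
Note: a matching sketch for the new refine_latents pass, which (as in typical img2img pipelines, presumably) re-runs only part of the schedule so a short pass sharpens texture without redoing structure; 'ltx_manager' and 'upscaled_latents' are assumed names:

    # Hypothetical caller: upscale latents elsewhere, then run the short denoise pass.
    refined, padding = ltx_manager.refine_latents(
        upscaled_latents,                 # torch.Tensor of high-resolution latents
        height=1024, width=1024,          # read by _prepare_and_log_params
        motion_prompt="slow dolly-in, fine film grain",
        denoise_strength=0.4,             # fraction of the schedule to re-run
        refine_steps=10,                  # short pass refines texture, not structure
    )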
 