Update ltx_manager_helpers.py
ltx_manager_helpers.py (+27 -58)
@@ -105,87 +105,52 @@ class LtxPoolManager:
         self.current_worker_index = (self.current_worker_index + 1) % len(self.workers)
         return worker

-    # --- [NEW] Centralized function for preparing pipeline parameters ---
     def _prepare_pipeline_params(self, worker: LtxWorker, **kwargs) -> dict:
         """Prepares the parameter dictionary for the pipeline, handling special cases such as distilled models."""
-
-        # Required parameters
-        height = kwargs['height']
-        width = kwargs['width']
-        num_frames = kwargs['video_total_frames']
-
-        # Parameters with default values
-        motion_prompt = kwargs.get('motion_prompt', "")
-        negative_prompt = kwargs.get('negative_prompt', "blurry, distorted, static, bad quality")
-        guidance_scale = kwargs.get('guidance_scale', 1.0)
-        stg_scale = kwargs.get('stg_scale', 0.0)
-        rescaling_scale = kwargs.get('rescaling_scale', 0.15)
-        num_inference_steps = kwargs.get('num_inference_steps', 20)
-
-        # Optional parameters (for generation or refinement)
-        latents_input = kwargs.get('latents')
-        strength = kwargs.get('strength')
-        conditioning_data = kwargs.get('conditioning_items_data')
-
-        # Prepare the conditioning items if they exist
-        final_conditioning_items = []
-        if conditioning_data:
-            for item in conditioning_data:
-                item.latent_tensor = item.latent_tensor.to(worker.device)
-                final_conditioning_items.append(item)
-
-        # Build the base parameter dictionary
         pipeline_params = {
-            "height": height, "width": width, "num_frames": num_frames,
+            "height": kwargs['height'], "width": kwargs['width'], "num_frames": kwargs['video_total_frames'],
             "frame_rate": kwargs.get('video_fps', 24),
             "generator": torch.Generator(device=worker.device).manual_seed(int(time.time()) + kwargs.get('current_fragment_index', 0)),
             "is_video": True, "vae_per_channel_normalize": True,
-            "prompt": motion_prompt, "negative_prompt": negative_prompt,
-            "guidance_scale": guidance_scale, "stg_scale": stg_scale,
-            "rescaling_scale": rescaling_scale, "num_inference_steps": num_inference_steps,
+            "prompt": kwargs.get('motion_prompt', ""), "negative_prompt": kwargs.get('negative_prompt', "blurry, distorted, static, bad quality"),
+            "guidance_scale": kwargs.get('guidance_scale', 1.0), "stg_scale": kwargs.get('stg_scale', 0.0),
+            "rescaling_scale": kwargs.get('rescaling_scale', 0.15), "num_inference_steps": kwargs.get('num_inference_steps', 20),
             "output_type": "latent"
         }

-
-
-
-
-
-
+        if 'latents' in kwargs:
+            pipeline_params["latents"] = kwargs['latents'].to(worker.device, dtype=worker.pipeline.transformer.dtype)
+        if 'strength' in kwargs:
+            pipeline_params["strength"] = kwargs['strength']
+        if 'conditioning_items_data' in kwargs:
+            final_conditioning_items = []
+            for item in kwargs['conditioning_items_data']:
+                item.latent_tensor = item.latent_tensor.to(worker.device)
+                final_conditioning_items.append(item)
             pipeline_params["conditioning_items"] = final_conditioning_items

-        # --- CENTRALIZED, ERROR-PROOF LOGIC ---
-        # If the model is distilled, override the steps with the mandatory fixed timesteps.
         if worker.is_distilled:
             logger.info(f"Worker {worker.device} is using a distilled model. Using fixed timesteps.")
             fixed_timesteps = worker.config.get("first_pass", {}).get("timesteps")
             pipeline_params["timesteps"] = fixed_timesteps
             if fixed_timesteps:
                 pipeline_params["num_inference_steps"] = len(fixed_timesteps)
-
-        # Log the parameters for debugging
-        log_params = {k: v for k, v in pipeline_params.items() if k not in ['generator', 'latents', 'conditioning_items']}
-        logger.info(f"Parameters prepared for the pipeline on {worker.device}:\n{json.dumps(log_params, indent=2)}")
-
+
         return pipeline_params

-    # --- [REFACTORED] Simplified generation function ---
     def generate_latent_fragment(self, **kwargs) -> (torch.Tensor, tuple):
         worker_to_use = self._get_next_worker()
         try:
-            #
+            # [FIX] The padding logic is specific to generation from scratch.
             height, width = kwargs['height'], kwargs['width']
             padded_h, padded_w = ((height - 1) // 32 + 1) * 32, ((width - 1) // 32 + 1) * 32
             padding_vals = calculate_padding(height, width, padded_h, padded_w)
-
-            # Update kwargs with the padded dimensions
-            kwargs['height'] = padded_h
-            kwargs['width'] = padded_w
+            kwargs['height'], kwargs['width'] = padded_h, padded_w

-            # Prepare the parameters using the centralized function
             pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)

-
+            logger.info(f"Starting GENERATION on {worker_to_use.device} with shape {padded_w}x{padded_h}")
+
             if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline):
                 result = worker_to_use.pipeline.video_pipeline(**pipeline_params).images
             else:
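Note on the padding math above: the generation path rounds the requested resolution up to the next multiple of 32 before preparing parameters, and calculate_padding (an LTX-Video utility) turns the difference into per-side pad amounts. Below is a minimal sketch of that arithmetic, assuming a symmetric split of the extra pixels; pad_to_multiple_of_32 is a hypothetical illustration, not the project's helper.

def pad_to_multiple_of_32(height: int, width: int) -> tuple:
    # Round each dimension up to the next multiple of 32, as in the diff.
    padded_h = ((height - 1) // 32 + 1) * 32
    padded_w = ((width - 1) // 32 + 1) * 32
    # Assumed symmetric split: (left, right, top, bottom), the ordering
    # torch.nn.functional.pad uses for the last two dimensions.
    pad_h, pad_w = padded_h - height, padded_w - width
    return padded_h, padded_w, (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)

print(pad_to_multiple_of_32(704, 1280))  # already aligned: (704, 1280, (0, 0, 0, 0))
print(pad_to_multiple_of_32(713, 1270))  # rounded up: (736, 1280, (5, 5, 11, 12))

The (x - 1) // 32 + 1 form rounds up while leaving already-aligned sizes untouched, which is why the first example pads by zero.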
@@ -200,22 +165,26 @@ class LtxPoolManager:
             with torch.cuda.device(worker_to_use.device):
                 gc.collect(); torch.cuda.empty_cache()

-    # --- [REFACTORED] Simplified refinement function ---
     def refine_latents(self, latents_to_refine: torch.Tensor, **kwargs) -> (torch.Tensor, tuple):
         worker_to_use = self._get_next_worker()
         try:
-            #
+            # [FIX] The sizing logic for refinement derives from the latent's shape.
+            _b, _c, _f, latent_h, latent_w = latents_to_refine.shape
+            vae_scale_factor = worker_to_use.pipeline.vae_scale_factor
+
+            # Ensure the dimensions match the provided latent EXACTLY.
+            kwargs['height'] = latent_h * vae_scale_factor
+            kwargs['width'] = latent_w * vae_scale_factor
+            kwargs['video_total_frames'] = kwargs.get('video_total_frames', _f * worker_to_use.pipeline.video_scale_factor)
             kwargs['latents'] = latents_to_refine
             kwargs['strength'] = kwargs.get('denoise_strength', 0.4)
             kwargs['num_inference_steps'] = int(kwargs.get('refine_steps', 10))

-            # Prepare the parameters using the same centralized function
             pipeline_params = self._prepare_pipeline_params(worker_to_use, **kwargs)

-            logger.info("
+            logger.info(f"Starting REFINEMENT on {worker_to_use.device} with shape {kwargs['width']}x{kwargs['height']}")

             pipeline_to_call = worker_to_use.pipeline.video_pipeline if isinstance(worker_to_use.pipeline, LTXMultiScalePipeline) else worker_to_use.pipeline
-
             result = pipeline_to_call(**pipeline_params).images
             return result, None
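Note on the shape derivation above: refine_latents now recomputes height, width, and total frames from the latent tensor itself instead of trusting caller-supplied values, which is what guarantees the refinement pass matches the provided latent. A minimal sketch under assumed values follows; dims_from_latents is hypothetical, the (batch, channels, frames, height, width) layout is taken from the diff, and the scale factors 32 and 8 are placeholders for the real worker_to_use.pipeline.vae_scale_factor and video_scale_factor.

import torch

def dims_from_latents(latents: torch.Tensor,
                      vae_scale_factor: int = 32,
                      video_scale_factor: int = 8) -> dict:
    # Latent layout assumed: (batch, channels, frames, height, width).
    _b, _c, num_latent_frames, latent_h, latent_w = latents.shape
    return {
        "height": latent_h * vae_scale_factor,                       # pixel height
        "width": latent_w * vae_scale_factor,                        # pixel width
        "video_total_frames": num_latent_frames * video_scale_factor,
    }

latents = torch.randn(1, 128, 3, 22, 40)  # example shape only
print(dims_from_latents(latents))  # {'height': 704, 'width': 1280, 'video_total_frames': 24}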
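Note on the distilled-model branch in _prepare_pipeline_params: fixed timesteps from the model config take precedence, and num_inference_steps must follow the length of that schedule so the step count and the scheduler agree. Below is a standalone sketch of the override; apply_distilled_override is hypothetical, the config keys mirror the diff, and the timestep values are placeholders rather than the model's real schedule.

def apply_distilled_override(pipeline_params: dict, config: dict, is_distilled: bool) -> dict:
    if is_distilled:
        # Distilled checkpoints ship a fixed schedule under first_pass.timesteps.
        fixed_timesteps = config.get("first_pass", {}).get("timesteps")
        pipeline_params["timesteps"] = fixed_timesteps
        if fixed_timesteps:
            # Keep the step count consistent with the fixed schedule's length.
            pipeline_params["num_inference_steps"] = len(fixed_timesteps)
    return pipeline_params

params = {"num_inference_steps": 20}
config = {"first_pass": {"timesteps": [1.0, 0.75, 0.5, 0.25]}}  # placeholder values
print(apply_distilled_override(params, config, is_distilled=True))
# {'num_inference_steps': 4, 'timesteps': [1.0, 0.75, 0.5, 0.25]}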