Update api/ltx/ltx_aduc_pipeline.py
api/ltx/ltx_aduc_pipeline.py  +62 -58
@@ -57,6 +57,8 @@ FRAMES_ALIGNMENT = 8
 repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
 if repo_path not in sys.path:
     sys.path.insert(0, repo_path)
+from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+
 
 # ==============================================================================
 # --- CLASSE DE SERVIÇO (O ORQUESTRADOR) ---
@@ -130,11 +132,58 @@ class LtxAducPipeline:
             strengths=[item[2] for item in initial_media_items],
             target_resolution=(kwargs['height'], kwargs['width'])
         )
+
+        height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
+        downscale_factor = self.config.get("downscale_factor", 0.6666666)
+        vae_scale_factor = self.pipeline.vae_scale_factor
+        downscaled_height = self._align(int(height_padded * downscale_factor), vae_scale_factor)
+        downscaled_width = self._align(int(width_padded * downscale_factor), vae_scale_factor)
+
+        call_kwargs = self.config.get("first_pass", {}).copy()
 
-
-
-
+        stg_mode_str = self.config.get("stg_mode", "attention_values")
+        if stg_mode_str.lower() in ["stg_av", "attention_values"]:
+            call_kwargs["skip_layer_strategy"] = SkipLayerStrategy.AttentionValues
+        elif stg_mode_str.lower() in ["stg_as", "attention_skip"]:
+            call_kwargs["skip_layer_strategy"] = SkipLayerStrategy.AttentionSkip
+        elif stg_mode_str.lower() in ["stg_r", "residual"]:
+            call_kwargs["skip_layer_strategy"] = SkipLayerStrategy.Residual
+        elif stg_mode_str.lower() in ["stg_t", "transformer_block"]:
+            call_kwargs["skip_layer_strategy"] = SkipLayerStrategy.TransformerBlock
+
+        call_kwargs.update({
+            "skip_initial_inference_steps": 0,
+            "skip_final_inference_steps": 0,
+            "num_inference_steps": 20,
+            "negative_prompt": kwargs['negative_prompt'],
+            "height": downscaled_height,
+            "width": downscaled_width,
+            "guidance_scale": 4,
+            "stg_scale": self.config.get("stg_scale"),
+            "rescaling_scale": self.config.get("rescaling_scale"),
+            "skip_block_list": self.config.get("skip_block_list"),
+            "frame_rate": int(DEFAULT_FPS),
+            "generator": torch.Generator(device=self.main_device).manual_seed(self._get_random_seed()),
+            "output_type": "latent",
+            "media_items": None,
+            "decode_timestep": self.config["decode_timestep"],
+            "decode_noise_scale": self.config["decode_noise_scale"],
+            "stochastic_sampling": self.config["stochastic_sampling"],
+            "image_cond_noise_scale": 0.15,
+            "is_video": True,
+            "vae_per_channel_normalize": True,
+            "mixed_precision": (self.config["precision"] == "mixed_precision"),
+            "offload_to_cpu": False,
+            "enhance_prompt": False,
+        })
 
+        ltx_configs_override = self.config.get("ltx_configs_override", {}).copy()
+        call_kwargs.update(ltx_configs_override)
+
+        if initial_conditions is not None:
+            call_kwargs["conditioning_items"] = initial_conditions
+
+        temp_latent_paths = []
         try:
             for i, chunk_prompt in enumerate(prompt_list):
                 logging.info(f"Processing scene {i+1}/{num_chunks}: '{chunk_prompt[:50]}...'")
@@ -143,12 +192,14 @@
                 current_frames = current_frames_base + (overlap_frames if i > 0 else 0)
                 current_frames = self._align(current_frames, alignment_rule='n*8+1')
 
-
-
-
-
+                call_kwargs.pop("prompt", None)
+                call_kwargs.pop("num_frames", None)
+                call_kwargs["prompt"] = chunk_prompt
+                call_kwargs["num_frames"] = current_frames
 
-
+                with torch.autocast(device_type=self.main_device.type, dtype=self.runtime_autocast_dtype, enabled="cuda" in self.main_device.type):
+                    chunk_latents = self.pipeline(**call_kwargs).images
+
                 if chunk_latents is None: raise RuntimeError(f"Failed to generate latents for scene {i+1}.")
 
                 if is_narrative and i < num_chunks - 1:
@@ -158,10 +209,10 @@
                         media_frame_number=0,
                         conditioning_strength=1.0
                     )
-
-
+                    call_kwargs.pop("conditioning_items", None)
+                    call_kwargs["conditioning_items"] = overlap_condition_item
                 else:
-
+                    call_kwargs.pop("conditioning_items", None)
 
                 if i > 0: chunk_latents = chunk_latents[:, :, overlap_frames:, :, :]
 
@@ -218,53 +269,6 @@
         # Usa o logger de debug para imprimir a mensagem completa
        logging.info("\n".join(log_str))
 
-
-    @log_function_io
-    def _generate_single_chunk_low(
-        self, **kwargs,
-    ) -> Optional[torch.Tensor]:
-        """[WORKER] Calls the pipeline to generate a single chunk of latents."""
-        height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
-        downscale_factor = self.config.get("downscale_factor", 0.6666666)
-        vae_scale_factor = self.pipeline.vae_scale_factor
-        downscaled_height = self._align(int(height_padded * downscale_factor), vae_scale_factor)
-        downscaled_width = self._align(int(width_padded * downscale_factor), vae_scale_factor)
-
-        call_kwargs = {
-            "cfg_star_rescale": "true",
-            "prompt": kwargs["prompt"],
-            "negative_prompt": kwargs['negative_prompt'],
-            "height": downscaled_height,
-            "width": downscaled_width,
-            "num_frames": kwargs["num_frames"],
-            "frame_rate": int(DEFAULT_FPS),
-            "generator": torch.Generator(device=self.main_device).manual_seed(kwargs['seed']),
-            "output_type": "latent",
-            "media_items": None,
-            "decode_timestep": self.config["decode_timestep"],
-            "decode_noise_scale": self.config["decode_noise_scale"],
-            "stochastic_sampling": self.config["stochastic_sampling"],
-            "image_cond_noise_scale": 0.05,
-            "is_video": True,
-            "vae_per_channel_normalize": True,
-            "mixed_precision": (self.config["precision"] == "mixed_precision"),
-            "offload_to_cpu": False,
-            "enhance_prompt": False,
-        }
-
-        call_kwargs.pop("num_inference_steps", None)
-        call_kwargs.pop("second_pass", None)
-        first_pass_config = self.config.get("first_pass", {}).copy()
-        call_kwargs.update(first_pass_config)
-        ltx_configs_override = kwargs.get("ltx_configs_override", {}).copy()
-        call_kwargs.update(ltx_configs_override)
-        call_kwargs['conditioning_items'] = kwargs["conditioning_items"]
-
-        with torch.autocast(device_type=self.main_device.type, dtype=self.runtime_autocast_dtype, enabled="cuda" in self.main_device.type):
-            latents_raw = self.pipeline(**call_kwargs).images
-
-        return latents_raw.to(self.main_device)
-
     @log_function_io
     def _finalize_generation(self, final_latents: torch.Tensor, base_filename: str, seed: int) -> Tuple[str, str]:
         """Delegates final decoding and encoding to specialist services."""
|