Test4

Paused

App Files Files Community

euiiiia committed on Oct 15

Commit

e8cfb14

verified ·

1 Parent(s): 3018b1d

Update api/aduc_ltx_latent_patch.py

Browse files

Files changed (1) hide show

api/aduc_ltx_latent_patch.py +36 -44

api/aduc_ltx_latent_patch.py CHANGED Viewed

@@ -12,25 +12,20 @@ from typing import Optional, List, Tuple
 from pathlib import Path
 import os
 import sys
-DEPS_DIR = Path("/data")
-LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-def add_deps_to_path(repo_path: Path):
-    """Adiciona o diretório do repositório ao sys.path para importações locais."""
-    resolved_path = str(repo_path.resolve())
-    if resolved_path not in sys.path:
-        sys.path.insert(0, resolved_path)
-        if LTXV_DEBUG:
-            print(f"[DEBUG] Adicionado ao sys.path: {resolved_path}")
-# --- Execução da configuração inicial ---
-if not LTX_VIDEO_REPO_DIR.exists():
-    _run_setup_script()
-add_deps_to_path(LTX_VIDEO_REPO_DIR)
 # Tenta importar as dependências necessárias do módulo original que será modificado.
-# Isso requer que o ambiente Python tenha o pacote `ltx_video` acessível em seu sys.path.
 try:
     from ltx_video.pipelines.pipeline_ltx_video import (
         LTXVideoPipeline,
@@ -42,17 +37,14 @@ try:
 except ImportError as e:
     print(f"FATAL ERROR: Could not import dependencies from 'ltx_video'. "
           f"Please ensure the environment is correctly set up. Error: {e}")
-    # Interrompe a execução se as dependências essenciais não puderem ser encontradas.
     raise
 print("[INFO] Patch module 'aduc_ltx_latent_patch' loaded successfully.")
 # ==============================================================================
-# 1. NOVA DEFINIÇÃO DA DATACLASS `ConditioningItem`
 # ==============================================================================
-from dataclasses import dataclass
 @dataclass
 class PatchedConditioningItem:
     """
@@ -103,7 +95,6 @@ def prepare_conditioning_with_latents(
     assert isinstance(self, LTXVideoPipeline), "This function must be called as a method of LTXVideoPipeline."
     assert isinstance(self.vae, CausalVideoAutoencoder), "VAE must be of type CausalVideoAutoencoder."
-    # Se não há itens de condicionamento, apenas patchifica os latentes e retorna.
     if not conditioning_items:
         init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
         init_pixel_coords = latent_to_pixel_coords(
@@ -112,7 +103,6 @@ def prepare_conditioning_with_latents(
         )
         return init_latents, init_pixel_coords, None, 0
-    # Inicializa tensores para acumular resultados
     init_conditioning_mask = torch.zeros(
         init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device
     )
@@ -124,36 +114,52 @@ def prepare_conditioning_with_latents(
     for item in conditioning_items:
         item_latents: Tensor
-        # --- LÓGICA CENTRAL DO PATCH ---
         if item.latents is not None:
-            # 1. Se latentes pré-calculados existem, use-os diretamente.
             item_latents = item.latents.to(dtype=init_latents.dtype, device=init_latents.device)
             if item_latents.ndim != 5:
                 raise ValueError(f"Latents must have 5 dimensions (b, c, f, h, w), but got {item_latents.ndim}")
         elif item.media_item is not None:
-            # 2. Caso contrário, volte para o fluxo original de codificação da VAE.
             resized_item = self._resize_conditioning_item(item, height, width)
             media_item = resized_item.media_item
             assert media_item.ndim == 5, f"media_item must have 5 dims, but got {media_item.ndim}"
             item_latents = vae_encode(
                 media_item.to(dtype=self.vae.dtype, device=self.vae.device),
                 self.vae,
                 vae_per_channel_normalize=vae_per_channel_normalize,
             ).to(dtype=init_latents.dtype)
         else:
-            # Este caso é prevenido pelo __post_init__ do dataclass, mas é bom ter uma checagem.
             raise ValueError("ConditioningItem is invalid: it has neither 'latents' nor 'media_item'.")
-        # --- FIM DA LÓGICA DO PATCH ---
         media_frame_number = item.media_frame_number
         strength = item.conditioning_strength
-        # O resto da lógica da função original é aplicado sobre `item_latents`.
         if media_frame_number == 0:
             item_latents, l_x, l_y = self._get_latent_spatial_position(
-                item_latents, item, height, width, strip_latent_border=True
             )
             _, _, f_l, h_l, w_l = item_latents.shape
             init_latents[:, :, :f_l, l_y : l_y + h_l, l_x : l_x + w_l] = torch.lerp(
                 init_latents[:, :, :f_l, l_y : l_y + h_l, l_x : l_x + w_l], item_latents, strength
@@ -186,7 +192,6 @@ def prepare_conditioning_with_latents(
                 extra_conditioning_pixel_coords.append(pixel_coords)
                 extra_conditioning_mask.append(conditioning_mask)
-    # Patchifica os latentes principais e a máscara de condicionamento
     init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
     init_pixel_coords = latent_to_pixel_coords(
         init_latent_coords, self.vae,
@@ -195,7 +200,6 @@ def prepare_conditioning_with_latents(
     init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
     init_conditioning_mask = init_conditioning_mask.squeeze(-1)
-    # Concatena os latentes extras (se houver)
     if extra_conditioning_latents:
         init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
         init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
@@ -208,7 +212,6 @@ def prepare_conditioning_with_latents(
     return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
 # ==============================================================================
 # 3. CLASSE DO MONKEY PATCHER
 # ==============================================================================
@@ -216,10 +219,6 @@ def prepare_conditioning_with_latents(
 class LTXLatentConditioningPatch:
     """
     Classe estática para aplicar e reverter o monkey patch na pipeline LTX-Video.
-    Esta classe substitui o método `prepare_conditioning` da `LTXVideoPipeline`
-    pela versão otimizada que suporta latentes pré-calculados, e implicitamente
-    requer o uso da `PatchedConditioningItem`.
     """
     _original_prepare_conditioning = None
     _is_patched = False
@@ -228,21 +227,14 @@ class LTXLatentConditioningPatch:
     def apply():
         """
         Aplica o monkey patch à classe `LTXVideoPipeline`.
-        Guarda o método original e o substitui pela nova implementação.
-        É idempotente; aplicar múltiplas vezes não causa efeito adicional.
         """
         if LTXLatentConditioningPatch._is_patched:
             print("[WARNING] LTXLatentConditioningPatch has already been applied. Ignoring.")
             return
         print("[INFO] Applying monkey patch for latent-based conditioning...")
-        # Guarda a implementação original para permitir a reversão.
         LTXLatentConditioningPatch._original_prepare_conditioning = LTXVideoPipeline.prepare_conditioning
-        # Substitui o método na classe LTXVideoPipeline.
-        # Todas as instâncias futuras e existentes da classe usarão este novo método.
         LTXVideoPipeline.prepare_conditioning = prepare_conditioning_with_latents
         LTXLatentConditioningPatch._is_patched = True

 from pathlib import Path
 import os
 import sys
+from dataclasses import dataclass, replace
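+# --- PATH SETUP (disabled): the commented-out helper below no longer references LTXV_DEBUG or _run_setup_script; `ltx_video` must already be importable when this module is loaded, otherwise the import below re-raises ImportError ---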
+# DEPS_DIR = Path("/data")
+# LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+# def add_deps_to_path(repo_path: Path):
+#     """Adiciona o diretório do repositório ao sys.path para importações locais."""
+#     resolved_path = str(repo_path.resolve())
+#     if resolved_path not in sys.path:
+#         sys.path.insert(0, resolved_path)
+# add_deps_to_path(LTX_VIDEO_REPO_DIR)
 # Tenta importar as dependências necessárias do módulo original que será modificado.
 try:
     from ltx_video.pipelines.pipeline_ltx_video import (
         LTXVideoPipeline,
 except ImportError as e:
     print(f"FATAL ERROR: Could not import dependencies from 'ltx_video'. "
           f"Please ensure the environment is correctly set up. Error: {e}")
     raise
 print("[INFO] Patch module 'aduc_ltx_latent_patch' loaded successfully.")
 # ==============================================================================
+# 1. NOVA DEFINIÇÃO DA DATACLASS `PatchedConditioningItem`
 # ==============================================================================
 @dataclass
 class PatchedConditioningItem:
     """
     assert isinstance(self, LTXVideoPipeline), "This function must be called as a method of LTXVideoPipeline."
     assert isinstance(self.vae, CausalVideoAutoencoder), "VAE must be of type CausalVideoAutoencoder."
     if not conditioning_items:
         init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
         init_pixel_coords = latent_to_pixel_coords(
         )
         return init_latents, init_pixel_coords, None, 0
     init_conditioning_mask = torch.zeros(
         init_latents[:, 0, :, :, :].shape, dtype=torch.float32, device=init_latents.device
     )
     for item in conditioning_items:
         item_latents: Tensor
         if item.latents is not None:
             item_latents = item.latents.to(dtype=init_latents.dtype, device=init_latents.device)
             if item_latents.ndim != 5:
                 raise ValueError(f"Latents must have 5 dimensions (b, c, f, h, w), but got {item_latents.ndim}")
         elif item.media_item is not None:
             resized_item = self._resize_conditioning_item(item, height, width)
             media_item = resized_item.media_item
             assert media_item.ndim == 5, f"media_item must have 5 dims, but got {media_item.ndim}"
             item_latents = vae_encode(
                 media_item.to(dtype=self.vae.dtype, device=self.vae.device),
                 self.vae,
                 vae_per_channel_normalize=vae_per_channel_normalize,
             ).to(dtype=init_latents.dtype)
         else:
             raise ValueError("ConditioningItem is invalid: it has neither 'latents' nor 'media_item'.")
         media_frame_number = item.media_frame_number
         strength = item.conditioning_strength
         if media_frame_number == 0:
+            # --- INÍCIO DA MODIFICAÇÃO ---
+            # Se `item.media_item` for None (nosso caso de uso otimizado), a função original `_get_latent_spatial_position`
+            # quebraria. Para evitar isso, criamos um item temporário com um tensor de placeholder que contém
+            # as informações de dimensão corretas, inferidas a partir dos próprios latentes.
+            item_for_spatial_position = item
+            if item.media_item is None:
+                # Infere as dimensões em pixels a partir da forma dos latentes
+                latent_h, latent_w = item_latents.shape[-2:]
+                pixel_h = latent_h * self.vae_scale_factor
+                pixel_w = latent_w * self.vae_scale_factor
+                # Cria um tensor de placeholder com o shape esperado (o conteúdo não importa)
+                placeholder_media_item = torch.empty(
+                    (1, 1, 1, pixel_h, pixel_w), device=item_latents.device, dtype=item_latents.dtype
+                )
+                # Usa `dataclasses.replace` para criar uma cópia temporária do item com o placeholder
+                item_for_spatial_position = replace(item, media_item=placeholder_media_item)
+            # Chama a função original com um item que ela pode processar sem erro
             item_latents, l_x, l_y = self._get_latent_spatial_position(
+                item_latents, item_for_spatial_position, height, width, strip_latent_border=True
             )
+            # --- FIM DA MODIFICAÇÃO ---
             _, _, f_l, h_l, w_l = item_latents.shape
             init_latents[:, :, :f_l, l_y : l_y + h_l, l_x : l_x + w_l] = torch.lerp(
                 init_latents[:, :, :f_l, l_y : l_y + h_l, l_x : l_x + w_l], item_latents, strength
                 extra_conditioning_pixel_coords.append(pixel_coords)
                 extra_conditioning_mask.append(conditioning_mask)
     init_latents, init_latent_coords = self.patchifier.patchify(latents=init_latents)
     init_pixel_coords = latent_to_pixel_coords(
         init_latent_coords, self.vae,
     init_conditioning_mask, _ = self.patchifier.patchify(latents=init_conditioning_mask.unsqueeze(1))
     init_conditioning_mask = init_conditioning_mask.squeeze(-1)
     if extra_conditioning_latents:
         init_latents = torch.cat([*extra_conditioning_latents, init_latents], dim=1)
         init_pixel_coords = torch.cat([*extra_conditioning_pixel_coords, init_pixel_coords], dim=2)
     return init_latents, init_pixel_coords, init_conditioning_mask, extra_conditioning_num_latents
 # ==============================================================================
 # 3. CLASSE DO MONKEY PATCHER
 # ==============================================================================
 class LTXLatentConditioningPatch:
     """
     Classe estática para aplicar e reverter o monkey patch na pipeline LTX-Video.
     """
     _original_prepare_conditioning = None
     _is_patched = False
     def apply():
         """
         Aplica o monkey patch à classe `LTXVideoPipeline`.
         """
         if LTXLatentConditioningPatch._is_patched:
             print("[WARNING] LTXLatentConditioningPatch has already been applied. Ignoring.")
             return
         print("[INFO] Applying monkey patch for latent-based conditioning...")
         LTXLatentConditioningPatch._original_prepare_conditioning = LTXVideoPipeline.prepare_conditioning
         LTXVideoPipeline.prepare_conditioning = prepare_conditioning_with_latents
         LTXLatentConditioningPatch._is_patched = True