Update api/ltx_server.py
Browse files

api/ltx_server.py  CHANGED  (+72 -6)
@@ -385,6 +385,72 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
 
+
+    def _decode_one_latent_to_pixel(self, latent_chw: torch.Tensor) -> torch.Tensor:
+        """
+        Decode one latent (C,H,W) to a pixel frame (C,H,W) in the range [0,1].
+        Uses pipeline.decode_latents if it exists, otherwise pipeline.vae.decode.
+        """
+        if self.device == "cuda":
+            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
+        else:
+            ctx = contextlib.nullcontext()
+        with ctx:
+            if hasattr(self.pipeline, "decode_latents"):
+                img_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
+            elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
+                img_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
+            else:
+                raise RuntimeError("No decoder found (decode_latents/vae.decode).")
+        img_chw = img_bchw[0]
+        # Normalize to [0,1] in case the decoder outputs [-1,1]
+        if img_chw.min() < 0:
+            img_chw = (img_chw.clamp(-1, 1) + 1.0) / 2.0
+        else:
+            img_chw = img_chw.clamp(0, 1)
+        return img_chw
+
+    def _pixels_to_uint8_np(self, pixel_chw: torch.Tensor, padding_values) -> np.ndarray:
+        """
+        Convert (C,H,W) float [0,1] to (H,W,C) uint8, cropping off the padding.
+        """
+        pad_left, pad_right, pad_top, pad_bottom = padding_values
+        H, W = pixel_chw.shape[1], pixel_chw.shape[2]
+        h_end = H - pad_bottom if pad_bottom > 0 else H
+        w_end = W - pad_right if pad_right > 0 else W
+        pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
+        frame_hwc_u8 = (pixel_chw.permute(1, 2, 0)
+                        .mul(255)
+                        .to(torch.uint8)
+                        .cpu()
+                        .numpy())
+        return frame_hwc_u8
+
+    def encode_latents_to_mp4(self, latents: torch.Tensor, output_path: str, fps: int, padding_values,
+                              progress_callback=None):
+        """
+        Final pipeline: latents (B,C,T,H,W) -> decode each frame -> write the MP4 incrementally.
+        Follows the encoder pattern from the other app (frame by frame, no giant 4D array).
+        """
+        T = latents.shape[2]
+        print(f"[DEBUG] encode_latents_to_mp4: frames={T} out={output_path}")
+        with imageio.get_writer(output_path, fps=fps, codec="libx264", quality=8) as writer:
+            for i in range(T):
+                latent_chw = latents[0, :, i].to(self.device)
+                pixel_chw = self._decode_one_latent_to_pixel(latent_chw)
+                frame_hwc_u8 = self._pixels_to_uint8_np(pixel_chw, padding_values)
+                writer.append_data(frame_hwc_u8)
+                if progress_callback:
+                    progress_callback(i + 1, T)
+                if i % getattr(self, "frame_log_every", 8) == 0:
+                    print(f"[DEBUG] encode frame {i}/{T}")
+
+
     def _decode_latents_to_video(self, latents: torch.Tensor, output_video_path: str, frame_rate: int,
                                  padding_values, progress_callback=None):
         print(f"[DEBUG] Decoding latents → video: {output_video_path}")
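The three new methods form a small pipeline: decode one latent frame, crop and convert it to uint8, append it to the MP4 writer. A minimal sketch of how the encoder might be driven, assuming an already-initialized VideoService instance named `service` and an illustrative latent shape (neither is part of this commit):

import torch

# Assumed: `service` is a VideoService with .pipeline and .device already set up.
latents = torch.randn(1, 128, 57, 16, 24)  # (B, C, T, H, W) — shape is illustrative only

def on_progress(done: int, total: int) -> None:
    # Matches the progress_callback(i + 1, T) call inside encode_latents_to_mp4.
    print(f"decoded {done}/{total} frames")

service.encode_latents_to_mp4(
    latents=latents,
    output_path="/tmp/output.mp4",
    fps=24,
    padding_values=(0, 0, 0, 0),  # (pad_left, pad_right, pad_top, pad_bottom)
    progress_callback=on_progress,
)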
@@ -620,14 +686,14 @@ class VideoService:
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
         final_output_path = None
 
-        if external_decode:
-            print("[DEBUG] …
-            self._decode_latents_to_video(
+        if external_decode:
+            print("[DEBUG] Encoding from latents (external VAE) → MP4...")
+            self.encode_latents_to_mp4(
                 latents=latents,
-                output_video_path=output_video_path,
-                frame_rate=call_kwargs["frame_rate"],
+                output_path=output_video_path,
+                fps=call_kwargs["frame_rate"],
                 padding_values=padding_values,
-                progress_callback=progress_callback
+                progress_callback=progress_callback
             )
         else:
            print("[DEBUG] Writing video from pixels (no latents)...")