Update api/ltx_server.py
api/ltx_server.py  +91 -36  CHANGED
@@ -1,6 +1,3 @@
-# video_service.py
-
-# --- 1. IMPORTS ---
 import torch
 import numpy as np
 import random
@@ -63,9 +60,9 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
                 parts = [p.strip() for p in line.split(",")]
                 if len(parts) >= 3:
                     try:
-                        pid = int(parts[0])
-                        name = parts[1]
-                        used_mb = int(parts[2])
+                        pid = int(parts[0])
+                        name = parts[1]
+                        used_mb = int(parts[2])
                         user = "unknown"
                         try:
                             import psutil
@@ -349,6 +346,41 @@ class VideoService:
             return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
         return tensor.to(self.device)
 
+    # New: decode latents outside the pipeline with the VAE, writing frames incrementally
+    def _decode_latents_to_video(self, latents: torch.Tensor, output_video_path: str, frame_rate: int,
+                                 padding_values, progress_callback=None):
+        pad_left, pad_right, pad_top, pad_bottom = padding_values
+        with imageio.get_writer(output_video_path, fps=frame_rate, codec="libx264", quality=8) as writer:
+            T = latents.shape[2]
+            for i in range(T):
+                latent_chw = latents[0, :, i].to(self.device)
+                with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext():
+                    pixel_bchw = None
+                    if hasattr(self.pipeline, "decode_latents"):
+                        pixel_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
+                    elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
+                        pixel_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
+                    else:
+                        raise RuntimeError("Pipeline exposes neither decode_latents nor vae.decode for decoding latents.")
+                pixel_chw = pixel_bchw[0]
+                if pixel_chw.min() < 0:
+                    pixel_chw = (pixel_chw.clamp(-1, 1) + 1.0) / 2.0
+                else:
+                    pixel_chw = pixel_chw.clamp(0, 1)
+                H = pixel_chw.shape[1]
+                W = pixel_chw.shape[2]
+                h_end = H - pad_bottom if pad_bottom > 0 else H
+                w_end = W - pad_right if pad_right > 0 else W
+                pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
+                frame_hwc_u8 = (pixel_chw.permute(1, 2, 0)
+                                .mul(255)
+                                .to(torch.uint8)
+                                .cpu()
+                                .numpy())
+                writer.append_data(frame_hwc_u8)
+                if progress_callback:
+                    progress_callback(i + 1, T)
+
     def generate(
         self,
         prompt,
@@ -370,6 +402,7 @@ class VideoService:
         guidance_scale=3.0,
         improve_texture=True,
         progress_callback=None,
+        external_decode=True,  # NEW: decode outside the pipeline
     ):
         if self.device == "cuda":
             torch.cuda.empty_cache()
@@ -417,7 +450,7 @@ class VideoService:
             "num_frames": actual_num_frames,
             "frame_rate": int(FPS),
             "generator": generator,
-            "output_type": "pt",
+            "output_type": "latent" if external_decode else "pt",  # switch the output type here
             "conditioning_items": conditioning_items if conditioning_items else None,
             "media_items": None,
             "decode_timestep": self.config["decode_timestep"],
@@ -441,6 +474,7 @@ class VideoService:
             padding=padding_values,
         ).to(self.device)
 
+        latents = None
         result_tensor = None
         multi_scale_pipeline = None
 
@@ -452,7 +486,6 @@ class VideoService:
                 first_pass_args["guidance_scale"] = float(guidance_scale)
                 second_pass_args = self.config.get("second_pass", {}).copy()
                 second_pass_args["guidance_scale"] = float(guidance_scale)
-
                 multi_scale_call_kwargs = call_kwargs.copy()
                 multi_scale_call_kwargs.update(
                     {
@@ -461,13 +494,18 @@ class VideoService:
                         "second_pass": second_pass_args,
                     }
                 )
-
                 ctx = contextlib.nullcontext()
                 if self.device == "cuda":
                     ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
                 with ctx:
-                    result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
-                log_tensor_info(result_tensor, "Stage 2 Result (Multi-Scale Pipeline Output)")
+                    result = multi_scale_pipeline(**multi_scale_call_kwargs)
+                    # Capture latents or images depending on output_type
+                    if external_decode:
+                        latents = getattr(result, "latents", None) or getattr(result, "images", None) or result
+                    else:
+                        result_tensor = getattr(result, "images", None) or result
+                if not external_decode:
+                    log_tensor_info(result_tensor, "Stage 2 Result (Multi-Scale Pipeline Output)")
             else:
                 single_pass_kwargs = call_kwargs.copy()
                 first_pass_config = self.config.get("first_pass", {})
@@ -479,8 +517,7 @@ class VideoService:
                         "skip_block_list": first_pass_config.get("skip_block_list"),
                     }
                 )
-
-                # Single schedule choice to guarantee guidance_mapping is defined and consistent
+                # Single schedule for consistent guidance_mapping
                 schedule = first_pass_config.get("timesteps")
                 if schedule is None:
                     schedule = first_pass_config.get("guidance_timesteps")
@@ -489,20 +526,18 @@ class VideoService:
                     print("[INFO] Video-to-video mode (single pass): forcing timesteps to [0.7]")
                 if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
                     single_pass_kwargs["timesteps"] = schedule
-                    single_pass_kwargs["guidance_timesteps"] = schedule
+                    single_pass_kwargs["guidance_timesteps"] = schedule
 
                 print("\n[INFO] Running single-pass pipeline...")
                 ctx = contextlib.nullcontext()
                 if self.device == "cuda":
                     ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
                 with ctx:
-                    result_tensor = self.pipeline(**single_pass_kwargs).images
-
-        pad_left, pad_right, pad_top, pad_bottom = padding_values
-        slice_h_end = -pad_bottom if pad_bottom > 0 else None
-        slice_w_end = -pad_right if pad_right > 0 else None
-        result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
-        log_tensor_info(result_tensor, "Final Tensor (After Post-processing, Before Saving)")
+                    result = self.pipeline(**single_pass_kwargs)
+                    if external_decode:
+                        latents = getattr(result, "latents", None) or getattr(result, "images", None) or result
+                    else:
+                        result_tensor = getattr(result, "images", None) or result
 
         # Safe staging in tmp, then move to the persistent directory
         temp_dir = tempfile.mkdtemp(prefix="ltxv_")
@@ -513,20 +548,36 @@ class VideoService:
         final_output_path = None
         output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
         try:
-            with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
-                T = result_tensor.shape[2]
-                for i in range(T):
-                    frame_chw = result_tensor[0, :, i]
-                    frame_hwc_u8 = (frame_chw.permute(1, 2, 0)
-                                    .clamp(0, 1)
-                                    .mul(255)
-                                    .to(torch.uint8)
-                                    .cpu()
-                                    .numpy())
-                    writer.append_data(frame_hwc_u8)
-                    if progress_callback:
-                        progress_callback(i + 1, T)
-
+            if external_decode:
+                # Decode latents -> MP4, frame by frame
+                self._decode_latents_to_video(
+                    latents=latents,
+                    output_video_path=output_video_path,
+                    frame_rate=call_kwargs["frame_rate"],
+                    padding_values=padding_values,
+                    progress_callback=progress_callback,
+                )
+            else:
+                # Old path: tensor already in pixel space -> write frame by frame
+                # Crop padding before writing
+                pad_left, pad_right, pad_top, pad_bottom = padding_values
+                slice_h_end = -pad_bottom if pad_bottom > 0 else None
+                slice_w_end = -pad_right if pad_right > 0 else None
+                result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
+                log_tensor_info(result_tensor, "Final Tensor (After Post-processing, Before Saving)")
+                with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
+                    T = result_tensor.shape[2]
+                    for i in range(T):
+                        frame_chw = result_tensor[0, :, i]
+                        frame_hwc_u8 = (frame_chw.permute(1, 2, 0)
+                                        .clamp(0, 1)
+                                        .mul(255)
+                                        .to(torch.uint8)
+                                        .cpu()
+                                        .numpy())
+                        writer.append_data(frame_hwc_u8)
+                        if progress_callback:
+                            progress_callback(i + 1, T)
 
             candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
             try:
@@ -539,6 +590,10 @@ class VideoService:
             self._log_gpu_memory("End of Generation")
             return final_output_path, used_seed
         finally:
+            try:
+                del latents
+            except Exception:
+                pass
             try:
                 del result_tensor
             except Exception:
@@ -565,4 +620,4 @@ class VideoService:
                 pass
 
 print("Creating VideoService instance. Model loading will start now...")
-video_generation_service = VideoService()
+video_generation_service = VideoService()
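For context, a minimal driver sketch for the changed entry point. `video_generation_service`, `generate`, `progress_callback`, and the new `external_decode` flag come from the diff above; the import path, the prompt, and the `on_progress` helper are illustrative assumptions, and the remaining `generate` parameters (resolution, frame count, and so on) are elided and assumed to have defaults:

import torch  # noqa: F401 (the service module pulls in torch itself)
from api.ltx_server import video_generation_service  # assumed import path

def on_progress(done, total):
    # Matches the progress_callback(i + 1, T) calls made while frames are written.
    print(f"wrote frame {done}/{total}")

final_path, seed = video_generation_service.generate(
    prompt="a sailboat crossing a calm bay at dusk",  # example prompt (assumption)
    guidance_scale=3.0,
    improve_texture=True,
    progress_callback=on_progress,
    external_decode=True,  # new flag: request latents and decode them outside the pipeline
)
print(final_path, seed)

The point of the flag: with `external_decode=True` the pipeline is asked for latents (`output_type="latent"`) and `_decode_latents_to_video` pushes them through the VAE one frame at a time, so only a single frame's pixels are resident at once instead of the full `(B, C, T, H, W)` pixel tensor.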
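One detail worth noting: the two writing paths crop the padding differently. `_decode_latents_to_video` computes explicit end indices (`H - pad_bottom`), while the pixel-space path keeps the old negative slice ends (`-pad_bottom`, or `None` when there is no padding). A small self-contained check with toy values, showing the two styles remove the same border:

import torch

x = torch.arange(48.0).reshape(1, 1, 6, 8)  # toy (B, C, H, W) tensor
pad_left, pad_right, pad_top, pad_bottom = 0, 3, 1, 2

# Style used in _decode_latents_to_video: explicit end indices
h_end = x.shape[2] - pad_bottom if pad_bottom > 0 else x.shape[2]
w_end = x.shape[3] - pad_right if pad_right > 0 else x.shape[3]
a = x[..., pad_top:h_end, pad_left:w_end]

# Style used in the pixel-space path: negative ends, None when unpadded
b = x[..., pad_top:(-pad_bottom if pad_bottom > 0 else None),
      pad_left:(-pad_right if pad_right > 0 else None)]

assert torch.equal(a, b)  # identical crops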