Test3

Paused

App Files Files Community

EuuIia committed on Oct 5

Commit

6f22ec8

verified ·

1 Parent(s): 1f0a06a

Upload ltx_server.py

Browse files

Files changed (1) hide show

api/ltx_server.py +59 -122

api/ltx_server.py CHANGED Viewed

@@ -396,6 +396,40 @@ class VideoService:
         return out
     def _dividir_latentes(self, latents_brutos):
         total = latents_brutos.shape[2]  # dimensão temporal (número de latentes)
@@ -544,131 +578,38 @@ class VideoService:
         multi_scale_pipeline = None
         try:
-            # Em ltx_server.py, substitua o bloco 'if improve_texture:' por este:
             if improve_texture:
                 if not self.latent_upsampler:
                     raise ValueError("Upscaler espacial não carregado.")
-                # --- INÍCIO DA IMPLEMENTAÇÃO LIMPA DOS 3 PASSOS ---
-                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
-                # --- PASSO 1: GERAÇÃO DE LATENTES EM BAIXA RESOLUÇÃO ---
-                print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
                 first_pass_args = self.config.get("first_pass", {}).copy()
-                first_pass_kwargs = call_kwargs.copy()
-                first_pass_kwargs.update({
-                    "guidance_scale": first_pass_args.get("guidance_scale", guidance_scale),
-                    "stg_scale": first_pass_args.get("stg_scale"),
-                    "rescaling_scale": first_pass_args.get("rescaling_scale"),
-                    "skip_block_list": first_pass_args.get("skip_block_list"),
-                    "guidance_timesteps": first_pass_args.get("guidance_timesteps"),
-                    "timesteps": first_pass_args.get("timesteps"),
-                    "num_inference_steps": first_pass_args.get("num_inference_steps", 20)
-                })
-                print(f"[DEBUG] Passo 1: Parâmetros do config carregados.")
-                downscale_factor = self.config.get("downscale_factor", 2)
-                original_height = first_pass_kwargs["height"]
-                original_width = first_pass_kwargs["width"]
-                divisor = 24
-                if downscale_factor < 1.0:
-                    target_height_p1 = original_height * downscale_factor
-                    target_width_p1 = original_width * downscale_factor
-                else:
-                    target_height_p1 = original_height // downscale_factor
-                    target_width_p1 = original_width // downscale_factor
-                height_p1 = round(target_height_p1 / divisor) * divisor
-                if height_p1 == 0: height_p1 = divisor
-                first_pass_kwargs["height"] = height_p1
-                width_p1 = round(target_width_p1 / divisor) * divisor
-                if width_p1 == 0: width_p1 = divisor
-                first_pass_kwargs["width"] = width_p1
-                print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {height_p1}x{width_p1}")
-                print(f"[DEBUG] first_pass_kwargs {first_pass_kwargs}")
-                with ctx:
-                    first_pass_result = self.pipeline(**first_pass_kwargs)
-                latents_low_res = first_pass_result.images
-                log_tensor_info(latents_low_res, "Latentes (Passo 1)")
-                del first_pass_result, first_pass_kwargs
-                gc.collect()
-                if self.device == "cuda": torch.cuda.empty_cache()
-                # --- PASSO INTERMEDIÁRIO: UPSCALE DOS LATENTES ---
-                print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
-                with ctx:
-                    latents_high_res = self.latent_upsampler(latents_low_res)
-                log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
-                del latents_low_res
-                gc.collect()
-                if self.device == "cuda": torch.cuda.empty_cache()
-                # --- PASSO 2: REFINAMENTO EM ALTA RESOLUÇÃO ---
-                print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
                 second_pass_args = self.config.get("second_pass", {}).copy()
-                second_pass_kwargs = call_kwargs.copy()
-                # Lógica de refinamento robusta usando 'strength'
-                strength = second_pass_args.get("strength", second_pass_args.get("denoising_strength"))
-                if strength is None and "skip_initial_inference_steps" in second_pass_args:
-                    total_steps = second_pass_args.get("num_inference_steps", 30)
-                    skip_steps = second_pass_args.get("skip_initial_inference_steps", 0)
-                    if total_steps > 0:
-                        strength = 1.0 - (skip_steps / total_steps)
-                elif strength is None and "timesteps" in second_pass_args:
-                    # Se temos timesteps explícitos, o strength é o primeiro valor da lista
-                    # (já que a lista começa "tarde", ex: [0.9, 0.7...])
-                    strength = second_pass_args["timesteps"][0]
-                elif strength is None:
-                    strength = 0.5 # Fallback seguro
-                second_pass_kwargs["strength"] = strength
-                print(f"[DEBUG] Passo 2: Usando 'strength'={strength:.3f} para o refinamento.")
-                # Removemos timesteps para que a pipeline os calcule a partir do strength
-                if "timesteps" in second_pass_kwargs: del second_pass_kwargs["timesteps"]
-                if "guidance_timesteps" in second_pass_kwargs: del second_pass_kwargs["guidance_timesteps"]
-                second_pass_kwargs.update({
-                    "guidance_scale": second_pass_args.get("guidance_scale", guidance_scale),
-                    "stg_scale": second_pass_args.get("stg_scale"),
-                    "rescaling_scale": second_pass_args.get("rescaling_scale"),
-                    "skip_block_list": second_pass_args.get("skip_block_list"),
-                    "num_inference_steps": second_pass_args.get("num_inference_steps", 20)
-                })
-                height_p2 = height_p1 * 2
-                width_p2 = width_p1 * 2
-                second_pass_kwargs["height"] = height_p2
-                second_pass_kwargs["width"] = width_p2
-                print(f"[DEBUG] Passo 2: Dimensões definidas para {height_p2}x{width_p2}")
-                second_pass_kwargs["latents"] = latents_high_res
-                print(f"[DEBUG] second_pass_kwargs {second_pass_kwargs}")
                 with ctx:
-                    second_pass_result = self.pipeline(**second_pass_kwargs)
-                latents = second_pass_result.images
-                log_tensor_info(latents, "Latentes Finais (Passo 2)")
-                # --- FIM DA IMPLEMENTAÇÃO LIMPA ---
             else:
                 single_pass_kwargs = call_kwargs.copy()
                 first_pass_config = self.config.get("first_pass", {})
@@ -691,10 +632,6 @@ class VideoService:
                 print("\n[INFO] Executando pipeline de etapa única...")
                 t_sp = time.perf_counter()
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
-                print(f"[DEBUG] single_pass_kwargs {single_pass_kwargs}")
                 with ctx:
                     result = self.pipeline(**single_pass_kwargs)
                 print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")

         return out
+    def _dividir_latentes_em_partes(self, latents_brutos, quantidade: int):
+        """
+        Divide um tensor de latentes em `quantidade` partes e retorna uma lista de clones.
+        Args:
+            latents_brutos: tensor [B, C, T, H, W]
+            quantidade: número de partes que queremos dividir
+        Returns:
+            List[Tensor]: lista de `quantidade` partes, cada uma cloneada
+        """
+        total = latents_brutos.shape[2]  # dimensão temporal
+        partes = []
+        if quantidade <= 1 or quantidade > total:
+            return [latents_brutos.clone()]
+        # calcular tamanho aproximado de cada parte
+        step = total // quantidade
+        overlap = 0  # sobreposição mínima de 1 frame entre partes
+        for i in range(quantidade):
+            start = i * step
+            end = start + step
+            if i == quantidade - 1:
+                end = total  # última parte vai até o final
+            else:
+                end += overlap  # sobreposição
+            parte = latents_brutos[:, :, start-1:end+1, :, :].clone()
+            partes.append(parte)
+        return partes
     def _dividir_latentes(self, latents_brutos):
         total = latents_brutos.shape[2]  # dimensão temporal (número de latentes)
         multi_scale_pipeline = None
         try:
             if improve_texture:
                 if not self.latent_upsampler:
                     raise ValueError("Upscaler espacial não carregado.")
+                print("[DEBUG] Multi-escala: construindo pipeline...")
+                multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
                 first_pass_args = self.config.get("first_pass", {}).copy()
+                first_pass_args["guidance_scale"] = float(guidance_scale)
                 second_pass_args = self.config.get("second_pass", {}).copy()
+                second_pass_args["guidance_scale"] = float(guidance_scale)
+                multi_scale_call_kwargs = call_kwargs.copy()
+                multi_scale_call_kwargs.update(
+                    {
+                        "downscale_factor": self.config["downscale_factor"],
+                        "first_pass": first_pass_args,
+                        "second_pass": second_pass_args,
+                    }
+                )
+                print("[DEBUG] Chamando multi_scale_pipeline...")
+                t_ms = time.perf_counter()
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
+                    result = multi_scale_pipeline(**multi_scale_call_kwargs)
+                print(f"[DEBUG] multi_scale_pipeline tempo={time.perf_counter()-t_ms:.3f}s")
+                if hasattr(result, "latents"):
+                    latents = result.latents
+                elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
+                    latents = result.images
+                else:
+                    latents = result
+                print(f"[DEBUG] Latentes (multi-escala): shape={tuple(latents.shape)}")
             else:
                 single_pass_kwargs = call_kwargs.copy()
                 first_pass_config = self.config.get("first_pass", {})
                 print("\n[INFO] Executando pipeline de etapa única...")
                 t_sp = time.perf_counter()
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
                     result = self.pipeline(**single_pass_kwargs)
                 print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")