Test3

Paused

App Files Community

EuuIia committed on Oct 4

Commit

3a06c45

verified ·

1 Parent(s): 5d3df0c

Update api/ltx_server.py

Browse files

Files changed (1) hide show

api/ltx_server.py +39 -61

api/ltx_server.py CHANGED Viewed

@@ -503,8 +503,6 @@ class VideoService:
             torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
         self._log_gpu_memory("Início da Geração")
-        #ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
         if mode == "image-to-video" and not start_image_filepath:
             raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
         if mode == "video-to-video" and not input_video_filepath:
@@ -577,82 +575,65 @@ class VideoService:
             print(f"[DEBUG] media_items shape={tuple(media.shape)}")
         latents = None
-        multi_scale_pipeline = None
         try:
             if improve_texture:
                 if not self.latent_upsampler:
                     raise ValueError("Upscaler espacial não carregado.")
                 print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
-                single_pass_kwargs = call_kwargs.copy()
-                first_pass_config = self.config.get("first_pass", {})
-                single_pass_kwargs.update(
-                    {
-                        "guidance_scale": float(guidance_scale),
-                        "stg_scale": first_pass_config.get("stg_scale"),
-                        "rescaling_scale": first_pass_config.get("rescaling_scale"),
-                        "skip_block_list": first_pass_config.get("skip_block_list"),
-                    }
-                )
-                schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
-                if mode == "video-to-video":
-                    schedule = [0.7]; print("[INFO] Modo video-to-video (etapa única): timesteps=[0.7]")
-                if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
-                    single_pass_kwargs["timesteps"] = schedule
-                    single_pass_kwargs["guidance_timesteps"] = schedule
-                print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")
-                # ==================== NOVA LÓGICA DE DIMENSÕES AQUI ====================
                 downscale_factor = self.config.get("downscale_factor", 2)
-                original_height = single_pass_kwargs["height"]
-                original_width = single_pass_kwargs["width"]
                 divisor = 24
-                # Calcula a altura para o primeiro passo, garantindo divisibilidade
                 target_height_p1 = original_height // downscale_factor
-                single_pass_kwargs["height"] = round(target_height_p1 / divisor) * divisor
-                # Calcula a largura para o primeiro passo, garantindo divisibilidade
                 target_width_p1 = original_width // downscale_factor
-                single_pass_kwargs["width"] = round(target_width_p1 / divisor) * divisor
-                # Medida de segurança para evitar dimensões zero
-                if single_pass_kwargs["height"] == 0: first_pass_kwargs["height"] = divisor
-                if single_pass_kwargs["width"] == 0: first_pass_kwargs["width"] = divisor
-                # =======================================================================
-                print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {single_pass_kwargs['height']}x{single_pass_kwargs['width']}")
-                print("\n[INFO] Executando pipeline promeira etapa...")
-                t_sp = time.perf_counter()
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
-                    latents = self.pipeline(**single_pass_kwargs).frames
-                print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")
-                print(f"[DEBUG] Latentes (first_pass_kwargs): shape={tuple(latents.shape)}")
-                del single_pass_kwargs
                 gc.collect()
                 if self.device == "cuda": torch.cuda.empty_cache()
                 # 2. Upscale dos latentes
                 print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
                 with ctx:
-                    latents_high_res = self.latent_upsampler(
-                        latents=latents,
-                        output_height=original_height,
-                        output_width=original_width,
-                        output_type="latent"
-                    ).frames
                 log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
                 del latents_low_res
@@ -662,7 +643,10 @@ class VideoService:
                 # 3. Configurar e executar o segundo passo
                 print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
                 second_pass_args = self.config.get("second_pass", {}).copy()
                 second_pass_kwargs = call_kwargs.copy()
                 second_pass_kwargs.update({
                     "guidance_scale": float(guidance_scale),
@@ -670,18 +654,16 @@ class VideoService:
                     "rescaling_scale": second_pass_args.get("rescaling_scale"),
                     "skip_block_list": second_pass_args.get("skip_block_list"),
                 })
-                # O segundo passo geralmente usa uma fração dos timesteps totais (ex: 70%)
                 schedule_p2 = second_pass_args.get("timesteps") or second_pass_args.get("guidance_timesteps")
                 if schedule_p2:
                     second_pass_kwargs["timesteps"] = schedule_p2
                     second_pass_kwargs["guidance_timesteps"] = schedule_p2
-                # Adiciona os latentes do upscale como 'latents' iniciais para o segundo passo
                 second_pass_kwargs["latents"] = latents_high_res
                 t_p2 = time.perf_counter()
                 with ctx:
-                    # Executa a pipeline principal para o segundo passo
                     second_pass_result = self.pipeline(**second_pass_kwargs)
                 latents = second_pass_result.latents if hasattr(second_pass_result, "latents") else second_pass_result
@@ -793,11 +775,7 @@ class VideoService:
                 del latents
             except Exception:
                 pass
-            try:
-                del multi_scale_pipeline
-            except Exception:
-                pass
             gc.collect()
             try:
                 if self.device == "cuda":
@@ -815,4 +793,4 @@ class VideoService:
                 print(f"[DEBUG] finalize() no finally falhou: {e}")
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
-video_generation_service = VideoService()

             torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
         self._log_gpu_memory("Início da Geração")
         if mode == "image-to-video" and not start_image_filepath:
             raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
         if mode == "video-to-video" and not input_video_filepath:
             print(f"[DEBUG] media_items shape={tuple(media.shape)}")
         latents = None
         try:
             if improve_texture:
                 if not self.latent_upsampler:
                     raise ValueError("Upscaler espacial não carregado.")
+                # --- INÍCIO DA SEPARAÇÃO DOS PASSOS ---
                 print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
+                # 1. Configurar e executar o primeiro passo
+                first_pass_args = self.config.get("first_pass", {}).copy()
+                first_pass_kwargs = call_kwargs.copy()
+                first_pass_kwargs.update({
+                    "guidance_scale": float(guidance_scale),
+                    "stg_scale": first_pass_args.get("stg_scale"),
+                    "rescaling_scale": first_pass_args.get("rescaling_scale"),
+                    "skip_block_list": first_pass_args.get("skip_block_list"),
+                })
+                schedule = first_pass_args.get("timesteps") or first_pass_args.get("guidance_timesteps")
+                if schedule:
+                    first_pass_kwargs["timesteps"] = schedule
+                    first_pass_kwargs["guidance_timesteps"] = schedule
+                # Reduzir dimensões para o primeiro passo, garantindo divisibilidade por 24
                 downscale_factor = self.config.get("downscale_factor", 2)
+                original_height = first_pass_kwargs["height"]
+                original_width = first_pass_kwargs["width"]
                 divisor = 24
                 target_height_p1 = original_height // downscale_factor
+                first_pass_kwargs["height"] = round(target_height_p1 / divisor) * divisor
                 target_width_p1 = original_width // downscale_factor
+                first_pass_kwargs["width"] = round(target_width_p1 / divisor) * divisor
+                if first_pass_kwargs["height"] == 0: first_pass_kwargs["height"] = divisor
+                if first_pass_kwargs["width"] == 0: first_pass_kwargs["width"] = divisor
+                print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {first_pass_kwargs['height']}x{first_pass_kwargs['width']}")
+                t_p1 = time.perf_counter()
                 ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
                 with ctx:
+                    first_pass_result = self.pipeline(**first_pass_kwargs)
+                latents_low_res = first_pass_result.latents if hasattr(first_pass_result, "latents") else first_pass_result
+                print(f"[DEBUG] Passo 1 concluído em {time.perf_counter()-t_p1:.3f}s. Shape dos latentes de baixa resolução: {tuple(latents_low_res.shape)}")
+                log_tensor_info(latents_low_res, "Latentes (Passo 1)")
+                del first_pass_result
                 gc.collect()
                 if self.device == "cuda": torch.cuda.empty_cache()
                 # 2. Upscale dos latentes
                 print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
                 with ctx:
+                    # Chamada corrigida: posicional, sem argumentos de palavra-chave extras
+                    latents_high_res = self.latent_upsampler(latents_low_res)
                 log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
                 del latents_low_res
                 # 3. Configurar e executar o segundo passo
                 print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
                 second_pass_args = self.config.get("second_pass", {}).copy()
                 second_pass_kwargs = call_kwargs.copy()
+                second_pass_kwargs["height"] = original_height
+                second_pass_kwargs["width"] = original_width
                 second_pass_kwargs.update({
                     "guidance_scale": float(guidance_scale),
                     "rescaling_scale": second_pass_args.get("rescaling_scale"),
                     "skip_block_list": second_pass_args.get("skip_block_list"),
                 })
                 schedule_p2 = second_pass_args.get("timesteps") or second_pass_args.get("guidance_timesteps")
                 if schedule_p2:
                     second_pass_kwargs["timesteps"] = schedule_p2
                     second_pass_kwargs["guidance_timesteps"] = schedule_p2
                 second_pass_kwargs["latents"] = latents_high_res
                 t_p2 = time.perf_counter()
                 with ctx:
                     second_pass_result = self.pipeline(**second_pass_kwargs)
                 latents = second_pass_result.latents if hasattr(second_pass_result, "latents") else second_pass_result
                 del latents
             except Exception:
                 pass
             gc.collect()
             try:
                 if self.device == "cuda":
                 print(f"[DEBUG] finalize() no finally falhou: {e}")
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
+video_generation_service = VideoService()