Update api/ltx_server.py

api/ltx_server.py CHANGED (+108 −122)
@@ -705,11 +705,15 @@ class VideoService:
         }
         latents = None
         latents_list = []
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+

         try:
+            if improve_texture:
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:
+
                     if not self.latent_upsampler:
                         raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")

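Both branches of the new `try` block open the same device-conditional mixed-precision context. As a standalone illustration of that pattern (the `autocast_ctx` helper below is not part of the commit; `torch.autocast` and `contextlib.nullcontext` are standard PyTorch/stdlib APIs):

    import contextlib
    import torch

    def autocast_ctx(device: str, dtype: torch.dtype):
        """Autocast on CUDA; a no-op context anywhere else (e.g. CPU-only hosts)."""
        if device == "cuda":
            # Matmuls and convolutions run in `dtype`; numerically sensitive ops stay fp32.
            return torch.autocast(device_type="cuda", dtype=dtype)
        return contextlib.nullcontext()

Used as `with autocast_ctx(self.device, self.runtime_autocast_dtype): ...`, this is the inline expression the diff repeats before each stage.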
@@ -718,19 +722,16 @@ class VideoService:
                     t_pass1 = time.perf_counter()

                     first_pass_config = self.config.get("first_pass", {}).copy()
+                    first_pass_config.pop("num_inference_steps", None)
                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
                     vae_scale_factor = self.pipeline.vae_scale_factor  # usually 8
-                    # Replicates the formula from LTXMultiScalePipeline
                     x_width = int(width_padded * downscale_factor)
                     downscaled_width = x_width - (x_width % vae_scale_factor)
                     x_height = int(height_padded * downscale_factor)
                     downscaled_height = x_height - (x_height % vae_scale_factor)
                     print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
+
                     first_pass_kwargs = call_kwargs.copy()
                     first_pass_kwargs.update({
                         "output_type": "latent",
                         "width": downscaled_width,
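The formula above scales the padded dimensions and snaps them down to a multiple of the VAE scale factor; the second pass later targets exactly double the snapped values. A quick self-contained check of the arithmetic (sample numbers, not from the commit):

    def snap_down(value: float, multiple: int) -> int:
        """Truncate to int, then round down to the nearest multiple."""
        x = int(value)
        return x - (x % multiple)

    downscale_factor = 0.6666666
    vae_scale_factor = 8  # usually 8, per the comment in the diff

    width_padded, height_padded = 1280, 720
    w = snap_down(width_padded * downscale_factor, vae_scale_factor)   # 853 -> 848
    h = snap_down(height_padded * downscale_factor, vae_scale_factor)  # 479 -> 472
    print(w, h, 2 * w, 2 * h)  # 848 472 1696 944  (second-pass target: 1696x944)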
@@ -740,130 +741,129 @@ class VideoService:
                     })

                     print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
-                    log_tensor_info(
+                    latents = self.pipeline(**first_pass_kwargs).images
+                    log_tensor_info(latents, "Latentes Base (First Pass)")
                     print(f"[DEBUG] First Pass concluída em {time.perf_counter() - t_pass1:.2f}s")
+                del pipeline
+
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:

-                    # --- STAGE 2: LATENT UPSCALE ---
                     print("\n--- INICIANDO ETAPA 2: UPSCALE DOS LATENTES ---")
                     t_upscale = time.perf_counter()
-                    upsampled_latents =
-                    upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=base_latents)
-                    log_tensor_info(upsampled_latents, "Latentes Pós-Upscale")
+                    upsampled_latents = self._upsample_latents_internal(latents)
+                    upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents)
                     print(f"[DEBUG] Upscale de Latentes concluído em {time.perf_counter() - t_upscale:.2f}s")
-                    latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
-                    temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-                    results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+                    latents_cpu = upsampled_latents.detach().to("cpu", non_blocking=True)
+                    del upsampled_latents
+                    del latents; gc.collect(); torch.cuda.empty_cache()
+                    del spatial_upscaler_path
+                    #latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
+                    latents_parts_up = [latents_cpu]
+                    #del latents_cpu_up
+
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:
                     for latents in latents_parts_up:
+                        latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu)
+
                         # # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
                         print("\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
                         second_pass_config = self.config.get("second_pass", {}).copy()
+                        second_pass_config.pop("num_inference_steps", None)
-                        # Uses the first-pass dimensions doubled, as in the original pipeline
                         second_pass_width = downscaled_width * 2
                         second_pass_height = downscaled_height * 2
                         print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
-                        # --- <END OF EXACT CALCULATION LOGIC> ---
                         t_pass2 = time.perf_counter()
                         second_pass_kwargs = call_kwargs.copy()
                         second_pass_kwargs.update({
                             "output_type": "latent",
                             "width": second_pass_width,
                             "height": second_pass_height,
-                            "latents":
+                            "latents": latents,
                             "guidance_scale": float(guidance_scale),
                             **second_pass_config
                         })
                         print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
                         final_latents = self.pipeline(**second_pass_kwargs).images
                         log_tensor_info(final_latents, "Latentes Finais (Pós-Second Pass)")
                         print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
-                        latents_list.append(
+                        latents_cpu = final_latents.detach().to("cpu", non_blocking=True)
+                        latents_list.append(latents_cpu)
+                        del final_latents; gc.collect(); torch.cuda.empty_cache()
+                del pipeline
+
+            else:
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:
                     print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
                     t_single = time.perf_counter()
+                    single_pass_call_kwargs = call_kwargs.copy()
+                    first_pass_config_from_yaml = self.config.get("first_pass", {})
+                    single_pass_call_kwargs["timesteps"] = first_pass_config_from_yaml.get("timesteps")
+                    single_pass_call_kwargs["guidance_scale"] = float(guidance_scale)
+                    single_pass_call_kwargs["stg_scale"] = first_pass_config_from_yaml.get("stg_scale")
+                    single_pass_call_kwargs["rescaling_scale"] = first_pass_config_from_yaml.get("rescaling_scale")
+                    single_pass_call_kwargs["skip_block_list"] = first_pass_config_from_yaml.get("skip_block_list")
+                    single_pass_call_kwargs.pop("num_inference_steps", None)
+                    single_pass_call_kwargs.pop("first_pass", None)
+                    single_pass_call_kwargs.pop("second_pass", None)
+                    single_pass_call_kwargs.pop("downscale_factor", None)

-                    log_tensor_info(
+                    latents_single_pass = pipeline_instance(**single_pass_call_kwargs).images
+                    log_tensor_info(latents_single_pass, "Latentes Finais (Etapa Única)")
                     print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
+                    latents_cpu = latents_single_pass.detach().to("cpu", non_blocking=True)
+                    latents_list.append(latents_cpu)
+                    del latents_single_pass; gc.collect(); torch.cuda.empty_cache()
+                del pipeline

-                    #latents_cpu = latents.detach().to("cpu", non_blocking=True)
-                    #torch.cuda.empty_cache()
-                    #try:
-                    #    torch.cuda.ipc_collect()
-                    #except Exception:
-                    #    pass
-                    latents_parts = []
-                    for latents in latents_list:
-                        latents_parts.append(self._dividir_latentes_por_tamanho(latents_cpu,4,1))
-                    temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-                    results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-                    partes_mp4 = []
-                    par = 0
-                    for latents in latents_parts:
-                        print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
-                        par = par + 1
-                        output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
-                        final_output_path = None
+            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+            with ctx:
+                # --- FINAL STAGE: DECODING AND MP4 ENCODING ---
+                print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")

+                latents_parts = []
+                for latents in latents_list:
+                    latents_parts.extend(self._dividir_latentes_por_tamanho(latents, 4, 1))
+
+                partes_mp4 = []
+                par = 0
+                for latents in latents_parts:
+                    latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu)
+                    print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
+                    par = par + 1
+                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+                    final_output_path = None
+                    print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
+                    # Use the manager with a per-item decode timestep; avoids target_shape issues and the NoneType.decode path
+                    pixel_tensor = vae_manager_singleton.decode(
+                        latents.to(self.device, non_blocking=True),
+                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                    )
+                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")

+                    print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
+                    video_encode_tool_singleton.save_video_from_tensor(
+                        pixel_tensor,
+                        output_video_path,
+                        fps=call_kwargs["frame_rate"],
+                        progress_callback=progress_callback
+                    )

+                    candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                    try:
+                        shutil.move(output_video_path, candidate)
+                        final_output_path = candidate
+                        print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
+                        partes_mp4.append(final_output_path)
+                    except Exception as e:
+                        final_output_path = output_video_path
+                        print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
+
+                    del pixel_tensor
+                    del latents; gc.collect(); torch.cuda.empty_cache()
+                    del candidate

         total_partes = len(partes_mp4)
         if (total_partes>1):
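`adain_filter_latent` (imported from the LTX-Video code base) is applied three times above to pull freshly generated or upsampled latents back toward the channel statistics of a reference tensor. A rough sketch of what an AdaIN-style latent filter does, assuming `(B, C, F, H, W)` latents; this is illustrative only, not the library's actual implementation:

    import torch

    def adain_latent_sketch(latents: torch.Tensor,
                            reference_latents: torch.Tensor,
                            eps: float = 1e-6) -> torch.Tensor:
        """Match per-channel mean/std of `latents` to `reference_latents` (AdaIN)."""
        dims = (2, 3, 4)  # frame, height and width axes of a (B, C, F, H, W) tensor
        mean = latents.mean(dim=dims, keepdim=True)
        std = latents.std(dim=dims, keepdim=True)
        ref_mean = reference_latents.mean(dim=dims, keepdim=True)
        ref_std = reference_latents.std(dim=dims, keepdim=True)
        # Normalize, then re-apply the reference statistics.
        return (latents - mean) / (std + eps) * ref_std + ref_mean

In the diff this keeps the upsampled and per-part latents statistically consistent with the first-pass result before the second pass refines texture.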
@@ -873,6 +873,10 @@ class VideoService:
             else:
                 final_vid = partes_mp4[0]

+            del partes_mp4_fade
+            del latents_list
+            del latents_parts
+            del partes_mp4

             self._log_gpu_memory("Fim da Geração")
             return final_vid, used_seed
@@ -882,31 +886,13 @@ class VideoService:
             print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
             print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
             raise
+
         finally:
-            try:
-                del latents
-            except Exception:
-                pass
-            try:
-                del multi_scale_pipeline
-            except Exception:
-                pass
             gc.collect()
-            try:
-                torch.cuda.ipc_collect()
-            except Exception:
-                pass
-            except Exception as e:
-                print(f"[DEBUG] Limpeza GPU no finally falhou: {e}")
-            try:
-                self.finalize(keep_paths=[])
-            except Exception as e:
-                print(f"[DEBUG] finalize() no finally falhou: {e}")
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            self.finalize(keep_paths=[])

+
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
 video_generation_service = VideoService()
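One note on the simplified `finally`: the deleted code wrapped each cleanup step in its own `try/except`, while the new block calls `gc.collect()`, `torch.cuda.empty_cache()`, `torch.cuda.ipc_collect()` and `self.finalize(keep_paths=[])` unguarded, so a failure in one step now skips the rest. A defensive variant in the spirit of the removed code (a sketch, not part of the commit):

    import gc
    import torch

    def best_effort_cleanup(finalize=None) -> None:
        """Run each teardown step independently so one failure cannot abort the rest."""
        steps = [gc.collect, torch.cuda.empty_cache, torch.cuda.ipc_collect]
        if finalize is not None:
            steps.append(finalize)
        for step in steps:
            try:
                step()
            except Exception as e:
                print(f"[DEBUG] cleanup step failed: {e}")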