Eueuiaa committed
Commit 35be4e2 · verified · 1 Parent(s): ab2fc5d

Update api/ltx_server.py

Files changed (1)
  1. api/ltx_server.py +108 -122
api/ltx_server.py CHANGED
@@ -705,11 +705,15 @@ class VideoService:
  }
  latents = None
  latents_list[]
+ temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+ results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+

  try:
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
- with ctx:
- if improve_texture:
+ if improve_texture:
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+ with ctx:
+
  if not self.latent_upsampler:
  raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
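This hunk moves `if improve_texture:` outside the autocast block and then re-enters autocast separately before each stage; the same device-conditional context expression is repeated before the first pass, the upscale, the second pass, the single-pass branch, and the final decode. A minimal sketch of that idiom as a reusable helper (the helper name is illustrative, not part of the commit):

import contextlib
import torch

def autocast_ctx(device: str, dtype: torch.dtype):
    # Autocast on CUDA, no-op context elsewhere, exactly as the diff does inline.
    if device == "cuda":
        return torch.autocast(device_type="cuda", dtype=dtype)
    return contextlib.nullcontext()

# usage sketch:
# with autocast_ctx(self.device, self.runtime_autocast_dtype):
#     latents = self.pipeline(**first_pass_kwargs).images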
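Note also that `latents_list[]` (a context line kept by this hunk) and the later `latents_parts[]` are not valid Python, and `latents_parts_up[latents_cpu]` is an indexing expression rather than an assignment. The way these names are used afterwards suggests plain list initializations were intended, roughly:

latents_list = []                    # latents produced by each branch
latents_parts = []                   # per-part latents to decode and encode
# latents_parts_up = [latents_cpu]   # wrap the single upscaled tensor so the loop runs once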
 
@@ -718,19 +722,16 @@ class VideoService:
  t_pass1 = time.perf_counter()

  first_pass_config = self.config.get("first_pass", {}).copy()
+ first_pass_config.pop("num_inference_steps", None)
  downscale_factor = self.config.get("downscale_factor", 0.6666666)
  vae_scale_factor = self.pipeline.vae_scale_factor # Geralmente 8
-
- # Replica a fórmula da LTXMultiScalePipeline
  x_width = int(width_padded * downscale_factor)
  downscaled_width = x_width - (x_width % vae_scale_factor)
  x_height = int(height_padded * downscale_factor)
  downscaled_height = x_height - (x_height % vae_scale_factor)
  print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
- # --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
-
+
  first_pass_kwargs = call_kwargs.copy()
-
  first_pass_kwargs.update({
  "output_type": "latent",
  "width": downscaled_width,
@@ -740,130 +741,129 @@ class VideoService:
  })

  print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
- base_latents = self.pipeline(**first_pass_kwargs).images
- log_tensor_info(base_latents, "Latentes Base (First Pass)")
+ latents = self.pipeline(**first_pass_kwargs).images
+ log_tensor_info(latents, "Latentes Base (First Pass)")
  print(f"[DEBUG] First Pass concluída em {time.perf_counter() - t_pass1:.2f}s")
+ del pipeline
+
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+ with ctx:

- # --- ETAPA 2: UPSCALE DOS LATENTES ---
  print("\n--- INICIANDO ETAPA 2: UPSCALE DOS LATENTES ---")
  t_upscale = time.perf_counter()
-
- upsampled_latents = self._upsample_latents_internal(base_latents)
- upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=base_latents)
- log_tensor_info(upsampled_latents, "Latentes Pós-Upscale")
+ upsampled_latents = self._upsample_latents_internal(latents)
+ upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents)
  print(f"[DEBUG] Upscale de Latentes concluído em {time.perf_counter() - t_upscale:.2f}s")
- del base_latents; gc.collect(); torch.cuda.empty_cache()
-
- par = 0
- latents_cpu_up = upsampled_latents.detach().to("cpu", non_blocking=True)
- torch.cuda.empty_cache()
- try:
- torch.cuda.ipc_collect()
- except Exception:
- pass
-
- latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
- temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-
-
+ latents_cpu = upsampled_latents.detach().to("cpu", non_blocking=True)
+ del upsampled_latents;
+ del latents; gc.collect(); torch.cuda.empty_cache()
+ del spatial_upscaler_path
+ #latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
+ latents_parts_up[latents_cpu]
+ #del latents_cpu_up
+
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+ with ctx:
  for latents in latents_parts_up:
-
+ latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu_up)
+
  # # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
  print("\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
-
  second_pass_config = self.config.get("second_pass", {}).copy()
- # --- <INÍCIO DA LÓGICA DE CÁLCULO EXATA PARA SECOND PASS> ---
- # Usa as dimensões da primeira passagem dobradas, como na pipeline original
+ second_pass_config.pop("num_inference_steps", None)
  second_pass_width = downscaled_width * 2
  second_pass_height = downscaled_height * 2
  print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
- # --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
  t_pass2 = time.perf_counter()
-
  second_pass_kwargs = call_kwargs.copy()
  second_pass_kwargs.update({
  "output_type": "latent",
  "width": second_pass_width,
  "height": second_pass_height,
- "latents": upsampled_latents, # O tensor upscaled
+ "latents": latents,
  "guidance_scale": float(guidance_scale),
  **second_pass_config
  })
-
  print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
  final_latents = self.pipeline(**second_pass_kwargs).images
  log_tensor_info(final_latents, "Latentes Finais (Pós-Second Pass)")
  print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
-
- latents_list.append(final_latents)
-
- else: # Geração de etapa única
+ latents_cpu = final_latents.detach().to("cpu", non_blocking=True)
+ latents_list.append(latents_cpu)
+ del final_latents; gc.collect(); torch.cuda.empty_cache()
+ del pipeline
+
+ else:
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+ with ctx:
  print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
  t_single = time.perf_counter()
- single_pass_kwargs = call_kwargs.copy()
- single_pass_kwargs.update(self.config.get("first_pass", {}))
- single_pass_kwargs["guidance_scale"] = float(guidance_scale)
- single_pass_kwargs["output_type"] = "latent"
+ single_pass_call_kwargs = call_kwargs.copy()
+ first_pass_config_from_yaml = self.config.get("first_pass", {})
+ single_pass_call_kwargs["timesteps"] = first_pass_config_from_yaml.get("timesteps")
+ single_pass_call_kwargs["guidance_scale"] = float(guidance_scale)
+ single_pass_call_kwargs["stg_scale"] = first_pass_config_from_yaml.get("stg_scale")
+ single_pass_call_kwargs["rescaling_scale"] = first_pass_config_from_yaml.get("rescaling_scale")
+ single_pass_call_kwargs["skip_block_list"] = first_pass_config_from_yaml.get("skip_block_list")
+ single_pass_call_kwargs.pop("num_inference_steps", None)
+ single_pass_call_kwargs.pop("first_pass", None)
+ single_pass_call_kwargs.pop("second_pass", None)
+ single_pass_call_kwargs.pop("downscale_factor", None)

- latents = self.pipeline(**single_pass_kwargs).images
- log_tensor_info(latents, "Latentes Finais (Etapa Única)")
+ latents_single_pass = pipeline_instance(**single_pass_call_kwargs).images
+ log_tensor_info(latents_single_pass, "Latentes Finais (Etapa Única)")
  print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
+ latents_cpu = latents_single_pass.detach().to("cpu", non_blocking=True)
+ latents_list.append(latents_single_pass)
+ del latents_single_pass; gc.collect(); torch.cuda.empty_cache()
+ del pipeline

- latents_list.append(latents)
-
- # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
- print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
-
- #latents_cpu = latents.detach().to("cpu", non_blocking=True)
- #torch.cuda.empty_cache()
- #try:
- # torch.cuda.ipc_collect()
- #except Exception:
- # pass
-
- latents_parts[]
- for latents in latents_list:
- latents_parts.append(self._dividir_latentes_por_tamanho(latents_cpu,4,1))
- temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
- results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-
-
- partes_mp4 = []
- par = 0
- for latents in latents_parts:
- print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
-
- par = par + 1
- output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
- final_output_path = None
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+ with ctx:
+ # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
+ print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")

- print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
- # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
- pixel_tensor = vae_manager_singleton.decode(
- latents.to(self.device, non_blocking=True),
- decode_timestep=float(self.config.get("decode_timestep", 0.05))
- )
- log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
+ latents_parts[]
+ for latents in latents_list:
+ latents_parts.append(self._dividir_latentes_por_tamanho(latents_cpu,4,1))
+
+ partes_mp4 = []
+ par = 0
+ for latents in latents_parts:
+ latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu)
+ print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
+ par = par + 1
+ output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+ final_output_path = None
+ print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
+ # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
+ pixel_tensor = vae_manager_singleton.decode(
+ latents.to(self.device, non_blocking=True),
+ decode_timestep=float(self.config.get("decode_timestep", 0.05))
+ )
+ log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")

- print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
- video_encode_tool_singleton.save_video_from_tensor(
- pixel_tensor,
- output_video_path,
- fps=call_kwargs["frame_rate"],
- progress_callback=progress_callback
- )
+ print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
+ video_encode_tool_singleton.save_video_from_tensor(
+ pixel_tensor,
+ output_video_path,
+ fps=call_kwargs["frame_rate"],
+ progress_callback=progress_callback
+ )

- candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
- try:
- shutil.move(output_video_path, candidate)
- final_output_path = candidate
- print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
- partes_mp4.append(final_output_path)
-
- except Exception as e:
- final_output_path = output_video_path
- print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
+ candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+ try:
+ shutil.move(output_video_path, candidate)
+ final_output_path = candidate
+ print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
+ partes_mp4.append(final_output_path)
+ except Exception as e:
+ final_output_path = output_video_path
+ print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
+
+ del pixel_tensor
+ del latents; gc.collect(); torch.cuda.empty_cache()
+ del candidate

  total_partes = len(partes_mp4)
  if (total_partes>1):
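The first-pass resolution used in the hunks above is obtained by scaling the padded dimensions by downscale_factor and snapping down to a multiple of vae_scale_factor, and the second pass targets exactly twice those values. A self-contained sketch of the same arithmetic, with illustrative numbers (not taken from the commit):

def downscale_dim(size: int, factor: float = 0.6666666, vae_scale_factor: int = 8) -> int:
    # Same formula as the diff: scale, truncate with int(), then drop the
    # remainder so the result is a multiple of vae_scale_factor.
    x = int(size * factor)
    return x - (x % vae_scale_factor)

# For example:
#   downscale_dim(704) -> 464   (704 * 0.6666666 = 469.33... -> 469 -> 464)
#   downscale_dim(480) -> 312   (480 * 0.6666666 = 319.99... -> 319 -> 312; note the int() truncation)
# The second pass then runs at twice these values (928x624 for this example),
# matching the 2x spatial upsample of the latents.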
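For readers following the improve_texture branch, the flow in the hunk above is: generate latents at the downscaled size, spatially upsample them, re-normalize their statistics against the base latents with AdaIN, then run a refinement pass at twice the downscaled resolution using those latents as the starting point. A condensed sketch of that shape, with the project helpers passed in as parameters (an illustration, not the committed code):

def multi_scale_generate(pipeline, upsample_fn, adain_fn, call_kwargs,
                         downscaled_width, downscaled_height, second_pass_config):
    # First pass: latents at reduced resolution.
    first_kwargs = dict(call_kwargs, output_type="latent",
                        width=downscaled_width, height=downscaled_height)
    base = pipeline(**first_kwargs).images

    # Upscale the latents spatially, then match their statistics to the base.
    up = upsample_fn(base)
    up = adain_fn(latents=up, reference_latents=base)

    # Second pass: refine at twice the first-pass resolution, reusing the
    # upscaled latents as the initial latents.
    second_kwargs = dict(call_kwargs, output_type="latent",
                         width=downscaled_width * 2, height=downscaled_height * 2,
                         latents=up, **second_pass_config)
    return pipeline(**second_kwargs).images

In the committed loop itself, `reference_latents=latents_cpu_up` points at a name that only appears in a commented-out line, and `del pipeline`, `pipeline_instance` and `del spatial_upscaler_path` refer to names that do not appear anywhere else in the visible diff (the rest of the method uses self.pipeline), so those lines look likely to raise NameError if reached, unless they are bound earlier in the method outside the lines shown.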
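The final stage splits the latents into parts and, for each part, decodes to pixels with the VAE manager and writes one MP4. Two details in the hunk are worth flagging: the partition loop splits `latents_cpu` on every iteration rather than the loop variable `latents`, and it appends the returned list as a single element (the helper is iterated directly elsewhere in the diff, so it presumably returns a list of tensors). A self-contained sketch of the apparent intent, with the decoder, encoder and splitter passed in as stand-ins for vae_manager_singleton.decode, video_encode_tool_singleton.save_video_from_tensor and self._dividir_latentes_por_tamanho:

import os
import shutil

def decode_and_encode_parts(latents_list, split_fn, vae_decode, save_video,
                            device, temp_dir, results_dir, used_seed,
                            frame_rate, decode_timestep=0.05):
    # Flatten: split each tensor in latents_list into decodable parts.
    parts = []
    for lat in latents_list:
        parts.extend(split_fn(lat, 4, 1))

    partes_mp4 = []
    for par, lat in enumerate(parts, start=1):
        pixel_tensor = vae_decode(lat.to(device, non_blocking=True),
                                  decode_timestep=decode_timestep)
        out_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
        save_video(pixel_tensor, out_path, fps=frame_rate)

        final_path = os.path.join(results_dir, f"output_par_{par}.mp4")
        try:
            shutil.move(out_path, final_path)
        except Exception:
            final_path = out_path   # keep the temp file, as the diff does on failure
        partes_mp4.append(final_path)
        del pixel_tensor
    return partes_mp4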
@@ -873,6 +873,10 @@ class VideoService:
  else:
  final_vid = partes_mp4[0]

+ del partes_mp4_fade
+ del latents_list
+ del latents_parts
+ del partes_mp4

  self._log_gpu_memory("Fim da Geração")
  return final_vid, used_seed
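The four del statements added here run on both sides of the if/else above, but `partes_mp4_fade` does not appear anywhere else in the visible diff and is presumably only created on the multi-part branch, so the single-part path would likely hit a NameError at this point. If the intent is best-effort cleanup (as in the try/del/except blocks this commit removes from the finally clause), a guarded form is enough; a sketch, not the committed code:

import contextlib

latents_list, latents_parts, partes_mp4 = [], [], []   # stand-ins so the sketch runs

# partes_mp4_fade may not exist on every path, so guard its deletion.
with contextlib.suppress(NameError):
    del partes_mp4_fade
del latents_list, latents_parts, partes_mp4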
@@ -882,31 +886,13 @@ class VideoService:
  print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
  print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
  raise
+
  finally:
- try:
- del latents
- except Exception:
- pass
- try:
- del multi_scale_pipeline
- except Exception:
- pass
-
  gc.collect()
- try:
- if self.device == "cuda":
- torch.cuda.empty_cache()
- try:
- torch.cuda.ipc_collect()
- except Exception:
- pass
- except Exception as e:
- print(f"[DEBUG] Limpeza GPU no finally falhou: {e}")
-
- try:
- self.finalize(keep_paths=[])
- except Exception as e:
- print(f"[DEBUG] finalize() no finally falhou: {e}")
+ torch.cuda.empty_cache()
+ torch.cuda.ipc_collect()
+ self.finalize(keep_paths=[])

+
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
  video_generation_service = VideoService()
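The new finally block drops the old per-call try/except guards: it now calls torch.cuda.empty_cache(), torch.cuda.ipc_collect() and self.finalize(keep_paths=[]) unconditionally. The two torch.cuda calls are no-ops when CUDA has not been initialized, but any exception raised inside a finally block replaces the exception currently propagating from the try body, which is what the removed guards protected against. A compact guarded variant, if that defensive behaviour is still wanted (a sketch; safe_cleanup is not a name from the commit):

import contextlib
import gc
import torch

def safe_cleanup(service):
    # Best-effort cleanup that never masks the original exception.
    gc.collect()
    with contextlib.suppress(Exception):
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()
    with contextlib.suppress(Exception):
        service.finalize(keep_paths=[])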
 