Update api/ltx_server.py

api/ltx_server.py CHANGED (+108 −122)
@@ -705,11 +705,15 @@ class VideoService:
         }
         latents = None
         latents_list = []
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+

         try:
+            if improve_texture:
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:
+
                     if not self.latent_upsampler:
                         raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")

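Both branches of the new `try` block open the same device-conditional mixed-precision context. As a standalone illustration of that pattern (the `autocast_ctx` helper below is not part of the commit; `torch.autocast` and `contextlib.nullcontext` are standard PyTorch/stdlib APIs):

    import contextlib
    import torch

    def autocast_ctx(device: str, dtype: torch.dtype):
        """Autocast on CUDA; a no-op context anywhere else (e.g. CPU-only hosts)."""
        if device == "cuda":
            # Matmuls and convolutions run in `dtype`; numerically sensitive ops stay fp32.
            return torch.autocast(device_type="cuda", dtype=dtype)
        return contextlib.nullcontext()

Used as `with autocast_ctx(self.device, self.runtime_autocast_dtype): ...`, this is the inline expression the diff repeats before each stage.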
@@ -718,19 +722,16 @@ class VideoService:
                     t_pass1 = time.perf_counter()

                     first_pass_config = self.config.get("first_pass", {}).copy()
+                    first_pass_config.pop("num_inference_steps", None)
                     downscale_factor = self.config.get("downscale_factor", 0.6666666)
                     vae_scale_factor = self.pipeline.vae_scale_factor  # usually 8
-                    # Replicates the formula from LTXMultiScalePipeline
                     x_width = int(width_padded * downscale_factor)
                     downscaled_width = x_width - (x_width % vae_scale_factor)
                     x_height = int(height_padded * downscale_factor)
                     downscaled_height = x_height - (x_height % vae_scale_factor)
                     print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
+
                     first_pass_kwargs = call_kwargs.copy()
                     first_pass_kwargs.update({
                         "output_type": "latent",
                         "width": downscaled_width,
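The formula above scales the padded dimensions and snaps them down to a multiple of the VAE scale factor; the second pass later targets exactly double the snapped values. A quick self-contained check of the arithmetic (sample numbers, not from the commit):

    def snap_down(value: float, multiple: int) -> int:
        """Truncate to int, then round down to the nearest multiple."""
        x = int(value)
        return x - (x % multiple)

    downscale_factor = 0.6666666
    vae_scale_factor = 8  # usually 8, per the comment in the diff

    width_padded, height_padded = 1280, 720
    w = snap_down(width_padded * downscale_factor, vae_scale_factor)   # 853 -> 848
    h = snap_down(height_padded * downscale_factor, vae_scale_factor)  # 479 -> 472
    print(w, h, 2 * w, 2 * h)  # 848 472 1696 944  (second-pass target: 1696x944)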
@@ -740,130 +741,129 @@ class VideoService:
                     })

                     print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
-                    log_tensor_info(
+                    latents = self.pipeline(**first_pass_kwargs).images
+                    log_tensor_info(latents, "Latentes Base (First Pass)")
                     print(f"[DEBUG] First Pass concluída em {time.perf_counter() - t_pass1:.2f}s")
+                del pipeline
+
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:

-                    # --- STAGE 2: LATENT UPSCALE ---
                     print("\n--- INICIANDO ETAPA 2: UPSCALE DOS LATENTES ---")
                     t_upscale = time.perf_counter()
-                    upsampled_latents =
-                    upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=base_latents)
-                    log_tensor_info(upsampled_latents, "Latentes Pós-Upscale")
+                    upsampled_latents = self._upsample_latents_internal(latents)
+                    upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents)
                     print(f"[DEBUG] Upscale de Latentes concluído em {time.perf_counter() - t_upscale:.2f}s")
-                    latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
-                    temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-                    results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+                    latents_cpu = upsampled_latents.detach().to("cpu", non_blocking=True)
+                    del upsampled_latents
+                    del latents; gc.collect(); torch.cuda.empty_cache()
+                    del spatial_upscaler_path
+                    #latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
+                    latents_parts_up = [latents_cpu]
+                    #del latents_cpu_up
+
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:
                     for latents in latents_parts_up:
+                        latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu)
+
                         # # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
                         print("\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
                         second_pass_config = self.config.get("second_pass", {}).copy()
+                        second_pass_config.pop("num_inference_steps", None)
-                        # Uses the first-pass dimensions doubled, as in the original pipeline
                         second_pass_width = downscaled_width * 2
                         second_pass_height = downscaled_height * 2
                         print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
-                        # --- <END OF EXACT CALCULATION LOGIC> ---
                         t_pass2 = time.perf_counter()
                         second_pass_kwargs = call_kwargs.copy()
                         second_pass_kwargs.update({
                             "output_type": "latent",
                             "width": second_pass_width,
                             "height": second_pass_height,
-                            "latents":
+                            "latents": latents,
                             "guidance_scale": float(guidance_scale),
                             **second_pass_config
                         })
                         print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
                         final_latents = self.pipeline(**second_pass_kwargs).images
                         log_tensor_info(final_latents, "Latentes Finais (Pós-Second Pass)")
                         print(f"[DEBUG] Second part Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
-                        latents_list.append(
+                        latents_cpu = final_latents.detach().to("cpu", non_blocking=True)
+                        latents_list.append(latents_cpu)
+                        del final_latents; gc.collect(); torch.cuda.empty_cache()
+                del pipeline
+
+            else:
+                ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+                with ctx:
                     print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
                     t_single = time.perf_counter()
+                    single_pass_call_kwargs = call_kwargs.copy()
+                    first_pass_config_from_yaml = self.config.get("first_pass", {})
+                    single_pass_call_kwargs["timesteps"] = first_pass_config_from_yaml.get("timesteps")
+                    single_pass_call_kwargs["guidance_scale"] = float(guidance_scale)
+                    single_pass_call_kwargs["stg_scale"] = first_pass_config_from_yaml.get("stg_scale")
+                    single_pass_call_kwargs["rescaling_scale"] = first_pass_config_from_yaml.get("rescaling_scale")
+                    single_pass_call_kwargs["skip_block_list"] = first_pass_config_from_yaml.get("skip_block_list")
+                    single_pass_call_kwargs.pop("num_inference_steps", None)
+                    single_pass_call_kwargs.pop("first_pass", None)
+                    single_pass_call_kwargs.pop("second_pass", None)
+                    single_pass_call_kwargs.pop("downscale_factor", None)

-                    log_tensor_info(
+                    latents_single_pass = pipeline_instance(**single_pass_call_kwargs).images
+                    log_tensor_info(latents_single_pass, "Latentes Finais (Etapa Única)")
                     print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
+                    latents_cpu = latents_single_pass.detach().to("cpu", non_blocking=True)
+                    latents_list.append(latents_cpu)
+                    del latents_single_pass; gc.collect(); torch.cuda.empty_cache()
+                del pipeline

-                    #latents_cpu = latents.detach().to("cpu", non_blocking=True)
-                    #torch.cuda.empty_cache()
-                    #try:
-                    #    torch.cuda.ipc_collect()
-                    #except Exception:
-                    #    pass
-                    latents_parts = []
-                    for latents in latents_list:
-                        latents_parts.append(self._dividir_latentes_por_tamanho(latents_cpu,4,1))
-                    temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-                    results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-                    partes_mp4 = []
-                    par = 0
-                    for latents in latents_parts:
-                        print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
-                        par = par + 1
-                        output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
-                        final_output_path = None
+            ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
+            with ctx:
+                # --- FINAL STAGE: DECODING AND MP4 ENCODING ---
+                print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")

+                latents_parts = []
+                for latents in latents_list:
+                    latents_parts.extend(self._dividir_latentes_por_tamanho(latents, 4, 1))
+
+                partes_mp4 = []
+                par = 0
+                for latents in latents_parts:
+                    latents = adain_filter_latent(latents=latents, reference_latents=latents_cpu)
+                    print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
+                    par = par + 1
+                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+                    final_output_path = None
+                    print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
+                    # Use the manager with a per-item decode timestep; avoids target_shape issues and the NoneType.decode path
+                    pixel_tensor = vae_manager_singleton.decode(
+                        latents.to(self.device, non_blocking=True),
+                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                    )
+                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")

+                    print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
+                    video_encode_tool_singleton.save_video_from_tensor(
+                        pixel_tensor,
+                        output_video_path,
+                        fps=call_kwargs["frame_rate"],
+                        progress_callback=progress_callback
+                    )

+                    candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                    try:
+                        shutil.move(output_video_path, candidate)
+                        final_output_path = candidate
+                        print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
+                        partes_mp4.append(final_output_path)
+                    except Exception as e:
+                        final_output_path = output_video_path
+                        print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
+
+                    del pixel_tensor
+                    del latents; gc.collect(); torch.cuda.empty_cache()
+                    del candidate

         total_partes = len(partes_mp4)
         if (total_partes>1):
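`adain_filter_latent` (imported from the LTX-Video code base) is applied three times above to pull freshly generated or upsampled latents back toward the channel statistics of a reference tensor. A rough sketch of what an AdaIN-style latent filter does, assuming `(B, C, F, H, W)` latents; this is illustrative only, not the library's actual implementation:

    import torch

    def adain_latent_sketch(latents: torch.Tensor,
                            reference_latents: torch.Tensor,
                            eps: float = 1e-6) -> torch.Tensor:
        """Match per-channel mean/std of `latents` to `reference_latents` (AdaIN)."""
        dims = (2, 3, 4)  # frame, height and width axes of a (B, C, F, H, W) tensor
        mean = latents.mean(dim=dims, keepdim=True)
        std = latents.std(dim=dims, keepdim=True)
        ref_mean = reference_latents.mean(dim=dims, keepdim=True)
        ref_std = reference_latents.std(dim=dims, keepdim=True)
        # Normalize, then re-apply the reference statistics.
        return (latents - mean) / (std + eps) * ref_std + ref_mean

In the diff this keeps the upsampled and per-part latents statistically consistent with the first-pass result before the second pass refines texture.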
@@ -873,6 +873,10 @@ class VideoService:
             else:
                 final_vid = partes_mp4[0]

+            del partes_mp4_fade
+            del latents_list
+            del latents_parts
+            del partes_mp4

             self._log_gpu_memory("Fim da Geração")
             return final_vid, used_seed
@@ -882,31 +886,13 @@ class VideoService:
             print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
             print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
             raise
+
         finally:
-            try:
-                del latents
-            except Exception:
-                pass
-            try:
-                del multi_scale_pipeline
-            except Exception:
-                pass
             gc.collect()
-            try:
-                torch.cuda.ipc_collect()
-            except Exception:
-                pass
-            except Exception as e:
-                print(f"[DEBUG] Limpeza GPU no finally falhou: {e}")
-            try:
-                self.finalize(keep_paths=[])
-            except Exception as e:
-                print(f"[DEBUG] finalize() no finally falhou: {e}")
+            torch.cuda.empty_cache()
+            torch.cuda.ipc_collect()
+            self.finalize(keep_paths=[])

+
 print("Criando instância do VideoService. O carregamento do modelo começará agora...")
 video_generation_service = VideoService()
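One note on the simplified `finally`: the deleted code wrapped each cleanup step in its own `try/except`, while the new block calls `gc.collect()`, `torch.cuda.empty_cache()`, `torch.cuda.ipc_collect()` and `self.finalize(keep_paths=[])` unguarded, so a failure in one step now skips the rest. A defensive variant in the spirit of the removed code (a sketch, not part of the commit):

    import gc
    import torch

    def best_effort_cleanup(finalize=None) -> None:
        """Run each teardown step independently so one failure cannot abort the rest."""
        steps = [gc.collect, torch.cuda.empty_cache, torch.cuda.ipc_collect]
        if finalize is not None:
            steps.append(finalize)
        for step in steps:
            try:
                step()
            except Exception as e:
                print(f"[DEBUG] cleanup step failed: {e}")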