Update api/ltx_server.py
Browse files
- api/ltx_server.py +27 -33
api/ltx_server.py
CHANGED
|
@@ -821,7 +821,9 @@ class VideoService:
|
|
| 821 |
if improve_texture:
|
| 822 |
if not self.latent_upsampler:
|
| 823 |
raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
|
| 824 |
-
|
|
|
|
|
|
|
| 825 |
# --- ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---
|
| 826 |
print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
|
| 827 |
t_pass1 = time.perf_counter()
|
|
@@ -830,23 +832,21 @@ class VideoService:
|
|
| 830 |
downscale_factor = self.config.get("downscale_factor", 0.6666666)
|
| 831 |
vae_scale_factor = self.pipeline.vae_scale_factor # Geralmente 8
|
| 832 |
|
| 833 |
-
# --- <INÍCIO DA LÓGICA DE CÁLCULO EXATA> ---
|
| 834 |
-
# Replica a fórmula da LTXMultiScalePipeline
|
| 835 |
x_width = int(width_padded * downscale_factor)
|
| 836 |
downscaled_width = x_width - (x_width % vae_scale_factor)
|
| 837 |
x_height = int(height_padded * downscale_factor)
|
| 838 |
downscaled_height = x_height - (x_height % vae_scale_factor)
|
| 839 |
print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
|
| 840 |
-
# --- <FIM DA LÓGICA DE CÁLCULO EXATA> ---
|
| 841 |
|
| 842 |
-
first_pass_kwargs
|
| 843 |
-
|
|
|
|
|
|
|
| 844 |
first_pass_kwargs.update({
|
| 845 |
"output_type": "latent",
|
| 846 |
"width": downscaled_width,
|
| 847 |
"height": downscaled_height,
|
| 848 |
"guidance_scale": float(guidance_scale),
|
| 849 |
-
**first_pass_config
|
| 850 |
})
|
| 851 |
|
| 852 |
print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
|
|
@@ -879,29 +879,29 @@ class VideoService:
|
|
| 879 |
cc = 1
|
| 880 |
for latents in latents_parts_up:
|
| 881 |
|
|
|
|
| 882 |
print("\n\n#########################################")
|
| 883 |
# # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
|
| 884 |
print(f"\n--- INICIANDO ETAPA 3/{cc} ")
|
| 885 |
-
|
|
|
|
| 886 |
second_pass_config = self.config.get("second_pass", {}).copy()
|
| 887 |
-
|
| 888 |
-
# Usa as dimensões da primeira passagem dobradas, como na pipeline original
|
| 889 |
second_pass_width = downscaled_width * 2
|
| 890 |
second_pass_height = downscaled_height * 2
|
| 891 |
print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
|
| 892 |
-
|
| 893 |
-
t_pass2 = time.perf_counter()
|
| 894 |
-
|
| 895 |
num_latent_frames_part = latents.shape[2]
|
| 896 |
-
|
| 897 |
log_tensor_info(latents, "Latentes input (Pre-Pós-Second Pass)")
|
| 898 |
|
| 899 |
-
|
| 900 |
vae_temporal_scale = self.pipeline.video_scale_factor # Geralmente 4 ou 8
|
| 901 |
num_pixel_frames_part = ((num_latent_frames_part - 1) * vae_temporal_scale) + 1
|
| 902 |
print(f"[DEBUG] Parte: {num_latent_frames_part - 1} latentes -> {num_pixel_frames_part} frames de pixel (alvo)")
|
| 903 |
-
|
| 904 |
-
second_pass_kwargs
|
|
|
|
|
|
|
|
|
|
| 905 |
second_pass_kwargs.update({
|
| 906 |
"output_type": "latent",
|
| 907 |
"width": second_pass_width,
|
|
@@ -909,7 +909,6 @@ class VideoService:
|
|
| 909 |
"num_frames": num_pixel_frames_part,
|
| 910 |
"latents": latents, # O tensor upscaled
|
| 911 |
"guidance_scale": float(guidance_scale),
|
| 912 |
-
**second_pass_config
|
| 913 |
})
|
| 914 |
|
| 915 |
print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
|
|
@@ -926,23 +925,18 @@ class VideoService:
|
|
| 926 |
else: # Geração de etapa única
|
| 927 |
print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
|
| 928 |
t_single = time.perf_counter()
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
single_pass_call_kwargs["output_type"] = "latent"
|
| 934 |
-
single_pass_call_kwargs["timesteps"] = single_pass_call_kwargs_config.get("timesteps")
|
| 935 |
-
single_pass_call_kwargs["guidance_scale"] = float(guidance_scale)
|
| 936 |
-
single_pass_call_kwargs["stg_scale"] = single_pass_call_kwargs_config.get("stg_scale")
|
| 937 |
-
single_pass_call_kwargs["rescaling_scale"] = single_pass_call_kwargs_config.get("rescaling_scale")
|
| 938 |
|
| 939 |
# Remove keys that might conflict or are not used in single pass / handled by above
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
|
| 944 |
|
| 945 |
-
latents = self.pipeline(**
|
| 946 |
log_tensor_info(latents, "Latentes Finais (Etapa Única)")
|
| 947 |
print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
|
| 948 |
|
|
@@ -967,7 +961,7 @@ class VideoService:
|
|
| 967 |
except Exception:
|
| 968 |
pass
|
| 969 |
|
| 970 |
-
latents_parts_vae = self._dividir_latentes_por_tamanho(latents_cpu_vae,4,
|
| 971 |
|
| 972 |
for latents in latents_parts_vae:
|
| 973 |
print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
|
|
|
|
| 821 |
if improve_texture:
|
| 822 |
if not self.latent_upsampler:
|
| 823 |
raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
|
| 824 |
+
|
| 825 |
+
first_pass_kwargs = call_kwargs.copy()
|
| 826 |
+
|
| 827 |
# --- ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---
|
| 828 |
print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
|
| 829 |
t_pass1 = time.perf_counter()
|
|
|
|
| 832 |
downscale_factor = self.config.get("downscale_factor", 0.6666666)
|
| 833 |
vae_scale_factor = self.pipeline.vae_scale_factor # Geralmente 8
|
| 834 |
|
|
|
|
|
|
|
| 835 |
x_width = int(width_padded * downscale_factor)
|
| 836 |
downscaled_width = x_width - (x_width % vae_scale_factor)
|
| 837 |
x_height = int(height_padded * downscale_factor)
|
| 838 |
downscaled_height = x_height - (x_height % vae_scale_factor)
|
| 839 |
print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
|
|
|
|
| 840 |
|
| 841 |
+
first_pass_kwargs.update({
|
| 842 |
+
**first_pass_config
|
| 843 |
+
})
|
| 844 |
+
|
| 845 |
first_pass_kwargs.update({
|
| 846 |
"output_type": "latent",
|
| 847 |
"width": downscaled_width,
|
| 848 |
"height": downscaled_height,
|
| 849 |
"guidance_scale": float(guidance_scale),
|
|
|
|
| 850 |
})
|
| 851 |
|
| 852 |
print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
|
|
|
|
| 879 |
cc = 1
|
| 880 |
for latents in latents_parts_up:
|
| 881 |
|
| 882 |
+
t_pass2 = time.perf_counter()
|
| 883 |
print("\n\n#########################################")
|
| 884 |
# # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
|
| 885 |
print(f"\n--- INICIANDO ETAPA 3/{cc} ")
|
| 886 |
+
|
| 887 |
+
second_pass_kwargs = first_pass_config.copy()
|
| 888 |
second_pass_config = self.config.get("second_pass", {}).copy()
|
| 889 |
+
|
|
|
|
| 890 |
second_pass_width = downscaled_width * 2
|
| 891 |
second_pass_height = downscaled_height * 2
|
| 892 |
print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
|
| 893 |
+
|
|
|
|
|
|
|
| 894 |
num_latent_frames_part = latents.shape[2]
|
|
|
|
| 895 |
log_tensor_info(latents, "Latentes input (Pre-Pós-Second Pass)")
|
| 896 |
|
|
|
|
| 897 |
vae_temporal_scale = self.pipeline.video_scale_factor # Geralmente 4 ou 8
|
| 898 |
num_pixel_frames_part = ((num_latent_frames_part - 1) * vae_temporal_scale) + 1
|
| 899 |
print(f"[DEBUG] Parte: {num_latent_frames_part - 1} latentes -> {num_pixel_frames_part} frames de pixel (alvo)")
|
| 900 |
+
|
| 901 |
+
second_pass_kwargs.update({
|
| 902 |
+
**second_pass_config
|
| 903 |
+
})
|
| 904 |
+
|
| 905 |
second_pass_kwargs.update({
|
| 906 |
"output_type": "latent",
|
| 907 |
"width": second_pass_width,
|
|
|
|
| 909 |
"num_frames": num_pixel_frames_part,
|
| 910 |
"latents": latents, # O tensor upscaled
|
| 911 |
"guidance_scale": float(guidance_scale),
|
|
|
|
| 912 |
})
|
| 913 |
|
| 914 |
print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
|
|
|
|
| 925 |
else: # Geração de etapa única
|
| 926 |
print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
|
| 927 |
t_single = time.perf_counter()
|
| 928 |
+
single_pass_kwargs = call_kwargs.copy()
|
| 929 |
+
single_pass_kwargs.update(self.config.get("first_pass", {}))
|
| 930 |
+
single_pass_kwargs["guidance_scale"] = float(guidance_scale)
|
| 931 |
+
single_pass_kwargs["output_type"] = "latent"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 932 |
|
| 933 |
# Remove keys that might conflict or are not used in single pass / handled by above
|
| 934 |
+
single_pass_kwargs.pop("num_inference_steps", None)
|
| 935 |
+
single_pass_kwargs.pop("first_pass", None)
|
| 936 |
+
single_pass_kwargs.pop("second_pass", None)
|
| 937 |
+
single_pass_kwargs.pop("downscale_factor", None)
|
| 938 |
|
| 939 |
+
latents = self.pipeline(**single_pass_kwargs).images
|
| 940 |
log_tensor_info(latents, "Latentes Finais (Etapa Única)")
|
| 941 |
print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
|
| 942 |
|
|
|
|
| 961 |
except Exception:
|
| 962 |
pass
|
| 963 |
|
| 964 |
+
latents_parts_vae = self._dividir_latentes_por_tamanho(latents_cpu_vae,4,1)
|
| 965 |
|
| 966 |
for latents in latents_parts_vae:
|
| 967 |
print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
|