Update api/ltx_server.py
api/ltx_server.py (+12 -8)
@@ -744,6 +744,8 @@ class VideoService:
 
         latents = None
         latents_list = []
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
 
         try:
             ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
@@ -803,8 +805,7 @@ class VideoService:
                 pass
 
             latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,4,1)
-
-
+
             for latents in latents_parts_up:
 
                 # # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
@@ -818,12 +819,17 @@ class VideoService:
                 print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
                 # --- <END OF THE EXACT CALCULATION LOGIC> ---
                 t_pass2 = time.perf_counter()
-
+
+                vae_temporal_scale = self.pipeline.video_scale_factor  # usually 4 or 8
+                num_pixel_frames_part = ((latents.shape[2] - 1) * vae_temporal_scale) + 1
+                print(f"[DEBUG] Part {i+1}: {latents.shape[2] - 1} latents -> {num_pixel_frames_part} pixel frames (target)")
+
                 second_pass_kwargs = call_kwargs.copy()
                 second_pass_kwargs.update({
                     "output_type": "latent",
                     "width": second_pass_width,
                     "height": second_pass_height,
+                    "num_frames": num_pixel_frames_part,
                     "latents": upsampled_latents,  # the upscaled tensor
                     "guidance_scale": float(guidance_scale),
                     **second_pass_config
@@ -863,19 +869,17 @@ class VideoService:
             latents_parts = []
             for latents in latents_list:
                 latents_parts.append(self._dividir_latentes_por_tamanho(latents,4,1))
-
-
+
 
             partes_mp4 = []
             par = 0
             for latents in latents_parts:
-
-
+
                 par = par + 1
                 output_video_path = os.path.join(results_dir, f"output_{used_seed}_{par}.mp4")
                 final_output_path = None
 
-                print("[DEBUG] Decoding latent block with VAE → pixel tensor...")
+                print(f"[DEBUG] Decoding latent block with VAE {par} → pixel tensor...")
                 # Use the manager with a per-item timestep; avoids target_shape issues and the NoneType.decode path
                 pixel_tensor = vae_manager_singleton.decode(
                     latents.to(self.device, non_blocking=True),
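
The new num_frames computation encodes the usual causal-VAE temporal mapping: the first latent frame decodes to a single pixel frame and every further latent frame to video_scale_factor pixel frames. A minimal sketch of that arithmetic, assuming a temporal scale of 8; the helper name and example values below are illustrative and not part of api/ltx_server.py:

def pixel_frames_from_latents(num_latent_frames: int, vae_temporal_scale: int) -> int:
    # First latent frame -> 1 pixel frame; each remaining latent frame -> vae_temporal_scale pixel frames.
    return (num_latent_frames - 1) * vae_temporal_scale + 1

# Example: a latent chunk with 25 temporal positions and a scale of 8
# should request (25 - 1) * 8 + 1 = 193 pixel frames in the second pass.
print(pixel_frames_from_latents(25, 8))  # 193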