Update api/ltx_server.py
Browse files
- api/ltx_server.py +59 -4
api/ltx_server.py
CHANGED
|
@@ -707,13 +707,22 @@ class VideoService:
|
|
| 707 |
single_pass_kwargs = call_kwargs.copy()
|
| 708 |
first_pass_config = self.config.get("first_pass", {}).copy()
|
| 709 |
|
| 710 |
-
|
| 711 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
#"guidance_scale": float(guidance_scale),
|
| 713 |
#"stg_scale": first_pass_config.get("stg_scale"),
|
| 714 |
#"rescaling_scale": first_pass_config.get("rescaling_scale"),
|
| 715 |
#"skip_block_list": first_pass_config.get("skip_block_list"),
|
| 716 |
-
|
| 717 |
#)
|
| 718 |
#schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
|
| 719 |
#if mode == "video-to-video":
|
|
@@ -736,8 +745,54 @@ class VideoService:
|
|
| 736 |
latents = result.images
|
| 737 |
else:
|
| 738 |
latents = result
|
| 739 |
-
print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
|
| 740 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 741 |
# Staging e escrita MP4 (simples: VAE → pixels → MP4)
|
| 742 |
|
| 743 |
latents_cpu = latents.detach().to("cpu", non_blocking=True)
|
|
|
|
| 707 |
single_pass_kwargs = call_kwargs.copy()
|
| 708 |
first_pass_config = self.config.get("first_pass", {}).copy()
|
| 709 |
|
| 710 |
+
single_pass_kwargs.update(
|
| 711 |
+
{
|
| 712 |
+
"skip_final_inference_steps": first_pass_config.get("skip_final_inference_steps"),
|
| 713 |
+
"stg_scale": first_pass_config.get("stg_scale"),
|
| 714 |
+
"stg_scale": first_pass_config.get("stg_scale"),
|
| 715 |
+
"rescaling_scale": first_pass_config.get("rescaling_scale"),
|
| 716 |
+
"guidance_timesteps": first_pass_config.get("guidance_timesteps"),
|
| 717 |
+
"skip_block_list": first_pass_config.get("skip_block_list"),
|
| 718 |
+
"num_inference_steps": first_pass_config.get("num_inference_steps"),
|
| 719 |
+
"skip_final_inference_steps": first_pass_config.get("skip_final_inference_steps"),
|
| 720 |
+
"cfg_star_rescale": first_pass_config.get("cfg_star_rescale"),
|
| 721 |
#"guidance_scale": float(guidance_scale),
|
| 722 |
#"stg_scale": first_pass_config.get("stg_scale"),
|
| 723 |
#"rescaling_scale": first_pass_config.get("rescaling_scale"),
|
| 724 |
#"skip_block_list": first_pass_config.get("skip_block_list"),
|
| 725 |
+
}
|
| 726 |
#)
|
| 727 |
#schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
|
| 728 |
#if mode == "video-to-video":
|
|
|
|
| 745 |
latents = result.images
|
| 746 |
else:
|
| 747 |
latents = result
|
| 748 |
+
print(f"[DEBUG] Latentes (single-pass) first : shape={tuple(latents.shape)}")
|
| 749 |
|
| 750 |
+
single_pass_kwargs = call_kwargs.copy()
|
| 751 |
+
first_pass_config = self.config.get("first_pass", {}).copy()
|
| 752 |
+
|
| 753 |
+
single_pass_kwargs.update(
|
| 754 |
+
{
|
| 755 |
+
"latents" : latents,
|
| 756 |
+
"skip_final_inference_steps": second_pass.get("skip_final_inference_steps"),
|
| 757 |
+
"stg_scale": second_pass.get("stg_scale"),
|
| 758 |
+
"stg_scale": second_pass.get("stg_scale"),
|
| 759 |
+
"rescaling_scale": second_pass.get("rescaling_scale"),
|
| 760 |
+
"guidance_timesteps": second_pass.get("guidance_timesteps"),
|
| 761 |
+
"skip_block_list": second_pass.get("skip_block_list"),
|
| 762 |
+
"num_inference_steps": second_pass.get("num_inference_steps"),
|
| 763 |
+
"skip_final_inference_steps": second_pass.get("skip_final_inference_steps"),
|
| 764 |
+
"cfg_star_rescale": second_pass.get("cfg_star_rescale"),
|
| 765 |
+
#"guidance_scale": float(guidance_scale),
|
| 766 |
+
#"stg_scale": first_pass_config.get("stg_scale"),
|
| 767 |
+
#"rescaling_scale": first_pass_config.get("rescaling_scale"),
|
| 768 |
+
#"skip_block_list": first_pass_config.get("skip_block_list"),
|
| 769 |
+
}
|
| 770 |
+
#)
|
| 771 |
+
#schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
|
| 772 |
+
#if mode == "video-to-video":
|
| 773 |
+
# schedule = [0.7]; print("[INFO] Modo video-to-video (etapa única): timesteps=[0.7]")
|
| 774 |
+
#if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
|
| 775 |
+
# single_pass_kwargs["timesteps"] = schedule
|
| 776 |
+
# single_pass_kwargs["guidance_timesteps"] = schedule
|
| 777 |
+
#print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")
|
| 778 |
+
|
| 779 |
+
#print("\n[INFO] Executando pipeline de etapa única...")
|
| 780 |
+
t_sp = time.perf_counter()
|
| 781 |
+
ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
|
| 782 |
+
with ctx:
|
| 783 |
+
result = self.pipeline(**single_pass_kwargs)
|
| 784 |
+
print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")
|
| 785 |
+
|
| 786 |
+
if hasattr(result, "latents"):
|
| 787 |
+
latents = result.latents
|
| 788 |
+
elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
|
| 789 |
+
latents = result.images
|
| 790 |
+
else:
|
| 791 |
+
latents = result
|
| 792 |
+
print(f"[DEBUG] Latentes (single-pass) segunda: shape={tuple(latents.shape)}")
|
| 793 |
+
|
| 794 |
+
|
| 795 |
+
|
| 796 |
# Staging e escrita MP4 (simples: VAE → pixels → MP4)
|
| 797 |
|
| 798 |
latents_cpu = latents.detach().to("cpu", non_blocking=True)
|