EuuIia committed on
Commit
c344381
·
verified ·
1 Parent(s): 3a06c45

Upload ltx_server.py

Browse files
Files changed (1) hide show
  1. api/ltx_server.py +32 -87
api/ltx_server.py CHANGED
@@ -575,100 +575,41 @@ class VideoService:
575
  print(f"[DEBUG] media_items shape={tuple(media.shape)}")
576
 
577
  latents = None
578
-
 
579
  try:
580
  if improve_texture:
581
  if not self.latent_upsampler:
582
  raise ValueError("Upscaler espacial não carregado.")
583
-
584
- # --- INÍCIO DA SEPARAÇÃO DOS PASSOS ---
585
-
586
- print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
587
-
588
- # 1. Configurar e executar o primeiro passo
589
  first_pass_args = self.config.get("first_pass", {}).copy()
590
- first_pass_kwargs = call_kwargs.copy()
591
- first_pass_kwargs.update({
592
- "guidance_scale": float(guidance_scale),
593
- "stg_scale": first_pass_args.get("stg_scale"),
594
- "rescaling_scale": first_pass_args.get("rescaling_scale"),
595
- "skip_block_list": first_pass_args.get("skip_block_list"),
596
- })
597
- schedule = first_pass_args.get("timesteps") or first_pass_args.get("guidance_timesteps")
598
- if schedule:
599
- first_pass_kwargs["timesteps"] = schedule
600
- first_pass_kwargs["guidance_timesteps"] = schedule
601
-
602
- # Reduzir dimensões para o primeiro passo, garantindo divisibilidade por 24
603
- downscale_factor = self.config.get("downscale_factor", 2)
604
- original_height = first_pass_kwargs["height"]
605
- original_width = first_pass_kwargs["width"]
606
- divisor = 24
607
-
608
- target_height_p1 = original_height // downscale_factor
609
- first_pass_kwargs["height"] = round(target_height_p1 / divisor) * divisor
610
 
611
- target_width_p1 = original_width // downscale_factor
612
- first_pass_kwargs["width"] = round(target_width_p1 / divisor) * divisor
613
-
614
- if first_pass_kwargs["height"] == 0: first_pass_kwargs["height"] = divisor
615
- if first_pass_kwargs["width"] == 0: first_pass_kwargs["width"] = divisor
616
-
617
- print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {first_pass_kwargs['height']}x{first_pass_kwargs['width']}")
618
-
619
- t_p1 = time.perf_counter()
 
620
  ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
621
  with ctx:
622
- first_pass_result = self.pipeline(**first_pass_kwargs)
623
-
624
- latents_low_res = first_pass_result.latents if hasattr(first_pass_result, "latents") else first_pass_result
625
- print(f"[DEBUG] Passo 1 concluído em {time.perf_counter()-t_p1:.3f}s. Shape dos latentes de baixa resolução: {tuple(latents_low_res.shape)}")
626
- log_tensor_info(latents_low_res, "Latentes (Passo 1)")
627
-
628
- del first_pass_result
629
- gc.collect()
630
- if self.device == "cuda": torch.cuda.empty_cache()
631
-
632
- # 2. Upscale dos latentes
633
- print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
634
- with ctx:
635
- # Chamada corrigida: posicional, sem argumentos de palavra-chave extras
636
- latents_high_res = self.latent_upsampler(latents_low_res)
637
 
638
- log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
639
- del latents_low_res
640
- gc.collect()
641
- if self.device == "cuda": torch.cuda.empty_cache()
642
-
643
- # 3. Configurar e executar o segundo passo
644
- print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
645
- second_pass_args = self.config.get("second_pass", {}).copy()
646
-
647
- second_pass_kwargs = call_kwargs.copy()
648
- second_pass_kwargs["height"] = original_height
649
- second_pass_kwargs["width"] = original_width
650
-
651
- second_pass_kwargs.update({
652
- "guidance_scale": float(guidance_scale),
653
- "stg_scale": second_pass_args.get("stg_scale"),
654
- "rescaling_scale": second_pass_args.get("rescaling_scale"),
655
- "skip_block_list": second_pass_args.get("skip_block_list"),
656
- })
657
-
658
- schedule_p2 = second_pass_args.get("timesteps") or second_pass_args.get("guidance_timesteps")
659
- if schedule_p2:
660
- second_pass_kwargs["timesteps"] = schedule_p2
661
- second_pass_kwargs["guidance_timesteps"] = schedule_p2
662
-
663
- second_pass_kwargs["latents"] = latents_high_res
664
-
665
- t_p2 = time.perf_counter()
666
- with ctx:
667
- second_pass_result = self.pipeline(**second_pass_kwargs)
668
-
669
- latents = second_pass_result.latents if hasattr(second_pass_result, "latents") else second_pass_result
670
- print(f"[DEBUG] Passo 2 concluído em {time.perf_counter()-t_p2:.3f}s. Shape dos latentes finais: {tuple(latents.shape)}")
671
-
672
  else:
673
  single_pass_kwargs = call_kwargs.copy()
674
  first_pass_config = self.config.get("first_pass", {})
@@ -775,7 +716,11 @@ class VideoService:
775
  del latents
776
  except Exception:
777
  pass
778
-
 
 
 
 
779
  gc.collect()
780
  try:
781
  if self.device == "cuda":
@@ -793,4 +738,4 @@ class VideoService:
793
  print(f"[DEBUG] finalize() no finally falhou: {e}")
794
 
795
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
796
- video_generation_service = VideoService()
 
575
  print(f"[DEBUG] media_items shape={tuple(media.shape)}")
576
 
577
  latents = None
578
+ multi_scale_pipeline = None
579
+
580
  try:
581
  if improve_texture:
582
  if not self.latent_upsampler:
583
  raise ValueError("Upscaler espacial não carregado.")
584
+ print("[DEBUG] Multi-escala: construindo pipeline...")
585
+ multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
 
 
 
 
586
  first_pass_args = self.config.get("first_pass", {}).copy()
587
+ first_pass_args["guidance_scale"] = float(guidance_scale)
588
+ second_pass_args = self.config.get("second_pass", {}).copy()
589
+ second_pass_args["guidance_scale"] = float(guidance_scale)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
 
591
+ multi_scale_call_kwargs = call_kwargs.copy()
592
+ multi_scale_call_kwargs.update(
593
+ {
594
+ "downscale_factor": self.config["downscale_factor"],
595
+ "first_pass": first_pass_args,
596
+ "second_pass": second_pass_args,
597
+ }
598
+ )
599
+ print("[DEBUG] Chamando multi_scale_pipeline...")
600
+ t_ms = time.perf_counter()
601
  ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
602
  with ctx:
603
+ result = multi_scale_pipeline(**multi_scale_call_kwargs)
604
+ print(f"[DEBUG] multi_scale_pipeline tempo={time.perf_counter()-t_ms:.3f}s")
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
+ if hasattr(result, "latents"):
607
+ latents = result.latents
608
+ elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
609
+ latents = result.images
610
+ else:
611
+ latents = result
612
+ print(f"[DEBUG] Latentes (multi-escala): shape={tuple(latents.shape)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
  else:
614
  single_pass_kwargs = call_kwargs.copy()
615
  first_pass_config = self.config.get("first_pass", {})
 
716
  del latents
717
  except Exception:
718
  pass
719
+ try:
720
+ del multi_scale_pipeline
721
+ except Exception:
722
+ pass
723
+
724
  gc.collect()
725
  try:
726
  if self.device == "cuda":
 
738
  print(f"[DEBUG] finalize() no finally falhou: {e}")
739
 
740
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
741
+ video_generation_service = VideoService()