EuuIia committed on
Commit
3a06c45
·
verified ·
1 Parent(s): 5d3df0c

Update api/ltx_server.py

Browse files
Files changed (1) hide show
  1. api/ltx_server.py +39 -61
api/ltx_server.py CHANGED
@@ -503,8 +503,6 @@ class VideoService:
503
  torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
504
  self._log_gpu_memory("Início da Geração")
505
 
506
- #ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
507
-
508
  if mode == "image-to-video" and not start_image_filepath:
509
  raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
510
  if mode == "video-to-video" and not input_video_filepath:
@@ -577,82 +575,65 @@ class VideoService:
577
  print(f"[DEBUG] media_items shape={tuple(media.shape)}")
578
 
579
  latents = None
580
- multi_scale_pipeline = None
581
-
582
-
583
  try:
584
  if improve_texture:
585
  if not self.latent_upsampler:
586
  raise ValueError("Upscaler espacial não carregado.")
587
 
 
 
588
  print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
589
 
590
- single_pass_kwargs = call_kwargs.copy()
591
- first_pass_config = self.config.get("first_pass", {})
592
- single_pass_kwargs.update(
593
- {
594
- "guidance_scale": float(guidance_scale),
595
- "stg_scale": first_pass_config.get("stg_scale"),
596
- "rescaling_scale": first_pass_config.get("rescaling_scale"),
597
- "skip_block_list": first_pass_config.get("skip_block_list"),
598
- }
599
- )
600
- schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
601
- if mode == "video-to-video":
602
- schedule = [0.7]; print("[INFO] Modo video-to-video (etapa única): timesteps=[0.7]")
603
- if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
604
- single_pass_kwargs["timesteps"] = schedule
605
- single_pass_kwargs["guidance_timesteps"] = schedule
606
- print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")
607
-
608
-
609
- # ==================== NOVA LÓGICA DE DIMENSÕES AQUI ====================
610
  downscale_factor = self.config.get("downscale_factor", 2)
611
- original_height = single_pass_kwargs["height"]
612
- original_width = single_pass_kwargs["width"]
613
  divisor = 24
614
 
615
- # Calcula a altura para o primeiro passo, garantindo divisibilidade
616
  target_height_p1 = original_height // downscale_factor
617
- single_pass_kwargs["height"] = round(target_height_p1 / divisor) * divisor
618
 
619
- # Calcula a largura para o primeiro passo, garantindo divisibilidade
620
  target_width_p1 = original_width // downscale_factor
621
- single_pass_kwargs["width"] = round(target_width_p1 / divisor) * divisor
622
 
623
- # Medida de segurança para evitar dimensões zero
624
- if single_pass_kwargs["height"] == 0: first_pass_kwargs["height"] = divisor
625
- if single_pass_kwargs["width"] == 0: first_pass_kwargs["width"] = divisor
626
- # =======================================================================
627
 
628
- print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {single_pass_kwargs['height']}x{single_pass_kwargs['width']}")
629
 
630
-
631
- print("\n[INFO] Executando pipeline promeira etapa...")
632
- t_sp = time.perf_counter()
633
  ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
634
  with ctx:
635
- latents = self.pipeline(**single_pass_kwargs).frames
636
- print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")
637
-
638
-
639
- print(f"[DEBUG] Latentes (first_pass_kwargs): shape={tuple(latents.shape)}")
640
 
641
-
 
 
642
 
643
- del single_pass_kwargs
644
  gc.collect()
645
  if self.device == "cuda": torch.cuda.empty_cache()
646
 
647
  # 2. Upscale dos latentes
648
  print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
649
  with ctx:
650
- latents_high_res = self.latent_upsampler(
651
- latents=latents,
652
- output_height=original_height,
653
- output_width=original_width,
654
- output_type="latent"
655
- ).frames
656
 
657
  log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
658
  del latents_low_res
@@ -662,7 +643,10 @@ class VideoService:
662
  # 3. Configurar e executar o segundo passo
663
  print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
664
  second_pass_args = self.config.get("second_pass", {}).copy()
 
665
  second_pass_kwargs = call_kwargs.copy()
 
 
666
 
667
  second_pass_kwargs.update({
668
  "guidance_scale": float(guidance_scale),
@@ -670,18 +654,16 @@ class VideoService:
670
  "rescaling_scale": second_pass_args.get("rescaling_scale"),
671
  "skip_block_list": second_pass_args.get("skip_block_list"),
672
  })
673
- # O segundo passo geralmente usa uma fração dos timesteps totais (ex: 70%)
674
  schedule_p2 = second_pass_args.get("timesteps") or second_pass_args.get("guidance_timesteps")
675
  if schedule_p2:
676
  second_pass_kwargs["timesteps"] = schedule_p2
677
  second_pass_kwargs["guidance_timesteps"] = schedule_p2
678
 
679
- # Adiciona os latentes do upscale como 'latents' iniciais para o segundo passo
680
  second_pass_kwargs["latents"] = latents_high_res
681
 
682
  t_p2 = time.perf_counter()
683
  with ctx:
684
- # Executa a pipeline principal para o segundo passo
685
  second_pass_result = self.pipeline(**second_pass_kwargs)
686
 
687
  latents = second_pass_result.latents if hasattr(second_pass_result, "latents") else second_pass_result
@@ -793,11 +775,7 @@ class VideoService:
793
  del latents
794
  except Exception:
795
  pass
796
- try:
797
- del multi_scale_pipeline
798
- except Exception:
799
- pass
800
-
801
  gc.collect()
802
  try:
803
  if self.device == "cuda":
@@ -815,4 +793,4 @@ class VideoService:
815
  print(f"[DEBUG] finalize() no finally falhou: {e}")
816
 
817
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
818
- video_generation_service = VideoService()
 
503
  torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
504
  self._log_gpu_memory("Início da Geração")
505
 
 
 
506
  if mode == "image-to-video" and not start_image_filepath:
507
  raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
508
  if mode == "video-to-video" and not input_video_filepath:
 
575
  print(f"[DEBUG] media_items shape={tuple(media.shape)}")
576
 
577
  latents = None
578
+
 
 
579
  try:
580
  if improve_texture:
581
  if not self.latent_upsampler:
582
  raise ValueError("Upscaler espacial não carregado.")
583
 
584
+ # --- INÍCIO DA SEPARAÇÃO DOS PASSOS ---
585
+
586
  print("[DEBUG] Multi-escala: Iniciando Passo 1 (geração de latentes base).")
587
 
588
+ # 1. Configurar e executar o primeiro passo
589
+ first_pass_args = self.config.get("first_pass", {}).copy()
590
+ first_pass_kwargs = call_kwargs.copy()
591
+ first_pass_kwargs.update({
592
+ "guidance_scale": float(guidance_scale),
593
+ "stg_scale": first_pass_args.get("stg_scale"),
594
+ "rescaling_scale": first_pass_args.get("rescaling_scale"),
595
+ "skip_block_list": first_pass_args.get("skip_block_list"),
596
+ })
597
+ schedule = first_pass_args.get("timesteps") or first_pass_args.get("guidance_timesteps")
598
+ if schedule:
599
+ first_pass_kwargs["timesteps"] = schedule
600
+ first_pass_kwargs["guidance_timesteps"] = schedule
601
+
602
+ # Reduzir dimensões para o primeiro passo, garantindo divisibilidade por 24
 
 
 
 
 
603
  downscale_factor = self.config.get("downscale_factor", 2)
604
+ original_height = first_pass_kwargs["height"]
605
+ original_width = first_pass_kwargs["width"]
606
  divisor = 24
607
 
 
608
  target_height_p1 = original_height // downscale_factor
609
+ first_pass_kwargs["height"] = round(target_height_p1 / divisor) * divisor
610
 
 
611
  target_width_p1 = original_width // downscale_factor
612
+ first_pass_kwargs["width"] = round(target_width_p1 / divisor) * divisor
613
 
614
+ if first_pass_kwargs["height"] == 0: first_pass_kwargs["height"] = divisor
615
+ if first_pass_kwargs["width"] == 0: first_pass_kwargs["width"] = divisor
 
 
616
 
617
+ print(f"[DEBUG] Passo 1: Dimensões reduzidas e ajustadas para {first_pass_kwargs['height']}x{first_pass_kwargs['width']}")
618
 
619
+ t_p1 = time.perf_counter()
 
 
620
  ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
621
  with ctx:
622
+ first_pass_result = self.pipeline(**first_pass_kwargs)
 
 
 
 
623
 
624
+ latents_low_res = first_pass_result.latents if hasattr(first_pass_result, "latents") else first_pass_result
625
+ print(f"[DEBUG] Passo 1 concluído em {time.perf_counter()-t_p1:.3f}s. Shape dos latentes de baixa resolução: {tuple(latents_low_res.shape)}")
626
+ log_tensor_info(latents_low_res, "Latentes (Passo 1)")
627
 
628
+ del first_pass_result
629
  gc.collect()
630
  if self.device == "cuda": torch.cuda.empty_cache()
631
 
632
  # 2. Upscale dos latentes
633
  print("[DEBUG] Multi-escala: Fazendo upscale dos latentes com latent_upsampler.")
634
  with ctx:
635
+ # Chamada corrigida: posicional, sem argumentos de palavra-chave extras
636
+ latents_high_res = self.latent_upsampler(latents_low_res)
 
 
 
 
637
 
638
  log_tensor_info(latents_high_res, "Latentes (Pós-Upscale)")
639
  del latents_low_res
 
643
  # 3. Configurar e executar o segundo passo
644
  print("[DEBUG] Multi-escala: Iniciando Passo 2 (refinamento em alta resolução).")
645
  second_pass_args = self.config.get("second_pass", {}).copy()
646
+
647
  second_pass_kwargs = call_kwargs.copy()
648
+ second_pass_kwargs["height"] = original_height
649
+ second_pass_kwargs["width"] = original_width
650
 
651
  second_pass_kwargs.update({
652
  "guidance_scale": float(guidance_scale),
 
654
  "rescaling_scale": second_pass_args.get("rescaling_scale"),
655
  "skip_block_list": second_pass_args.get("skip_block_list"),
656
  })
657
+
658
  schedule_p2 = second_pass_args.get("timesteps") or second_pass_args.get("guidance_timesteps")
659
  if schedule_p2:
660
  second_pass_kwargs["timesteps"] = schedule_p2
661
  second_pass_kwargs["guidance_timesteps"] = schedule_p2
662
 
 
663
  second_pass_kwargs["latents"] = latents_high_res
664
 
665
  t_p2 = time.perf_counter()
666
  with ctx:
 
667
  second_pass_result = self.pipeline(**second_pass_kwargs)
668
 
669
  latents = second_pass_result.latents if hasattr(second_pass_result, "latents") else second_pass_result
 
775
  del latents
776
  except Exception:
777
  pass
778
+
 
 
 
 
779
  gc.collect()
780
  try:
781
  if self.device == "cuda":
 
793
  print(f"[DEBUG] finalize() no finally falhou: {e}")
794
 
795
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
796
+ video_generation_service = VideoService()