EuuIia commited on
Commit
953982d
·
verified ·
1 Parent(s): 82b2143

Update api/ltx_server.py

Browse files
Files changed (1) hide show
  1. api/ltx_server.py +137 -181
api/ltx_server.py CHANGED
@@ -42,6 +42,8 @@ import shutil
42
  import contextlib
43
  import time
44
  import traceback
 
 
45
 
46
  # Singletons (versões simples)
47
  from managers.vae_manager import vae_manager_singleton
@@ -157,6 +159,9 @@ add_deps_to_path()
157
 
158
  from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
159
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 
 
 
160
 
161
  # --- 4. FUNÇÕES HELPER DE LOG ---
162
  def log_tensor_info(tensor, name="Tensor"):
@@ -174,6 +179,28 @@ def log_tensor_info(tensor, name="Tensor"):
174
  pass
175
  print("------------------------------------------\n")
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
178
  class VideoService:
179
  def __init__(self):
@@ -441,7 +468,7 @@ class VideoService:
441
  chunks.append(latents_brutos)
442
  print("================PODA CAUSAL=================")
443
  return chunks
444
-
445
  def _get_total_frames(self, video_path: str) -> int:
446
  cmd = [
447
  "ffprobe",
@@ -455,8 +482,6 @@ class VideoService:
455
  result = subprocess.run(cmd, capture_output=True, text=True, check=True)
456
  return int(result.stdout.strip())
457
 
458
-
459
-
460
  def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
461
  """
462
  Gera uma nova lista de vídeos aplicando transições suaves (blend frame a frame)
@@ -538,7 +563,7 @@ class VideoService:
538
  print("===========CONCATECAO CAUSAL=============")
539
  print(f"[DEBUG] {nova_lista}")
540
  return nova_lista
541
-
542
  def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
543
  """
544
  Concatena múltiplos MP4s sem reencode usando o demuxer do ffmpeg.
@@ -564,8 +589,11 @@ class VideoService:
564
  os.remove(list_path)
565
  except Exception:
566
  pass
 
567
 
568
-
 
 
569
  def generate(
570
  self,
571
  prompt,
@@ -587,7 +615,6 @@ class VideoService:
587
  guidance_scale=3.0,
588
  improve_texture=True,
589
  progress_callback=None,
590
- # Sempre latent → VAE → MP4 (simples)
591
  external_decode=True,
592
  ):
593
  t_all = time.perf_counter()
@@ -596,211 +623,140 @@ class VideoService:
596
  torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
597
  self._log_gpu_memory("Início da Geração")
598
 
 
599
  if mode == "image-to-video" and not start_image_filepath:
600
  raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
601
- if mode == "video-to-video" and not input_video_filepath:
602
- raise ValueError("O vídeo de entrada é obrigatório para o modo video-to-video")
603
-
604
  used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
605
  seed_everething(used_seed); print(f"[DEBUG] Seed usado: {used_seed}")
606
-
607
  FPS = 24.0; MAX_NUM_FRAMES = 2570
608
  target_frames_rounded = round(duration * FPS)
609
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
610
  actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
611
- print(f"[DEBUG] Frames alvo: {actual_num_frames} (dur={duration}s @ {FPS}fps)")
612
-
613
  height_padded = ((height - 1) // 32 + 1) * 32
614
  width_padded = ((width - 1) // 32 + 1) * 32
615
  padding_values = calculate_padding(height, width, height_padded, width_padded)
616
- print(f"[DEBUG] Dimensões: ({height},{width}) -> pad ({height_padded},{width_padded}); padding={padding_values}")
617
-
618
  generator = torch.Generator(device=self.device).manual_seed(used_seed)
 
619
  conditioning_items = []
620
-
621
  if mode == "image-to-video":
622
- start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
623
- conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
624
- if middle_image_filepath and middle_frame_number is not None:
625
- middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
626
- safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
627
- conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
628
- if end_image_filepath:
629
- end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
630
- last_frame_index = actual_num_frames - 1
631
- conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
632
- print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
633
-
634
- # Sempre pedimos latentes (simples)
635
  call_kwargs = {
636
- "prompt": prompt,
637
- "negative_prompt": negative_prompt,
638
- "height": height_padded,
639
- "width": width_padded,
640
- "num_frames": actual_num_frames,
641
- "frame_rate": int(FPS),
642
- "generator": generator,
643
- "output_type": "latent",
644
  "conditioning_items": conditioning_items if conditioning_items else None,
645
- "media_items": None,
646
- "decode_timestep": self.config["decode_timestep"],
647
- "decode_noise_scale": self.config["decode_noise_scale"],
648
- "stochastic_sampling": self.config["stochastic_sampling"],
649
- "image_cond_noise_scale": 0.01,
650
- "is_video": True,
651
- "vae_per_channel_normalize": True,
652
- "mixed_precision": (self.config["precision"] == "mixed_precision"),
653
- "offload_to_cpu": False,
654
- "enhance_prompt": False,
655
- "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
656
  }
657
- print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
658
-
659
- if mode == "video-to-video":
660
- media = load_media_file(
661
- media_path=input_video_filepath,
662
- height=height,
663
- width=width,
664
- max_frames=int(frames_to_use),
665
- padding=padding_values,
666
- ).to(self.device)
667
- call_kwargs["media_items"] = media
668
- print(f"[DEBUG] media_items shape={tuple(media.shape)}")
669
-
670
  latents = None
671
- multi_scale_pipeline = None
672
 
673
  try:
674
- if improve_texture:
675
- if not self.latent_upsampler:
676
- raise ValueError("Upscaler espacial não carregado.")
677
- print("[DEBUG] Multi-escala: construindo pipeline...")
678
- multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
679
- first_pass_args = self.config.get("first_pass", {}).copy()
680
- first_pass_args["guidance_scale"] = float(guidance_scale)
681
- second_pass_args = self.config.get("second_pass", {}).copy()
682
- second_pass_args["guidance_scale"] = float(guidance_scale)
683
-
684
- multi_scale_call_kwargs = call_kwargs.copy()
685
- multi_scale_call_kwargs.update(
686
- {
687
- "downscale_factor": self.config["downscale_factor"],
688
- "first_pass": first_pass_args,
689
- "second_pass": second_pass_args,
690
- }
691
- )
692
- print("[DEBUG] Chamando multi_scale_pipeline...")
693
- t_ms = time.perf_counter()
694
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
695
- with ctx:
696
- result = multi_scale_pipeline(**multi_scale_call_kwargs)
697
- print(f"[DEBUG] multi_scale_pipeline tempo={time.perf_counter()-t_ms:.3f}s")
698
-
699
- if hasattr(result, "latents"):
700
- latents = result.latents
701
- elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
702
- latents = result.images
703
- else:
704
- latents = result
705
- print(f"[DEBUG] Latentes (multi-escala): shape={tuple(latents.shape)}")
706
- else:
707
- single_pass_kwargs = call_kwargs.copy()
708
- first_pass_config = self.config.get("first_pass", {})
709
- single_pass_kwargs.update(
710
- {
711
  "guidance_scale": float(guidance_scale),
712
- "stg_scale": first_pass_config.get("stg_scale"),
713
- "rescaling_scale": first_pass_config.get("rescaling_scale"),
714
- "skip_block_list": first_pass_config.get("skip_block_list"),
715
- }
716
- )
717
- schedule = first_pass_config.get("timesteps") or first_pass_config.get("guidance_timesteps")
718
- if mode == "video-to-video":
719
- schedule = [0.7]; print("[INFO] Modo video-to-video (etapa única): timesteps=[0.7]")
720
- if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
721
- single_pass_kwargs["timesteps"] = schedule
722
- single_pass_kwargs["guidance_timesteps"] = schedule
723
- print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")
724
-
725
- print("\n[INFO] Executando pipeline de etapa única...")
726
- t_sp = time.perf_counter()
727
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
728
- with ctx:
729
- result = self.pipeline(**single_pass_kwargs)
730
- print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")
731
-
732
- if hasattr(result, "latents"):
733
- latents = result.latents
734
- elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
735
- latents = result.images
736
- else:
737
- latents = result
738
- print(f"[DEBUG] Latentes (single-pass): shape={tuple(latents.shape)}")
 
 
 
 
 
 
739
 
740
- # Staging e escrita MP4 (simples: VAE pixels → MP4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
741
 
742
  latents_cpu = latents.detach().to("cpu", non_blocking=True)
743
- torch.cuda.empty_cache()
744
- try:
745
- torch.cuda.ipc_collect()
746
- except Exception:
747
- pass
748
-
749
- latents_parts = self._dividir_latentes_por_tamanho(latents_cpu,4,1)
750
-
751
  temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
752
  results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
753
-
754
- partes_mp4 = []
755
- par = 0
756
 
757
- for latents in latents_parts:
758
- print(f"[DEBUG] Partição {par}: {tuple(latents.shape)}")
759
-
760
- par = par + 1
761
- output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
762
- final_output_path = None
763
-
764
- print("[DEBUG] Decodificando bloco de latentes com VAE → tensor de pixels...")
765
- # Usar manager com timestep por item; previne target_shape e rota NoneType.decode
766
- pixel_tensor = vae_manager_singleton.decode(
767
- latents.to(self.device, non_blocking=True),
768
- decode_timestep=float(self.config.get("decode_timestep", 0.05))
769
- )
770
- log_tensor_info(pixel_tensor, "Pixel tensor (VAE saída)")
771
-
772
- print("[DEBUG] Codificando MP4 a partir do tensor de pixels (bloco inteiro)...")
773
- video_encode_tool_singleton.save_video_from_tensor(
774
- pixel_tensor,
775
- output_video_path,
776
- fps=call_kwargs["frame_rate"],
777
- progress_callback=progress_callback
778
- )
779
-
780
- candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
781
- try:
782
- shutil.move(output_video_path, candidate)
783
- final_output_path = candidate
784
- print(f"[DEBUG] MP4 parte {par} movido para {final_output_path}")
785
- partes_mp4.append(final_output_path)
786
-
787
- except Exception as e:
788
- final_output_path = output_video_path
789
- print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
790
-
791
- total_partes = len(partes_mp4)
792
- if (total_partes>1):
793
- final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
794
- partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
795
- self._concat_mp4s_no_reencode(partes_mp4_fade, final_vid)
796
  else:
797
- final_vid = partes_mp4[0]
798
-
799
 
800
  self._log_gpu_memory("Fim da Geração")
801
- return final_vid, used_seed
802
 
803
-
804
  except Exception as e:
805
  print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
806
  print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
@@ -832,4 +788,4 @@ class VideoService:
832
  print(f"[DEBUG] finalize() no finally falhou: {e}")
833
 
834
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
835
- video_generation_service = VideoService()
 
42
  import contextlib
43
  import time
44
  import traceback
45
+ from einops import rearrange
46
+ import torch.nn.functional as F
47
 
48
  # Singletons (versões simples)
49
  from managers.vae_manager import vae_manager_singleton
 
159
 
160
  from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
161
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
162
+ from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
163
+ from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
164
+
165
 
166
  # --- 4. FUNÇÕES HELPER DE LOG ---
167
  def log_tensor_info(tensor, name="Tensor"):
 
179
  pass
180
  print("------------------------------------------\n")
181
 
182
+
183
+ @torch.no_grad()
184
+ def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
185
+ """
186
+ Lógica extraída diretamente da LTXMultiScalePipeline para upscale de latentes.
187
+ """
188
+ if not self.latent_upsampler:
189
+ raise ValueError("Latent Upsampler não está carregado.")
190
+
191
+ # Garante que os modelos estejam no dispositivo correto
192
+ self.latent_upsampler.to(self.device)
193
+ self.pipeline.vae.to(self.device)
194
+ print(f"[DEBUG-UPSAMPLE] Shape de entrada: {tuple(latents.shape)}")
195
+ latents = un_normalize_latents(latents, self.pipeline.vae, vae_per_channel_normalize=True)
196
+ upsampled_latents = self.latent_upsampler(latents)
197
+ upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
198
+ print(f"[DEBUG-UPSAMPLE] Shape de saída: {tuple(upsampled_latents.shape)}")
199
+
200
+ return upsampled_latents
201
+
202
+
203
+
204
  # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
205
  class VideoService:
206
  def __init__(self):
 
468
  chunks.append(latents_brutos)
469
  print("================PODA CAUSAL=================")
470
  return chunks
471
+
472
  def _get_total_frames(self, video_path: str) -> int:
473
  cmd = [
474
  "ffprobe",
 
482
  result = subprocess.run(cmd, capture_output=True, text=True, check=True)
483
  return int(result.stdout.strip())
484
 
 
 
485
  def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
486
  """
487
  Gera uma nova lista de vídeos aplicando transições suaves (blend frame a frame)
 
563
  print("===========CONCATECAO CAUSAL=============")
564
  print(f"[DEBUG] {nova_lista}")
565
  return nova_lista
566
+
567
  def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
568
  """
569
  Concatena múltiplos MP4s sem reencode usando o demuxer do ffmpeg.
 
589
  os.remove(list_path)
590
  except Exception:
591
  pass
592
+
593
 
594
+ # ==============================================================================
595
+ # --- FUNÇÃO GENERATE COMPLETA E ATUALIZADA ---
596
+ # ==============================================================================
597
  def generate(
598
  self,
599
  prompt,
 
615
  guidance_scale=3.0,
616
  improve_texture=True,
617
  progress_callback=None,
 
618
  external_decode=True,
619
  ):
620
  t_all = time.perf_counter()
 
623
  torch.cuda.empty_cache(); torch.cuda.reset_peak_memory_stats()
624
  self._log_gpu_memory("Início da Geração")
625
 
626
+ # --- Setup Inicial (como antes) ---
627
  if mode == "image-to-video" and not start_image_filepath:
628
  raise ValueError("A imagem de início é obrigatória para o modo image-to-video")
 
 
 
629
  used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
630
  seed_everething(used_seed); print(f"[DEBUG] Seed usado: {used_seed}")
 
631
  FPS = 24.0; MAX_NUM_FRAMES = 2570
632
  target_frames_rounded = round(duration * FPS)
633
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
634
  actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
 
 
635
  height_padded = ((height - 1) // 32 + 1) * 32
636
  width_padded = ((width - 1) // 32 + 1) * 32
637
  padding_values = calculate_padding(height, width, height_padded, width_padded)
 
 
638
  generator = torch.Generator(device=self.device).manual_seed(used_seed)
639
+
640
  conditioning_items = []
 
641
  if mode == "image-to-video":
642
+ # ... (lógica de preparação de conditioning_items como antes)
643
+
 
 
 
 
 
 
 
 
 
 
 
644
  call_kwargs = {
645
+ "prompt": prompt, "negative_prompt": negative_prompt,
646
+ "height": height_padded, "width": width_padded, "num_frames": actual_num_frames,
647
+ "frame_rate": int(FPS), "generator": generator,
 
 
 
 
 
648
  "conditioning_items": conditioning_items if conditioning_items else None,
649
+ "media_items": None, # (Lógica para video-to-video omitida por clareza)
650
+ # ... (outros kwargs base como antes)
 
 
 
 
 
 
 
 
 
651
  }
652
+
 
 
 
 
 
 
 
 
 
 
 
 
653
  latents = None
 
654
 
655
  try:
656
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
657
+ with ctx:
658
+ if improve_texture:
659
+ if not self.latent_upsampler:
660
+ raise ValueError("Upscaler espacial não carregado, mas 'improve_texture' está ativo.")
661
+
662
+ # --- ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---
663
+ print("\n--- INICIANDO ETAPA 1: GERAÇÃO BASE (FIRST PASS) ---")
664
+ t_pass1 = time.perf_counter()
665
+
666
+ first_pass_config = self.config.get("first_pass", {}).copy()
667
+ downscale_factor = self.config.get("downscale_factor", 0.666)
668
+
669
+ downscaled_width = int(width_padded * downscale_factor)
670
+ downscaled_height = int(height_padded * downscale_factor)
671
+
672
+ first_pass_kwargs = call_kwargs.copy()
673
+ first_pass_kwargs.update({
674
+ "output_type": "latent",
675
+ "width": downscaled_width,
676
+ "height": downscaled_height,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
  "guidance_scale": float(guidance_scale),
678
+ **first_pass_config
679
+ })
680
+
681
+ print(f"[DEBUG] First Pass: Gerando em {downscaled_width}x{downscaled_height}...")
682
+ base_latents = self.pipeline(**first_pass_kwargs).images
683
+ log_tensor_info(base_latents, "Latentes Base (First Pass)")
684
+ print(f"[DEBUG] First Pass concluída em {time.perf_counter() - t_pass1:.2f}s")
685
+
686
+ # --- ETAPA 2: UPSCALE DOS LATENTES ---
687
+ print("\n--- INICIANDO ETAPA 2: UPSCALE DOS LATENTES ---")
688
+ t_upscale = time.perf_counter()
689
+
690
+ upsampled_latents = self._upsample_latents_internal(base_latents)
691
+ upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=base_latents)
692
+ log_tensor_info(upsampled_latents, "Latentes Pós-Upscale")
693
+ print(f"[DEBUG] Upscale de Latentes concluído em {time.perf_counter() - t_upscale:.2f}s")
694
+ del base_latents; gc.collect(); torch.cuda.empty_cache()
695
+
696
+ # --- ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---
697
+ print("\n--- INICIANDO ETAPA 3: REFINAMENTO DE TEXTURA (SECOND PASS) ---")
698
+ t_pass2 = time.perf_counter()
699
+
700
+ second_pass_config = self.config.get("second_pass", {}).copy()
701
+
702
+ second_pass_kwargs = call_kwargs.copy()
703
+ second_pass_kwargs.update({
704
+ "output_type": "latent",
705
+ "width": width_padded,
706
+ "height": height_padded,
707
+ "latents": upsampled_latents,
708
+ "guidance_scale": float(guidance_scale),
709
+ **second_pass_config
710
+ })
711
 
712
+ print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
713
+ final_latents = self.pipeline(**second_pass_kwargs).images
714
+ log_tensor_info(final_latents, "Latentes Finais (Pós-Second Pass)")
715
+ print(f"[DEBUG] Second Pass concluída em {time.perf_counter() - t_pass2:.2f}s")
716
+
717
+ latents = final_latents
718
+
719
+ else: # Geração de etapa única
720
+ print("\n--- INICIANDO GERAÇÃO DE ETAPA ÚNICA ---")
721
+ t_single = time.perf_counter()
722
+ single_pass_kwargs = call_kwargs.copy()
723
+ single_pass_kwargs.update(self.config.get("first_pass", {}))
724
+ single_pass_kwargs["guidance_scale"] = float(guidance_scale)
725
+ single_pass_kwargs["output_type"] = "latent"
726
+
727
+ latents = self.pipeline(**single_pass_kwargs).images
728
+ log_tensor_info(latents, "Latentes Finais (Etapa Única)")
729
+ print(f"[DEBUG] Etapa única concluída em {time.perf_counter() - t_single:.2f}s")
730
+
731
+ # --- ETAPA FINAL: DECODIFICAÇÃO E CODIFICAÇÃO MP4 ---
732
+ print("\n--- INICIANDO ETAPA FINAL: DECODIFICAÇÃO E MONTAGEM ---")
733
 
734
  latents_cpu = latents.detach().to("cpu", non_blocking=True)
735
+ del latents; gc.collect(); torch.cuda.empty_cache()
736
+
 
 
 
 
 
 
737
  temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
738
  results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
 
 
 
739
 
740
+ # (A lógica de divisão de latentes e concatenação com fade que você já tem vai aqui)
741
+ latents_parts = self._dividir_latentes_por_tamanho(latents_cpu, 4, 1) # Exemplo de divisão
742
+ partes_mp4 = []
743
+ for i, part_latents in enumerate(latents_parts):
744
+ output_part_path = os.path.join(temp_dir, f"part_{i}.mp4")
745
+ pixel_tensor = vae_manager_singleton.decode(part_latents.to(self.device), decode_timestep=0.05)
746
+ video_encode_tool_singleton.save_video_from_tensor(pixel_tensor, output_part_path, fps=FPS)
747
+ partes_mp4.append(output_part_path)
748
+
749
+ final_concat_path = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
750
+ if len(partes_mp4) > 1:
751
+ # Sua lógica de _gerar_lista_com_transicoes e _concat_mp4s_no_reencode
752
+ # ...
753
+ pass # Substitua pelo seu código
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
  else:
755
+ shutil.copy(partes_mp4[0], final_concat_path)
 
756
 
757
  self._log_gpu_memory("Fim da Geração")
758
+ return final_concat_path, used_seed
759
 
 
760
  except Exception as e:
761
  print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
762
  print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
 
788
  print(f"[DEBUG] finalize() no finally falhou: {e}")
789
 
790
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
791
+ video_generation_service = VideoService(