Eueuiaa committed
Commit 37f366a · verified · 1 Parent(s): c47500b

Update api/ltx_server_refactored.py

Files changed (1):
  1. api/ltx_server_refactored.py (+123 -1)

api/ltx_server_refactored.py CHANGED
@@ -444,8 +444,130 @@ class VideoService:
         downscale_factor = self.config.get("downscale_factor", 0.6666666)
         vae_scale_factor = self.pipeline.vae_scale_factor
 
+
         # --- <START OF EXACT CALCULATION LOGIC> ---
         # Replicates the LTXMultiScalePipeline formula
         x_width = int(width_padded * downscale_factor)
         downscaled_width = x_width - (x_width % vae_scale_factor)
-
+        x_height = int(height_padded * downscale_factor)
+        downscaled_height = x_height - (x_height % vae_scale_factor)
+        print(f"[DEBUG] First Pass Dims: Original Pad ({width_padded}x{height_padded}) -> Downscaled ({downscaled_width}x{downscaled_height})")
+        # --- <END OF EXACT CALCULATION LOGIC> ---
+
+        first_pass_kwargs = {
+            "prompt": prompt, "negative_prompt": negative_prompt, "height": downscaled_height, "width": downscaled_width,
+            "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "latent",
+            "conditioning_items": conditioning_items, "guidance_scale": float(guidance_scale),
+            **(self.config.get("first_pass", {}))
+        }
+
+        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
+            latents = self.pipeline(**first_pass_kwargs).images
+            log_tensor_info(latents, "Generated Low-Res Latents")
+
+        pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+        video_path = self._save_and_log_video(pixel_tensor, "low_res_video", FPS, temp_dir, results_dir, used_seed)
+        del pixel_tensor
+
+        latents_cpu = latents.detach().to("cpu")
+        tensor_path = os.path.join(results_dir, f"latents_low_res_{used_seed}.pt")
+        torch.save(latents_cpu, tensor_path)
+        print(f"[DEBUG] Low-res latent tensor saved to: {tensor_path}")
+
+        self._log_gpu_memory("End of Low-Res Generation")
+        return video_path, tensor_path, used_seed
+
+    def generate_upscale_denoise(self, latents_path, prompt, negative_prompt, guidance_scale, seed):
+        print("\n--- STARTING STAGE 2: UPSCALE AND REFINEMENT ---")
+        self._log_gpu_memory("Start of Upscale/Denoise")
+
+        used_seed = random.randint(0, 2**32 - 1) if seed is None else int(seed)
+        seed_everething(used_seed)
+
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_up_"); self._register_tmp_dir(temp_dir)
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
+        latents_low = torch.load(latents_path).to(self.device)
+        log_tensor_info(latents_low, "Loaded Low-Res Latents")
+
+        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
+            upsampled_latents = self._upsample_latents_internal(latents_low)
+            upsampled_latents = adain_filter_latent(latents=upsampled_latents, reference_latents=latents_low)
+            del latents_low; torch.cuda.empty_cache()
+
+            total_frames = upsampled_latents.shape[2]
+            mid_point = total_frames // 2
+            chunk1 = upsampled_latents[:, :, :mid_point, :, :]
+            chunk2 = upsampled_latents[:, :, mid_point:, :, :]
+
+            final_latents_list = []
+
+            for i, chunk in enumerate([chunk1, chunk2]):
+                if chunk.shape[2] == 0: continue
+                print(f"  - Refining chunk {i+1}/2 with {chunk.shape[2]} frames")
+                second_pass_height = chunk.shape[3] * self.pipeline.vae_scale_factor
+                second_pass_width = chunk.shape[4] * self.pipeline.vae_scale_factor
+
+                second_pass_kwargs = {
+                    "prompt": prompt, "negative_prompt": negative_prompt, "height": second_pass_height, "width": second_pass_width,
+                    "num_frames": chunk.shape[2], "latents": chunk, "guidance_scale": float(guidance_scale),
+                    "output_type": "latent", "generator": torch.Generator(device=self.device).manual_seed(used_seed),
+                    **(self.config.get("second_pass", {}))
+                }
+
+                refined_chunk = self.pipeline(**second_pass_kwargs).images
+                final_latents_list.append(refined_chunk.detach().clone())
+
+            del upsampled_latents, chunk1, chunk2; torch.cuda.empty_cache()
+
+        final_latents = torch.cat(final_latents_list, dim=2)
+        log_tensor_info(final_latents, "Final Upscaled/Refined Latents")
+
+        latents_cpu = final_latents.detach().to("cpu")
+        tensor_path = os.path.join(results_dir, f"latents_refined_{used_seed}.pt")
+        torch.save(latents_cpu, tensor_path)
+
+        pixel_tensor = vae_manager_singleton.decode(final_latents, decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+        video_path = self._save_and_log_video(pixel_tensor, "refined_video", 24.0, temp_dir, results_dir, used_seed)
+        del pixel_tensor, final_latents
+
+        self._log_gpu_memory("End of Upscale/Denoise")
+        return video_path, tensor_path
+
+    def encode_mp4(self, latents_path: str, fps: int = 24):
+        print("\n--- STARTING STAGE 3: FINAL DECODING ---")
+        self._log_gpu_memory("Start of MP4 Encode")
+
+        latents = torch.load(latents_path)
+        seed = random.randint(0, 99999)
+        temp_dir = tempfile.mkdtemp(prefix="ltxv_enc_"); self._register_tmp_dir(temp_dir)
+        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
+        total_frames = latents.shape[2]
+        mid_point = total_frames // 2
+        chunk1_latents = latents[:, :, :mid_point, :, :]
+        chunk2_latents = latents[:, :, mid_point:, :, :]
+
+        video_parts = []
+        with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype, enabled=self.device == 'cuda'):
+            for i, chunk in enumerate([chunk1_latents, chunk2_latents]):
+                if chunk.shape[2] == 0: continue
+                print(f"  - Decoding chunk {i+1}/2")
+                pixel_chunk = vae_manager_singleton.decode(chunk.to(self.device), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+
+                part_path = os.path.join(temp_dir, f"part_{i}.mp4")
+                video_encode_tool_singleton.save_video_from_tensor(pixel_chunk, part_path, fps=fps)
+                video_parts.append(part_path)
+                del pixel_chunk; torch.cuda.empty_cache()
+
+        final_video_path = os.path.join(results_dir, f"final_concatenated_{seed}.mp4")
+        self._concat_mp4s_no_reencode(video_parts, final_video_path)
+
+        print(f"Final encode complete: {final_video_path}")
+        self._log_gpu_memory("End of MP4 Encode")
+        return final_video_path
+
+# --- SERVICE INSTANTIATION ---
+print("Creating VideoService instance. Model loading will begin now...")
+video_generation_service = VideoService()
+print("VideoService instance ready for use.")