Eueuiaa committed · verified
Commit ab2fc5d · Parent(s): 9fddc3b

Update api/ltx_server.py

Files changed (1): api/ltx_server.py (+126 −263)
api/ltx_server.py CHANGED
@@ -1,27 +1,19 @@
 # ltx_server.py — VideoService (beta 1.1)
 # Always output_type="latent"; at the end: VAE (whole block) → pixels → MP4.
 # Ignores UserWarning/FutureWarning and injects the VAE into the manager with the correct dtype/device.
-
 # --- 0. WARNINGS AND ENVIRONMENT ---
+
 import warnings
 warnings.filterwarnings("ignore", category=UserWarning)
 warnings.filterwarnings("ignore", category=FutureWarning)
 warnings.filterwarnings("ignore", message=".*")
-
 from huggingface_hub import logging
-
 logging.set_verbosity_error()
 logging.set_verbosity_warning()
 logging.set_verbosity_info()
 logging.set_verbosity_debug()
-
-
 LTXV_DEBUG=1
 LTXV_FRAME_LOG_EVERY=8
-
-
-
-# --- 1. IMPORTS ---
 import os, subprocess, shlex, tempfile
 import torch
 import json
@@ -44,12 +36,30 @@ import time
 import traceback
 from einops import rearrange
 import torch.nn.functional as F
-
-# Singletons (simple versions)
 from managers.vae_manager import vae_manager_singleton
 from tools.video_encode_tool import video_encode_tool_singleton
-
-# --- 2. DEPENDENCY MANAGEMENT AND SETUP ---
+def run_setup():
+    setup_script_path = "setup.py"
+    if not os.path.exists(setup_script_path):
+        print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
+        return
+    try:
+        print("[DEBUG] Running setup.py for dependencies...")
+        subprocess.run([sys.executable, setup_script_path], check=True)
+        print("[DEBUG] Setup finished successfully.")
+    except subprocess.CalledProcessError as e:
+        print(f"[DEBUG] ERROR in setup.py (code {e.returncode}). Aborting.")
+        sys.exit(1)
+DEPS_DIR = Path("/data")
+LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+if not LTX_VIDEO_REPO_DIR.exists():
+    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
+    run_setup()
+def add_deps_to_path():
+    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+    if repo_path not in sys.path:
+        sys.path.insert(0, repo_path)
+    print(f"[DEBUG] Repo added to sys.path: {repo_path}")
 def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
     try:
         import psutil
@@ -83,7 +93,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
         return results
     except Exception:
         return []
-
 def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
     cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
     try:
@@ -107,9 +116,6 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
         except Exception:
             continue
     return results
-
-
-
 def calculate_new_dimensions(orig_w, orig_h, divisor=8):
     """
     Computes new dimensions while keeping the aspect ratio, ensuring that both
@@ -143,8 +149,6 @@ def calculate_new_dimensions(orig_w, orig_h, divisor=8):
 
     print(f"[Dimension Calc] Original: {orig_w}x{orig_h} -> Computed: {new_w:.0f}x{new_h:.0f} -> Final (divisible by {divisor}): {final_w}x{final_h}")
     return final_h, final_w  # returns (height, width)
-
-
 def handle_media_upload_for_dims(filepath, current_h, current_w):
     """
     This function now uses the new, robust calculation.
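
A minimal sketch of the divisor-snapping idea behind calculate_new_dimensions; the _round_to_multiple helper below is hypothetical, and the real function may round toward a different multiple:

    def _round_to_multiple(x: float, divisor: int = 8) -> int:
        # Snap a scaled side length to the nearest multiple of `divisor`.
        return max(divisor, int(round(x / divisor)) * divisor)

    # e.g. a 1000x562 source becomes 1000x560 with divisor=8, and the service
    # then returns (height, width) = (560, 1000).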
@@ -168,8 +172,6 @@ def handle_media_upload_for_dims(filepath, current_h, current_w):
     except Exception as e:
         print(f"Error while processing media for dimensions: {e}")
         return gr.update(value=current_h), gr.update(value=current_w)
-
-
 def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
     if not processes:
         return " - Active processes: (none)\n"
@@ -180,52 +182,6 @@ def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
         used_str = str(p["used_mb"]) if p.get("used_mb") is not None else "N/A"
         lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
     return "\n".join(lines) + "\n"
-
-def run_setup():
-    setup_script_path = "setup.py"
-    if not os.path.exists(setup_script_path):
-        print("[DEBUG] 'setup.py' not found. Skipping dependency cloning.")
-        return
-    try:
-        print("[DEBUG] Running setup.py for dependencies...")
-        subprocess.run([sys.executable, setup_script_path], check=True)
-        print("[DEBUG] Setup finished successfully.")
-    except subprocess.CalledProcessError as e:
-        print(f"[DEBUG] ERROR in setup.py (code {e.returncode}). Aborting.")
-        sys.exit(1)
-
-from api.ltx.inference import (
-    create_ltx_video_pipeline,
-    create_latent_upsampler,
-    load_image_to_tensor_with_resize_and_crop,
-    seed_everething,
-    calculate_padding,
-    load_media_file,
-)
-
-DEPS_DIR = Path("/data")
-LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
-if not LTX_VIDEO_REPO_DIR.exists():
-    print(f"[DEBUG] Repository not found at {LTX_VIDEO_REPO_DIR}. Running setup...")
-    run_setup()
-
-def add_deps_to_path():
-    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-    if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
-        sys.path.insert(0, repo_path)
-    print(f"[DEBUG] Repo added to sys.path: {repo_path}")
-
-add_deps_to_path()
-
-# --- 3. MODEL-SPECIFIC IMPORTS ---
-
-from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
-from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
-from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
-from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
-
-
-# --- 4. LOG HELPER FUNCTIONS ---
 def log_tensor_info(tensor, name="Tensor"):
     if not isinstance(tensor, torch.Tensor):
         print(f"\n[INFO] '{name}' is not a tensor.")
@@ -240,12 +196,19 @@ def log_tensor_info(tensor, name="Tensor"):
     except Exception:
         pass
     print("------------------------------------------\n")
-
-
-
-
-
-# --- 5. MAIN SERVICE CLASS ---
+add_deps_to_path()
+from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
+from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
+from ltx_video.models.autoencoders.vae_encode import un_normalize_latents, normalize_latents
+from ltx_video.pipelines.pipeline_ltx_video import adain_filter_latent
+from api.ltx.inference import (
+    create_ltx_video_pipeline,
+    create_latent_upsampler,
+    load_image_to_tensor_with_resize_and_crop,
+    seed_everething,
+    calculate_padding,
+    load_media_file,
+)
 class VideoService:
     def __init__(self):
         t0 = time.perf_counter()
@@ -366,57 +329,31 @@ class VideoService:
         return yaml.safe_load(file)
 
     def _load_models(self):
-        """
-        Loads the models intelligently:
-        1. First tries to resolve the path from the local cache (fast, no network).
-        2. If the file is not found locally, downloads it as a fallback.
-        Guarantees the service can start even if setup.py was never run.
-        """
         t0 = time.perf_counter()
         LTX_REPO = "Lightricks/LTX-Video"
-
-        print("[DEBUG] Resolving model paths intelligently...")
-
-        # --- Helper for smart loading ---
-        def get_or_download_model(repo_id, filename, description):
-            try:
-                # hf_hub_download is the right tool here. It checks the cache FIRST.
-                # If the file is already cached, it returns the path instantly (after a quick metadata check).
-                # If it is not cached, it downloads the file.
-                print(f"[DEBUG] Checking {description}: {filename}...")
-                model_path = hf_hub_download(
-                    repo_id=repo_id,
-                    filename=filename,
-                    # Force a specific cache location if needed
-                    cache_dir=os.getenv("HF_HOME_CACHE"),
-                    token=os.getenv("HF_TOKEN")
-                )
-                print(f"[DEBUG] Path for the {description} resolved successfully.")
-                return model_path
-            except Exception as e:
-                print("\n" + "="*80)
-                print(f"[CRITICAL ERROR] Failed to obtain model '{filename}'.")
-                print(f"Error detail: {e}")
-                print("Check your internet connection or the state of the Hugging Face cache.")
-                print("="*80 + "\n")
-                sys.exit(1)
-
-        # --- Main checkpoint ---
-        checkpoint_filename = self.config["checkpoint_path"]
-        distilled_model_path = get_or_download_model(
-            LTX_REPO, checkpoint_filename, "main checkpoint"
-        )
+        print("[DEBUG] Downloading main checkpoint...")
+        distilled_model_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["checkpoint_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN"),
+        )
         self.config["checkpoint_path"] = distilled_model_path
+        print(f"[DEBUG] Checkpoint at: {distilled_model_path}")
 
-        # --- Spatial upscaler ---
-        upscaler_filename = self.config["spatial_upscaler_model_path"]
-        spatial_upscaler_path = get_or_download_model(
-            LTX_REPO, upscaler_filename, "spatial upscaler"
-        )
+        print("[DEBUG] Downloading spatial upscaler...")
+        spatial_upscaler_path = hf_hub_download(
+            repo_id=LTX_REPO,
+            filename=self.config["spatial_upscaler_model_path"],
+            local_dir=os.getenv("HF_HOME"),
+            cache_dir=os.getenv("HF_HOME_CACHE"),
+            token=os.getenv("HF_TOKEN")
+        )
         self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
+        print(f"[DEBUG] Upscaler at: {spatial_upscaler_path}")
 
-        # --- Building the pipelines ---
-        print("\n[DEBUG] Building pipeline from the resolved paths...")
+        print("[DEBUG] Building pipeline...")
         pipeline = create_ltx_video_pipeline(
             ckpt_path=self.config["checkpoint_path"],
             precision=self.config["precision"],
@@ -434,7 +371,6 @@ class VideoService:
         print("[DEBUG] Building latent_upsampler...")
         latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
         print("[DEBUG] Upsampler ready.")
-
         print(f"[DEBUG] _load_models() total time={time.perf_counter()-t0:.3f}s")
         return pipeline, latent_upsampler
 
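
The deleted comments above describe hf_hub_download's cache-first behavior; a small sketch of that property (the filename is a placeholder, not a real file in the repo):

    from huggingface_hub import hf_hub_download

    p1 = hf_hub_download(repo_id="Lightricks/LTX-Video", filename="<checkpoint>.safetensors")
    p2 = hf_hub_download(repo_id="Lightricks/LTX-Video", filename="<checkpoint>.safetensors")
    assert p1 == p2  # the second call resolves from the local cache after a metadata check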
@@ -462,6 +398,8 @@ class VideoService:
             pass
         print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
 
+
+
     @torch.no_grad()
     def _upsample_latents_internal(self, latents: torch.Tensor) -> torch.Tensor:
         """
@@ -478,8 +416,11 @@ class VideoService:
         upsampled_latents = self.latent_upsampler(latents)
         upsampled_latents = normalize_latents(upsampled_latents, self.pipeline.vae, vae_per_channel_normalize=True)
         print(f"[DEBUG-UPSAMPLE] Output shape: {tuple(upsampled_latents.shape)}")
+
         return upsampled_latents
 
+
+
     def _apply_precision_policy(self):
         prec = str(self.config.get("precision", "")).lower()
         self.runtime_autocast_dtype = torch.float32
@@ -513,6 +454,7 @@ class VideoService:
         print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
         return out
 
+
     def _dividir_latentes_por_tamanho(self, latents_brutos, num_latente_por_chunk: int, overlap: int = 1):
         """
         Splits the latent tensor into chunks whose size is given as a number of latents.
@@ -543,10 +485,10 @@ class VideoService:
             start = (num_latente_por_chunk*i)
             end = (start+num_latente_por_chunk+overlap)
             if i+1 < n_chunks:
-                chunk = latents_brutos[:, :, start:end, :, :].detach()
+                chunk = latents_brutos[:, :, start:end, :, :].clone().detach()
                 print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
             else:
-                chunk = latents_brutos[:, :, start:, :, :].detach()
+                chunk = latents_brutos[:, :, start:, :, :].clone().detach()
                 print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
             chunks.append(chunk)
             i+=1
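
For concreteness, the slicing above produces overlapping temporal windows; a sketch with assumed sizes (10 temporal latents, num_latente_por_chunk=4, overlap=1, so presumably three chunks):

    import torch
    latents = torch.zeros(1, 128, 10, 16, 16)  # (B, C, T, H, W); shapes are illustrative
    # i=0 -> [:, :, 0:5] (5 latents), i=1 -> [:, :, 4:9] (5 latents),
    # i=2 -> [:, :, 8:]  (2 latents, the tail).
    # Adjacent chunks share one latent, which is the overlap that lets the
    # decoded parts be joined without a visible seam.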
@@ -570,21 +512,6 @@ class VideoService:
         result = subprocess.run(cmd, capture_output=True, text=True, check=True)
         return int(result.stdout.strip())
 
-
-
-    def _dividir_latentes(self, latents_brutos):
-        total = latents_brutos.shape[2]  # temporal dimension (number of latents)
-
-        #if total % 2 == 1:  # ODD
-        # e.g. 11 → first 0..5, second 5..10
-        cut = total // 2
-        primeira = latents_brutos[:, :, :cut+1, :, :].detach()
-        segunda = latents_brutos[:, :, cut:, :, :].detach()
-
-
-        return primeira, segunda
-
-
     def _gerar_lista_com_transicoes(self, pasta: str, video_paths: list[str], crossfade_frames: int = 8) -> list[str]:
         """
         Generates a new list of videos applying smooth transitions (frame-by-frame blending)
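
The deleted _dividir_latentes splits at the temporal midpoint with a one-latent overlap, as its "11 → first 0..5, second 5..10" comment indicates; a runnable check of that boundary (shapes illustrative):

    import torch
    t = torch.zeros(1, 128, 11, 16, 16)     # 11 temporal latents
    cut = t.shape[2] // 2                   # 5
    first, second = t[:, :, :cut + 1], t[:, :, cut:]
    assert first.shape[2] == 6 and second.shape[2] == 6  # both halves share index 5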
@@ -662,25 +589,10 @@ class VideoService:
             print(f"[DEBUG] Trimmed video {i+1} added, {self._get_total_frames(video_podado)} frames ✅")
 
 
+
             print("=========== CAUSAL CONCATENATION =============")
             print(f"[DEBUG] {nova_lista}")
         return nova_lista
-
-    def _concat_mp4s_no_reencode2(self, mp4_a: str, mp4_b: str, out_path: str):
-        # ffmpeg concat demuxer (no re-encode)
-        import tempfile, subprocess, shlex, os
-        with tempfile.NamedTemporaryFile("w", delete=False, suffix=".txt") as f:
-            f.write(f"file '{os.path.abspath(mp4_a)}'\n")
-            f.write(f"file '{os.path.abspath(mp4_b)}'\n")
-            list_path = f.name
-        cmd = f"ffmpeg -y -f concat -safe 0 -i {list_path} -c copy {out_path}"
-        print(f"[DEBUG] Concat: {cmd}")
-        try:
-            subprocess.check_call(shlex.split(cmd))
-        finally:
-            try: os.remove(list_path)
-            except Exception: pass
-
 
     def _concat_mp4s_no_reencode(self, mp4_list: List[str], out_path: str):
         """
@@ -759,20 +671,14 @@ class VideoService:
         if mode == "image-to-video":
             start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
             conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
-            conditioning_items1.append(ConditioningItem(start_tensor, 0, 1.0))
-
         if middle_image_filepath and middle_frame_number is not None:
             middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
             safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
             conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
-            conditioning_items1.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
-
         if end_image_filepath:
             end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
             last_frame_index = actual_num_frames - 1
             conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
-            conditioning_items2.append(ConditioningItem(end_tensor, last_frame_index//2, 1.0))
-
         print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
 
         call_kwargs = {
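
Each ConditioningItem above binds a prepared tensor to a frame index with a weight, so a three-anchor setup is assembled like this (values illustrative, variables as defined in the surrounding code):

    # ConditioningItem(tensor, frame_index, weight)
    # conditioning_items = [
    #     ConditioningItem(start_tensor, 0, 1.0),                    # pin the first frame
    #     ConditioningItem(middle_tensor, safe_middle_frame, 0.5),   # soft midpoint anchor
    #     ConditioningItem(end_tensor, actual_num_frames - 1, 1.0),  # pin the last frame
    # ]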
@@ -789,7 +695,7 @@ class VideoService:
             "decode_timestep": self.config["decode_timestep"],
             "decode_noise_scale": self.config["decode_noise_scale"],
             "stochastic_sampling": self.config["stochastic_sampling"],
-            "image_cond_noise_scale": 0.05,
+            "image_cond_noise_scale": 0.01,
             "is_video": True,
             "vae_per_channel_normalize": True,
             "mixed_precision": (self.config["precision"] == "mixed_precision"),
@@ -797,12 +703,8 @@ class VideoService:
             "enhance_prompt": False,
             "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
         }
-        print(f"[DEBUG] output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
-
         latents = None
         latents_list = []
-        results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
-
 
         try:
             ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
@@ -819,7 +721,6 @@ class VideoService:
             downscale_factor = self.config.get("downscale_factor", 0.6666666)
             vae_scale_factor = self.pipeline.vae_scale_factor  # usually 8
 
-            # --- <START OF THE EXACT CALCULATION LOGIC> ---
             # Replicates the LTXMultiScalePipeline formula
             x_width = int(width_padded * downscale_factor)
             downscaled_width = x_width - (x_width % vae_scale_factor)
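
A worked instance of the two lines above, assuming width_padded=1280, downscale_factor=0.6666666 and vae_scale_factor=8:

    # x_width          = int(1280 * 0.6666666) = 853
    # downscaled_width = 853 - (853 % 8)       = 848
    # The first pass therefore runs at 848 px wide; the same floor-to-multiple
    # step is presumably applied to the height before the low-resolution pass.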
@@ -852,8 +753,8 @@ class VideoService:
             log_tensor_info(upsampled_latents, "Post-Upscale Latents")
             print(f"[DEBUG] Latent upscale finished in {time.perf_counter() - t_upscale:.2f}s")
             del base_latents; gc.collect(); torch.cuda.empty_cache()
-
 
+            par = 0
             latents_cpu_up = upsampled_latents.detach().to("cpu", non_blocking=True)
             torch.cuda.empty_cache()
             try:
@@ -861,18 +762,11 @@ class VideoService:
                 torch.cuda.ipc_collect()
             except Exception:
                 pass
 
-            #latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up,15,0)
-
-            lat_aup, lat_bup = self._dividir_latentes(latents_cpu_up)
-            print(f"[DEBUG] Partition Aup: {tuple(lat_aup.shape)}")
-            print(f"[DEBUG] Partition Bup: {tuple(lat_bup.shape)}")
-
-            latents_parts_up = [lat_aup, lat_bup]
-
-            #latents_parts_up = [latents_cpu_up]
-
-            par = 0
+            latents_parts_up = self._dividir_latentes_por_tamanho(latents_cpu_up, 4, 1)
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
             for latents in latents_parts_up:
 
                 # # --- STAGE 3: TEXTURE REFINEMENT (SECOND PASS) ---
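
The detach → CPU copy → cache-release sequence above recurs throughout this method; a compact helper capturing it (a sketch, not part of the file):

    import torch

    def offload_to_cpu(t: torch.Tensor) -> torch.Tensor:
        t_cpu = t.detach().to("cpu", non_blocking=True)
        torch.cuda.empty_cache()      # release cached CUDA blocks
        try:
            torch.cuda.ipc_collect()  # reclaim IPC-shared memory, if any
        except Exception:
            pass
        return t_cpu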
@@ -886,39 +780,16 @@ class VideoService:
                 print(f"[DEBUG] Second Pass Dims: Target ({second_pass_width}x{second_pass_height})")
                 # --- <END OF THE EXACT CALCULATION LOGIC> ---
                 t_pass2 = time.perf_counter()
-
-                num_latent_frames_part = latents.shape[2]
-
-                vae_temporal_scale = self.pipeline.video_scale_factor  # usually 4 or 8
-                num_pixel_frames_part = ((num_latent_frames_part - 1) * vae_temporal_scale) + 1
-                print(f"[DEBUG] Part: {num_latent_frames_part - 1} latents -> {num_pixel_frames_part} pixel frames (target)")
+
                 second_pass_kwargs = call_kwargs.copy()
-
-                if par==0:
-                    second_pass_kwargs.update({
-                        "conditioning_items": conditioning_items1,
-                        "output_type": "latent",
-                        "width": second_pass_width,
-                        "height": second_pass_height,
-                        "num_frames": num_pixel_frames_part,
-                        "latents": latents,  # the upscaled tensor
-                        "guidance_scale": float(guidance_scale),
-                        **second_pass_config
-                    })
-                else:
-                    second_pass_kwargs.update({
-                        "conditioning_items": conditioning_items2,
-                        "output_type": "latent",
-                        "width": second_pass_width,
-                        "height": second_pass_height,
-                        "num_frames": num_pixel_frames_part,
-                        "latents": latents,  # the upscaled tensor
-                        "guidance_scale": float(guidance_scale),
-                        **second_pass_config
-                    })
-                par+=1
+                second_pass_kwargs.update({
+                    "output_type": "latent",
+                    "width": second_pass_width,
+                    "height": second_pass_height,
+                    "latents": upsampled_latents,  # the upscaled tensor
+                    "guidance_scale": float(guidance_scale),
+                    **second_pass_config
+                })
 
                 print(f"[DEBUG] Second Pass: refining at {width_padded}x{height_padded}...")
                 final_latents = self.pipeline(**second_pass_kwargs).images
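
The deleted frame computation follows the causal layout of the video VAE, where the first frame is encoded on its own; with video_scale_factor=8, a part holding 9 temporal latents targets 65 pixel frames:

    # num_pixel_frames = ((num_latent_frames - 1) * vae_temporal_scale) + 1
    # e.g. (9 - 1) * 8 + 1 = 65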
@@ -943,70 +814,62 @@ class VideoService:
 
             # --- FINAL STAGE: DECODING AND MP4 ENCODING ---
             print("\n--- STARTING FINAL STAGE: DECODING AND ASSEMBLY ---")
-
-            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
-            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
+
+            #latents_cpu = latents.detach().to("cpu", non_blocking=True)
+            #torch.cuda.empty_cache()
+            #try:
+            #    torch.cuda.ipc_collect()
+            #except Exception:
+            #    pass
+
+            latents_parts = []
+            for latents in latents_list:
+                latents_cpu = latents.detach().to("cpu", non_blocking=True)
+                latents_parts.extend(self._dividir_latentes_por_tamanho(latents_cpu, 4, 1))
+            temp_dir = tempfile.mkdtemp(prefix="ltxv_"); self._register_tmp_dir(temp_dir)
+            results_dir = "/app/output"; os.makedirs(results_dir, exist_ok=True)
 
             partes_mp4 = []
             par = 0
-
-            for latents_vae in latents_list:
-
-                latents_cpu_vae = latents_vae.detach().to("cpu", non_blocking=True)
-                torch.cuda.empty_cache()
-                try:
-                    torch.cuda.ipc_collect()
-                except Exception:
-                    pass
-
-                #latents_parts_vae = self._dividir_latentes_por_tamanho(latents_cpu_vae,4,1)
-
-                lat_a, lat_b = self._dividir_latentes(latents_cpu_vae)
-                print(f"[DEBUG] Partition A: {tuple(lat_a.shape)}")
-                print(f"[DEBUG] Partition B: {tuple(lat_b.shape)}")
-
-                latents_parts_vae = [lat_a, lat_b]
-
-                for latents in latents_parts_vae:
-                    #print(f"[DEBUG] Partition {par}: {tuple(latents.shape)}")
-
-                    par = par + 1
-                    output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
-                    final_output_path = None
-
-                    print("[DEBUG] Decoding the latent block with the VAE → pixel tensor...")
-                    # Use the manager with a per-item timestep; prevents target_shape and the NoneType.decode route
-                    pixel_tensor = vae_manager_singleton.decode(
-                        latents.to(self.device, non_blocking=True),
-                        decode_timestep=float(self.config.get("decode_timestep", 0.05))
-                    )
-                    log_tensor_info(pixel_tensor, "Pixel tensor (VAE output)")
-
-                    print("[DEBUG] Encoding the MP4 from the pixel tensor (whole block)...")
-                    video_encode_tool_singleton.save_video_from_tensor(
-                        pixel_tensor,
-                        output_video_path,
-                        fps=call_kwargs["frame_rate"],
-                        progress_callback=progress_callback,
-                    )
-
-                    try:
-                        candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
-                        shutil.move(output_video_path, candidate)
-                        print(f"[DEBUG] MP4 part {par} moved to {candidate}")
-                        partes_mp4.append(candidate)
-
-                    except Exception as e:
-                        final_output_path = output_video_path
-                        print(f"[DEBUG] Move failed; using tmp as final: {e}")
-
+            for latents in latents_parts:
+                print(f"[DEBUG] Partition {par}: {tuple(latents.shape)}")
+
+                par = par + 1
+                output_video_path = os.path.join(temp_dir, f"output_{used_seed}_{par}.mp4")
+                final_output_path = None
+
+                print("[DEBUG] Decoding the latent block with the VAE → pixel tensor...")
+                # Use the manager with a per-item timestep; prevents target_shape and the NoneType.decode route
+                pixel_tensor = vae_manager_singleton.decode(
+                    latents.to(self.device, non_blocking=True),
+                    decode_timestep=float(self.config.get("decode_timestep", 0.05))
+                )
+                log_tensor_info(pixel_tensor, "Pixel tensor (VAE output)")
+
+                print("[DEBUG] Encoding the MP4 from the pixel tensor (whole block)...")
+                video_encode_tool_singleton.save_video_from_tensor(
+                    pixel_tensor,
+                    output_video_path,
+                    fps=call_kwargs["frame_rate"],
+                    progress_callback=progress_callback
+                )
+
+                candidate = os.path.join(results_dir, f"output_par_{par}.mp4")
+                try:
+                    shutil.move(output_video_path, candidate)
+                    final_output_path = candidate
+                    print(f"[DEBUG] MP4 part {par} moved to {final_output_path}")
+                    partes_mp4.append(final_output_path)
+                except Exception as e:
+                    final_output_path = output_video_path
+                    print(f"[DEBUG] Move failed; using tmp as final: {e}")
 
             total_partes = len(partes_mp4)
             if (total_partes>1):
                 final_vid = os.path.join(results_dir, f"concat_fim_{used_seed}.mp4")
-                #partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
-                final_vid = video_encode_tool_singleton.concatenate_videos(video_paths=partes_mp4, output_path="concate_fim.mp4", workspace_dir=results_dir)
-                self._concat_mp4s_no_reencode(partes_mp4, final_vid)
+                partes_mp4_fade = self._gerar_lista_com_transicoes(pasta=results_dir, video_paths=partes_mp4, crossfade_frames=8)
+                self._concat_mp4s_no_reencode(partes_mp4_fade, final_vid)
             else:
                 final_vid = partes_mp4[0]