EuuIia committed on
Commit 5e7dd18 · verified · 1 Parent(s): 690fc1d

Upload ltx_server.py

Files changed (1)
  1. api/ltx_server.py +227 -151
api/ltx_server.py CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  import torch
2
  import numpy as np
3
  import random
@@ -14,12 +18,15 @@ import subprocess
14
  import gc
15
  import shutil
16
  import contextlib
 
 
17
 
18
  # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
19
  def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
20
  try:
21
  import psutil
22
  import pynvml as nvml
 
23
  nvml.nvmlInit()
24
  handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
25
  try:
@@ -44,25 +51,29 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
44
  except Exception:
45
  pass
46
  results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
 
47
  nvml.nvmlShutdown()
48
  return results
49
- except Exception:
 
50
  return []
51
 
52
  def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
53
  cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
54
  try:
 
55
  out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
56
- except Exception:
 
57
  return []
58
  results = []
59
  for line in out.strip().splitlines():
60
  parts = [p.strip() for p in line.split(",")]
61
  if len(parts) >= 3:
62
  try:
63
- pid = int(parts[0])
64
- name = parts[1]
65
- used_mb = int(parts[2])
66
  user = "unknown"
67
  try:
68
  import psutil
@@ -87,30 +98,29 @@ def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
87
  return "\n".join(lines) + "\n"
88
 
89
  def run_setup():
90
- """Executa o script setup.py para clonar as dependências necessárias."""
91
  setup_script_path = "setup.py"
92
  if not os.path.exists(setup_script_path):
93
- print("AVISO: script 'setup.py' não encontrado. Pulando a clonagem de dependências.")
94
  return
95
  try:
96
- print("--- Executando setup.py para garantir que as dependências estão presentes ---")
97
  subprocess.run([sys.executable, setup_script_path], check=True)
98
- print("--- Setup concluído com sucesso ---")
99
  except subprocess.CalledProcessError as e:
100
- print(f"ERRO CRÍTICO DURANTE O SETUP: 'setup.py' falhou com código {e.returncode}.")
101
  sys.exit(1)
102
 
103
  DEPS_DIR = Path("/data")
104
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
105
  if not LTX_VIDEO_REPO_DIR.exists():
 
106
  run_setup()
107
 
108
  def add_deps_to_path():
109
- """Adiciona o repositório clonado ao sys.path para que suas bibliotecas possam ser importadas."""
110
- if not LTX_VIDEO_REPO_DIR.exists():
111
- raise FileNotFoundError(f"Repositório LTX-Video não encontrado em '{LTX_VIDEO_REPO_DIR}'. Execute o setup.")
112
  if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
113
- sys.path.insert(0, str(LTX_VIDEO_REPO_DIR.resolve()))
 
114
 
115
  add_deps_to_path()
116
 
@@ -129,44 +139,51 @@ from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
129
  # --- 4. FUNÇÕES HELPER DE LOG ---
130
  def log_tensor_info(tensor, name="Tensor"):
131
  if not isinstance(tensor, torch.Tensor):
132
- print(f"\n[INFO] O item '{name}' não é um tensor para logar.")
133
  return
134
- print(f"\n--- Informações do Tensor: {name} ---")
135
- print(f" - Shape: {tensor.shape}")
136
  print(f" - Dtype: {tensor.dtype}")
137
  print(f" - Device: {tensor.device}")
138
  if tensor.numel() > 0:
139
- print(f" - Min valor: {tensor.min().item():.4f}")
140
- print(f" - Max valor: {tensor.max().item():.4f}")
141
- print(f" - Média: {tensor.mean().item():.4f}")
142
- else:
143
- print(" - O tensor está vazio, sem estatísticas.")
144
  print("------------------------------------------\n")
145
 
146
  # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
147
  class VideoService:
148
  def __init__(self):
149
- print("Inicializando VideoService...")
 
 
 
150
  self.config = self._load_config()
 
151
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
152
  self.last_memory_reserved_mb = 0.0
153
  self._tmp_dirs = set()
154
  self._tmp_files = set()
155
  self._last_outputs = []
156
 
157
  self.pipeline, self.latent_upsampler = self._load_models()
158
- print(f"Movendo modelos para o dispositivo de inferência: {self.device}")
 
 
159
  self.pipeline.to(self.device)
160
  if self.latent_upsampler:
161
  self.latent_upsampler.to(self.device)
162
 
163
- # Política de precisão (FP8 opcional + autocast coerente)
164
  self._apply_precision_policy()
 
165
 
166
  if self.device == "cuda":
167
  torch.cuda.empty_cache()
168
  self._log_gpu_memory("Após carregar modelos")
169
- print("VideoService pronto para uso.")
 
170
 
171
  def _log_gpu_memory(self, stage_name: str):
172
  if self.device != "cuda":
@@ -181,51 +198,54 @@ class VideoService:
181
  processes = _query_gpu_processes_via_nvml(device_index)
182
  if not processes:
183
  processes = _query_gpu_processes_via_nvidiasmi(device_index)
184
- print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
185
- print(f" - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
186
- print(f" - Variação desde o último log: {delta_mb:+.2f} MB")
187
  if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
188
- print(f" - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
189
  print(_gpu_process_table(processes, os.getpid()), end="")
190
  print("--------------------------------------------------\n")
191
  self.last_memory_reserved_mb = current_reserved_mb
192
 
193
  def _register_tmp_dir(self, d: str):
194
- try:
195
- if d and os.path.isdir(d):
196
- self._tmp_dirs.add(d)
197
- except Exception:
198
- pass
199
 
200
  def _register_tmp_file(self, f: str):
201
- try:
202
- if f and os.path.isfile(f):
203
- self._tmp_files.add(f)
204
- except Exception:
205
- pass
206
 
207
  def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
 
208
  keep = set(keep_paths or [])
209
  extras = set(extra_paths or [])
210
 
 
211
  for f in list(self._tmp_files | extras):
212
  try:
213
  if f not in keep and os.path.isfile(f):
214
  os.remove(f)
215
- except Exception:
216
- pass
 
 
217
  finally:
218
  self._tmp_files.discard(f)
219
 
 
220
  for d in list(self._tmp_dirs):
221
  try:
222
  if d not in keep and os.path.isdir(d):
223
  shutil.rmtree(d, ignore_errors=True)
224
- except Exception:
225
- pass
 
 
226
  finally:
227
  self._tmp_dirs.discard(d)
228
 
 
229
  gc.collect()
230
  try:
231
  if clear_gpu and torch.cuda.is_available():
@@ -234,13 +254,13 @@ class VideoService:
234
  torch.cuda.ipc_collect()
235
  except Exception:
236
  pass
237
- except Exception:
238
- pass
239
 
240
  try:
241
  self._log_gpu_memory("Após finalize")
242
- except Exception:
243
- pass
244
 
245
  def _load_config(self):
246
  base = LTX_VIDEO_REPO_DIR / "configs"
@@ -252,15 +272,18 @@ class VideoService:
252
  ]
253
  for cfg in candidates:
254
  if cfg.exists():
 
255
  with open(cfg, "r") as file:
256
  return yaml.safe_load(file)
257
- config_file_path = base / "ltxv-13b-0.9.8-distilled.yaml"
258
- with open(config_file_path, "r") as file:
 
259
  return yaml.safe_load(file)
260
 
261
  def _load_models(self):
 
262
  LTX_REPO = "Lightricks/LTX-Video"
263
-
264
  distilled_model_path = hf_hub_download(
265
  repo_id=LTX_REPO,
266
  filename=self.config["checkpoint_path"],
@@ -269,7 +292,9 @@ class VideoService:
269
  token=os.getenv("HF_TOKEN"),
270
  )
271
  self.config["checkpoint_path"] = distilled_model_path
 
272
 
 
273
  spatial_upscaler_path = hf_hub_download(
274
  repo_id=LTX_REPO,
275
  filename=self.config["spatial_upscaler_model_path"],
@@ -278,7 +303,9 @@ class VideoService:
278
  token=os.getenv("HF_TOKEN"),
279
  )
280
  self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
 
281
 
 
282
  pipeline = create_ltx_video_pipeline(
283
  ckpt_path=self.config["checkpoint_path"],
284
  precision=self.config["precision"],
@@ -289,49 +316,60 @@ class VideoService:
289
  prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
290
  prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
291
  )
 
292
 
293
  latent_upsampler = None
294
  if self.config.get("spatial_upscaler_model_path"):
 
295
  latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
296
-
 
297
  return pipeline, latent_upsampler
298
 
299
  def _promote_fp8_weights_to_bf16(self, module):
300
  if not isinstance(module, torch.nn.Module):
 
301
  return
302
  f8 = getattr(torch, "float8_e4m3fn", None)
303
  if f8 is None:
 
304
  return
 
305
  for _, p in module.named_parameters(recurse=True):
306
  try:
307
  if p.dtype == f8:
308
  with torch.no_grad():
309
  p.data = p.data.to(torch.bfloat16)
 
310
  except Exception:
311
  pass
312
  for _, b in module.named_buffers(recurse=True):
313
  try:
314
  if hasattr(b, "dtype") and b.dtype == f8:
315
  b.data = b.data.to(torch.bfloat16)
 
316
  except Exception:
317
  pass
 
318
 
319
  def _apply_precision_policy(self):
320
  prec = str(self.config.get("precision", "")).lower()
321
  self.runtime_autocast_dtype = torch.float32
 
322
  if prec == "float8_e4m3fn":
323
  self.runtime_autocast_dtype = torch.bfloat16
324
  force_promote = os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
 
325
  if force_promote and hasattr(torch, "float8_e4m3fn"):
326
  try:
327
  self._promote_fp8_weights_to_bf16(self.pipeline)
328
- except Exception:
329
- pass
330
  try:
331
  if self.latent_upsampler:
332
  self._promote_fp8_weights_to_bf16(self.latent_upsampler)
333
- except Exception:
334
- pass
335
  elif prec == "bfloat16":
336
  self.runtime_autocast_dtype = torch.bfloat16
337
  elif prec == "mixed_precision":
@@ -340,35 +378,41 @@ class VideoService:
340
  self.runtime_autocast_dtype = torch.float32
341
 
342
  def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
 
343
  tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
344
  tensor = torch.nn.functional.pad(tensor, padding_values)
345
- if self.device == "cuda":
346
- return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
347
- return tensor.to(self.device)
348
 
349
- # Nova: decodificação de latentes fora da pipeline com VAE e escrita incremental
350
  def _decode_latents_to_video(self, latents: torch.Tensor, output_video_path: str, frame_rate: int,
351
  padding_values, progress_callback=None):
 
352
  pad_left, pad_right, pad_top, pad_bottom = padding_values
 
 
 
353
  with imageio.get_writer(output_video_path, fps=frame_rate, codec="libx264", quality=8) as writer:
354
- T = latents.shape[2]
355
  for i in range(T):
356
  latent_chw = latents[0, :, i].to(self.device)
357
  with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext():
358
  pixel_bchw = None
359
  if hasattr(self.pipeline, "decode_latents"):
360
  pixel_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
 
 
361
  elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
362
  pixel_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
 
 
363
  else:
364
- raise RuntimeError("Pipeline não expõe decode_latents nem vae.decode para decodificar latentes.")
365
- pixel_chw = pixel_bchw[0]
366
  if pixel_chw.min() < 0:
367
  pixel_chw = (pixel_chw.clamp(-1, 1) + 1.0) / 2.0
368
  else:
369
  pixel_chw = pixel_chw.clamp(0, 1)
370
- H = pixel_chw.shape[1]
371
- W = pixel_chw.shape[2]
372
  h_end = H - pad_bottom if pad_bottom > 0 else H
373
  w_end = W - pad_right if pad_right > 0 else W
374
  pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
@@ -380,6 +424,9 @@ class VideoService:
380
  writer.append_data(frame_hwc_u8)
381
  if progress_callback:
382
  progress_callback(i + 1, T)
 
 
 
383
 
384
  def generate(
385
  self,
@@ -402,8 +449,10 @@ class VideoService:
402
  guidance_scale=3.0,
403
  improve_texture=True,
404
  progress_callback=None,
405
- external_decode=True, # NOVO: decodificar fora da pipeline
406
  ):
 
 
407
  if self.device == "cuda":
408
  torch.cuda.empty_cache()
409
  torch.cuda.reset_peak_memory_stats()
@@ -416,16 +465,19 @@ class VideoService:
416
 
417
  used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
418
  seed_everething(used_seed)
 
419
 
420
  FPS = 24.0
421
  MAX_NUM_FRAMES = 257
422
  target_frames_rounded = round(duration * FPS)
423
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
424
  actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
 
425
 
426
  height_padded = ((height - 1) // 32 + 1) * 32
427
  width_padded = ((width - 1) // 32 + 1) * 32
428
  padding_values = calculate_padding(height, width, height_padded, width_padded)
 
429
 
430
  generator = torch.Generator(device=self.device).manual_seed(used_seed)
431
  conditioning_items = []
@@ -441,6 +493,7 @@ class VideoService:
441
  end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
442
  last_frame_index = actual_num_frames - 1
443
  conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
 
444
 
445
  call_kwargs = {
446
  "prompt": prompt,
@@ -450,7 +503,7 @@ class VideoService:
450
  "num_frames": actual_num_frames,
451
  "frame_rate": int(FPS),
452
  "generator": generator,
453
- "output_type": "latent" if external_decode else "pt", # aqui alternamos o tipo de saída
454
  "conditioning_items": conditioning_items if conditioning_items else None,
455
  "media_items": None,
456
  "decode_timestep": self.config["decode_timestep"],
@@ -464,92 +517,111 @@ class VideoService:
464
  "enhance_prompt": False,
465
  "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
466
  }
 
467
 
468
  if mode == "video-to-video":
469
- call_kwargs["media_items"] = load_media_file(
470
  media_path=input_video_filepath,
471
  height=height,
472
  width=width,
473
  max_frames=int(frames_to_use),
474
  padding=padding_values,
475
  ).to(self.device)
 
 
476
 
477
  latents = None
478
  result_tensor = None
479
  multi_scale_pipeline = None
480
 
481
- if improve_texture:
482
- if not self.latent_upsampler:
483
- raise ValueError("Upscaler espacial não carregado.")
484
- multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
485
- first_pass_args = self.config.get("first_pass", {}).copy()
486
- first_pass_args["guidance_scale"] = float(guidance_scale)
487
- second_pass_args = self.config.get("second_pass", {}).copy()
488
- second_pass_args["guidance_scale"] = float(guidance_scale)
489
- multi_scale_call_kwargs = call_kwargs.copy()
490
- multi_scale_call_kwargs.update(
491
- {
492
- "downscale_factor": self.config["downscale_factor"],
493
- "first_pass": first_pass_args,
494
- "second_pass": second_pass_args,
495
- }
496
- )
497
- ctx = contextlib.nullcontext()
498
- if self.device == "cuda":
499
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
500
- with ctx:
501
- result = multi_scale_pipeline(**multi_scale_call_kwargs)
502
- # Captura latentes ou imagens conforme o output_type
503
- if external_decode:
504
- latents = getattr(result, "latents", None) or getattr(result, "images", None) or result
505
- else:
506
- result_tensor = getattr(result, "images", None) or result
507
- if not external_decode:
508
- log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
509
- else:
510
- single_pass_kwargs = call_kwargs.copy()
511
- first_pass_config = self.config.get("first_pass", {})
512
- single_pass_kwargs.update(
513
- {
514
- "guidance_scale": float(guidance_scale),
515
- "stg_scale": first_pass_config.get("stg_scale"),
516
- "rescaling_scale": first_pass_config.get("rescaling_scale"),
517
- "skip_block_list": first_pass_config.get("skip_block_list"),
518
- }
519
- )
520
- # Agenda única para guidance_mapping consistente
521
- schedule = first_pass_config.get("timesteps")
522
- if schedule is None:
523
- schedule = first_pass_config.get("guidance_timesteps")
524
- if mode == "video-to-video":
525
- schedule = [0.7]
526
- print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
527
- if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
528
- single_pass_kwargs["timesteps"] = schedule
529
- single_pass_kwargs["guidance_timesteps"] = schedule
530
-
531
- print("\n[INFO] Executando pipeline de etapa única...")
532
- ctx = contextlib.nullcontext()
533
- if self.device == "cuda":
534
- ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype)
535
- with ctx:
536
- result = self.pipeline(**single_pass_kwargs)
537
- if external_decode:
538
- latents = getattr(result, "latents", None) or getattr(result, "images", None) or result
539
  else:
540
- result_tensor = getattr(result, "images", None) or result
541
 
542
- # Staging seguro em tmp e move para diretório persistente
543
- temp_dir = tempfile.mkdtemp(prefix="ltxv_")
544
- self._register_tmp_dir(temp_dir)
545
- results_dir = "/app/output"
546
- os.makedirs(results_dir, exist_ok=True)
 
547
 
548
- final_output_path = None
549
- output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
550
- try:
551
  if external_decode:
552
- # Decodifica latentes -> MP4, quadro a quadro
553
  self._decode_latents_to_video(
554
  latents=latents,
555
  output_video_path=output_video_path,
@@ -558,15 +630,9 @@ class VideoService:
558
  progress_callback=progress_callback,
559
  )
560
  else:
561
- # Caminho antigo: tensor em espaço de pixels -> escrever quadro a quadro
562
- # Aplicar corte de padding antes de escrever
563
- pad_left, pad_right, pad_top, pad_bottom = padding_values
564
- slice_h_end = -pad_bottom if pad_bottom > 0 else None
565
- slice_w_end = -pad_right if pad_right > 0 else None
566
- result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
567
- log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")
568
  with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
569
- T = result_tensor.shape[2]
570
  for i in range(T):
571
  frame_chw = result_tensor[0, :, i]
572
  frame_hwc_u8 = (frame_chw.permute(1, 2, 0)
@@ -578,17 +644,27 @@ class VideoService:
578
  writer.append_data(frame_hwc_u8)
579
  if progress_callback:
580
  progress_callback(i + 1, T)
 
 
581
 
582
  candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
583
  try:
584
  shutil.move(output_video_path, candidate_final)
585
  final_output_path = candidate_final
586
- except Exception:
 
587
  final_output_path = output_video_path
588
- self._register_tmp_file(output_video_path)
589
 
 
590
  self._log_gpu_memory("Fim da Geração")
 
591
  return final_output_path, used_seed
 
 
 
 
 
592
  finally:
593
  try:
594
  del latents
@@ -611,13 +687,13 @@ class VideoService:
611
  torch.cuda.ipc_collect()
612
  except Exception:
613
  pass
614
- except Exception:
615
- pass
616
 
617
  try:
618
- self.finalize(keep_paths=[final_output_path] if final_output_path else [])
619
- except Exception:
620
- pass
621
 
622
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
623
  video_generation_service = VideoService()
 
1
+ # ltx_server.py — VideoService com logs de depuração detalhados (init→MP4)
2
+ # Opção external_decode: True (default) decodifica latentes com VAE fora da pipeline.
3
+
4
+ # --- 1. IMPORTAÇÕES ---
5
  import torch
6
  import numpy as np
7
  import random
 
18
  import gc
19
  import shutil
20
  import contextlib
21
+ import time
22
+ import traceback
23
 
24
  # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
25
  def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
26
  try:
27
  import psutil
28
  import pynvml as nvml
29
+ print("[DEBUG] NVML: inicializando para consulta de processos...")
30
  nvml.nvmlInit()
31
  handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
32
  try:
 
51
  except Exception:
52
  pass
53
  results.append({"pid": pid, "name": name, "user": user, "used_mb": used_mb})
54
+ print("[DEBUG] NVML: finalizando...")
55
  nvml.nvmlShutdown()
56
  return results
57
+ except Exception as e:
58
+ print(f"[DEBUG] NVML indisponível ou falhou: {e}")
59
  return []
60
 
61
  def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
62
  cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
63
  try:
64
+ print(f"[DEBUG] Rodando: {cmd}")
65
  out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
66
+ except Exception as e:
67
+ print(f"[DEBUG] nvidia-smi falhou: {e}")
68
  return []
69
  results = []
70
  for line in out.strip().splitlines():
71
  parts = [p.strip() for p in line.split(",")]
72
  if len(parts) >= 3:
73
  try:
74
+ pid = int(parts[0])
75
+ name = parts[1]
76
+ used_mb = int(parts[2])
77
  user = "unknown"
78
  try:
79
  import psutil
 
98
  return "\n".join(lines) + "\n"
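For reference, a minimal sketch of the CSV parsing performed by _query_gpu_processes_via_nvidiasmi above, assuming a typical one-process output line (the sample values are invented):

# Sample line from:
#   nvidia-smi -i 0 --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits
line = "12345, python, 20480"
parts = [p.strip() for p in line.split(",")]
pid, name, used_mb = int(parts[0]), parts[1], int(parts[2])   # -> 12345, 'python', 20480 (MiB)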
99
 
100
  def run_setup():
 
101
  setup_script_path = "setup.py"
102
  if not os.path.exists(setup_script_path):
103
+ print("[DEBUG] 'setup.py' não encontrado. Pulando clonagem de dependências.")
104
  return
105
  try:
106
+ print("[DEBUG] Executando setup.py para dependências...")
107
  subprocess.run([sys.executable, setup_script_path], check=True)
108
+ print("[DEBUG] Setup concluído com sucesso.")
109
  except subprocess.CalledProcessError as e:
110
+ print(f"[DEBUG] ERRO no setup.py (code {e.returncode}). Abortando.")
111
  sys.exit(1)
112
 
113
  DEPS_DIR = Path("/data")
114
  LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
115
  if not LTX_VIDEO_REPO_DIR.exists():
116
+ print(f"[DEBUG] Repositório não encontrado em {LTX_VIDEO_REPO_DIR}. Rodando setup...")
117
  run_setup()
118
 
119
  def add_deps_to_path():
120
+ repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
 
 
121
  if str(LTX_VIDEO_REPO_DIR.resolve()) not in sys.path:
122
+ sys.path.insert(0, repo_path)
123
+ print(f"[DEBUG] Repo adicionado ao sys.path: {repo_path}")
124
 
125
  add_deps_to_path()
126
 
 
139
  # --- 4. FUNÇÕES HELPER DE LOG ---
140
  def log_tensor_info(tensor, name="Tensor"):
141
  if not isinstance(tensor, torch.Tensor):
142
+ print(f"\n[INFO] '{name}' não é tensor.")
143
  return
144
+ print(f"\n--- Tensor: {name} ---")
145
+ print(f" - Shape: {tuple(tensor.shape)}")
146
  print(f" - Dtype: {tensor.dtype}")
147
  print(f" - Device: {tensor.device}")
148
  if tensor.numel() > 0:
149
+ try:
150
+ print(f" - Min: {tensor.min().item():.4f} Max: {tensor.max().item():.4f} Mean: {tensor.mean().item():.4f}")
151
+ except Exception:
152
+ pass
 
153
  print("------------------------------------------\n")
154
 
155
  # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
156
  class VideoService:
157
  def __init__(self):
158
+ t0 = time.perf_counter()
159
+ print("[DEBUG] Inicializando VideoService...")
160
+ self.debug = os.getenv("LTXV_DEBUG", "1") == "1"
161
+ self.frame_log_every = int(os.getenv("LTXV_FRAME_LOG_EVERY", "8"))
162
  self.config = self._load_config()
163
+ print(f"[DEBUG] Config carregada (precision={self.config.get('precision')}, sampler={self.config.get('sampler')})")
164
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
165
+ print(f"[DEBUG] Device selecionado: {self.device}")
166
  self.last_memory_reserved_mb = 0.0
167
  self._tmp_dirs = set()
168
  self._tmp_files = set()
169
  self._last_outputs = []
170
 
171
  self.pipeline, self.latent_upsampler = self._load_models()
172
+ print(f"[DEBUG] Pipeline e Upsampler carregados. Upsampler ativo? {bool(self.latent_upsampler)}")
173
+
174
+ print(f"[DEBUG] Movendo modelos para {self.device}...")
175
  self.pipeline.to(self.device)
176
  if self.latent_upsampler:
177
  self.latent_upsampler.to(self.device)
178
 
 
179
  self._apply_precision_policy()
180
+ print(f"[DEBUG] runtime_autocast_dtype = {getattr(self, 'runtime_autocast_dtype', None)}")
181
 
182
  if self.device == "cuda":
183
  torch.cuda.empty_cache()
184
  self._log_gpu_memory("Após carregar modelos")
185
+
186
+ print(f"[DEBUG] VideoService pronto. boot_time={time.perf_counter()-t0:.3f}s")
187
 
188
  def _log_gpu_memory(self, stage_name: str):
189
  if self.device != "cuda":
 
198
  processes = _query_gpu_processes_via_nvml(device_index)
199
  if not processes:
200
  processes = _query_gpu_processes_via_nvidiasmi(device_index)
201
+ print(f"\n--- [LOG GPU] {stage_name} (cuda:{device_index}) ---")
202
+ print(f" - Reservado: {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB (Δ={delta_mb:+.2f} MB)")
 
203
  if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
204
+ print(f" - Pico reservado (nesta fase): {peak_reserved_mb:.2f} MB")
205
  print(_gpu_process_table(processes, os.getpid()), end="")
206
  print("--------------------------------------------------\n")
207
  self.last_memory_reserved_mb = current_reserved_mb
208
 
209
  def _register_tmp_dir(self, d: str):
210
+ if d and os.path.isdir(d):
211
+ self._tmp_dirs.add(d)
212
+ print(f"[DEBUG] Registrado tmp dir: {d}")
 
 
213
 
214
  def _register_tmp_file(self, f: str):
215
+ if f and os.path.exists(f):
216
+ self._tmp_files.add(f)
217
+ print(f"[DEBUG] Registrado tmp file: {f}")
 
 
218
 
219
  def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
220
+ print("[DEBUG] Finalize: iniciando limpeza...")
221
  keep = set(keep_paths or [])
222
  extras = set(extra_paths or [])
223
 
224
+ removed_files = 0
225
  for f in list(self._tmp_files | extras):
226
  try:
227
  if f not in keep and os.path.isfile(f):
228
  os.remove(f)
229
+ removed_files += 1
230
+ print(f"[DEBUG] Removido arquivo tmp: {f}")
231
+ except Exception as e:
232
+ print(f"[DEBUG] Falha removendo arquivo {f}: {e}")
233
  finally:
234
  self._tmp_files.discard(f)
235
 
236
+ removed_dirs = 0
237
  for d in list(self._tmp_dirs):
238
  try:
239
  if d not in keep and os.path.isdir(d):
240
  shutil.rmtree(d, ignore_errors=True)
241
+ removed_dirs += 1
242
+ print(f"[DEBUG] Removido diretório tmp: {d}")
243
+ except Exception as e:
244
+ print(f"[DEBUG] Falha removendo diretório {d}: {e}")
245
  finally:
246
  self._tmp_dirs.discard(d)
247
 
248
+ print(f"[DEBUG] Finalize: arquivos removidos={removed_files}, dirs removidos={removed_dirs}")
249
  gc.collect()
250
  try:
251
  if clear_gpu and torch.cuda.is_available():
 
254
  torch.cuda.ipc_collect()
255
  except Exception:
256
  pass
257
+ except Exception as e:
258
+ print(f"[DEBUG] Finalize: limpeza GPU falhou: {e}")
259
 
260
  try:
261
  self._log_gpu_memory("Após finalize")
262
+ except Exception as e:
263
+ print(f"[DEBUG] Log GPU pós-finalize falhou: {e}")
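As a reading aid, a minimal sketch of the temp-artifact lifecycle these helpers implement, using the method names above; the file names are placeholders and the instance name comes from the bottom of this file:

import os, shutil, tempfile

svc = video_generation_service                      # instance created at the end of this file
tmp_dir = tempfile.mkdtemp(prefix="ltxv_")
svc._register_tmp_dir(tmp_dir)                      # tracked; removed by finalize()
tmp_mp4 = os.path.join(tmp_dir, "output_1234.mp4")
# ... frames are written to tmp_mp4 here ...
final_mp4 = shutil.move(tmp_mp4, "/app/output/output_1234.mp4")  # staged out of the tmp dir first
svc.finalize(keep_paths=[final_mp4])                # removes tracked tmp files/dirs, then frees GPU caches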
264
 
265
  def _load_config(self):
266
  base = LTX_VIDEO_REPO_DIR / "configs"
 
272
  ]
273
  for cfg in candidates:
274
  if cfg.exists():
275
+ print(f"[DEBUG] Config selecionada: {cfg}")
276
  with open(cfg, "r") as file:
277
  return yaml.safe_load(file)
278
+ cfg = base / "ltxv-13b-0.9.8-distilled.yaml"
279
+ print(f"[DEBUG] Config fallback: {cfg}")
280
+ with open(cfg, "r") as file:
281
  return yaml.safe_load(file)
282
 
283
  def _load_models(self):
284
+ t0 = time.perf_counter()
285
  LTX_REPO = "Lightricks/LTX-Video"
286
+ print("[DEBUG] Baixando checkpoint principal (hf_hub_download)...")
287
  distilled_model_path = hf_hub_download(
288
  repo_id=LTX_REPO,
289
  filename=self.config["checkpoint_path"],
 
292
  token=os.getenv("HF_TOKEN"),
293
  )
294
  self.config["checkpoint_path"] = distilled_model_path
295
+ print(f"[DEBUG] Checkpoint em: {distilled_model_path}")
296
 
297
+ print("[DEBUG] Baixando upscaler espacial (hf_hub_download)...")
298
  spatial_upscaler_path = hf_hub_download(
299
  repo_id=LTX_REPO,
300
  filename=self.config["spatial_upscaler_model_path"],
 
303
  token=os.getenv("HF_TOKEN"),
304
  )
305
  self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
306
+ print(f"[DEBUG] Upscaler em: {spatial_upscaler_path}")
307
 
308
+ print("[DEBUG] Construindo pipeline...")
309
  pipeline = create_ltx_video_pipeline(
310
  ckpt_path=self.config["checkpoint_path"],
311
  precision=self.config["precision"],
 
316
  prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
317
  prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
318
  )
319
+ print("[DEBUG] Pipeline pronto.")
320
 
321
  latent_upsampler = None
322
  if self.config.get("spatial_upscaler_model_path"):
323
+ print("[DEBUG] Construindo latent_upsampler...")
324
  latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
325
+ print("[DEBUG] Upsampler pronto.")
326
+ print(f"[DEBUG] _load_models() tempo total={time.perf_counter()-t0:.3f}s")
327
  return pipeline, latent_upsampler
328
 
329
  def _promote_fp8_weights_to_bf16(self, module):
330
  if not isinstance(module, torch.nn.Module):
331
+ print("[DEBUG] Promoção FP8→BF16 ignorada: alvo não é nn.Module.")
332
  return
333
  f8 = getattr(torch, "float8_e4m3fn", None)
334
  if f8 is None:
335
+ print("[DEBUG] torch.float8_e4m3fn indisponível.")
336
  return
337
+ p_cnt = b_cnt = 0
338
  for _, p in module.named_parameters(recurse=True):
339
  try:
340
  if p.dtype == f8:
341
  with torch.no_grad():
342
  p.data = p.data.to(torch.bfloat16)
343
+ p_cnt += 1
344
  except Exception:
345
  pass
346
  for _, b in module.named_buffers(recurse=True):
347
  try:
348
  if hasattr(b, "dtype") and b.dtype == f8:
349
  b.data = b.data.to(torch.bfloat16)
350
+ b_cnt += 1
351
  except Exception:
352
  pass
353
+ print(f"[DEBUG] FP8→BF16: params_promoted={p_cnt}, buffers_promoted={b_cnt}")
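A standalone sketch of the same promotion idea on a toy module, assuming a PyTorch build that exposes torch.float8_e4m3fn (older builds do not):

import torch

f8 = getattr(torch, "float8_e4m3fn", None)
if f8 is not None:
    layer = torch.nn.Linear(4, 4)
    layer.weight.data = layer.weight.data.to(f8)      # simulate an FP8 checkpoint weight
    for p in layer.parameters():
        if p.dtype == f8:
            with torch.no_grad():
                p.data = p.data.to(torch.bfloat16)     # promote in place, as _promote_fp8_weights_to_bf16 does
    assert layer.weight.dtype == torch.bfloat16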
354
 
355
  def _apply_precision_policy(self):
356
  prec = str(self.config.get("precision", "")).lower()
357
  self.runtime_autocast_dtype = torch.float32
358
+ print(f"[DEBUG] Aplicando política de precisão: {prec}")
359
  if prec == "float8_e4m3fn":
360
  self.runtime_autocast_dtype = torch.bfloat16
361
  force_promote = os.getenv("LTXV_FORCE_BF16_ON_FP8", "0") == "1"
362
+ print(f"[DEBUG] FP8 detectado. force_promote={force_promote}")
363
  if force_promote and hasattr(torch, "float8_e4m3fn"):
364
  try:
365
  self._promote_fp8_weights_to_bf16(self.pipeline)
366
+ except Exception as e:
367
+ print(f"[DEBUG] Promoção FP8→BF16 na pipeline falhou: {e}")
368
  try:
369
  if self.latent_upsampler:
370
  self._promote_fp8_weights_to_bf16(self.latent_upsampler)
371
+ except Exception as e:
372
+ print(f"[DEBUG] Promoção FP8→BF16 no upsampler falhou: {e}")
373
  elif prec == "bfloat16":
374
  self.runtime_autocast_dtype = torch.bfloat16
375
  elif prec == "mixed_precision":
 
378
  self.runtime_autocast_dtype = torch.float32
379
 
380
  def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
381
+ print(f"[DEBUG] Carregando condicionamento: {filepath}")
382
  tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
383
  tensor = torch.nn.functional.pad(tensor, padding_values)
384
+ out = tensor.to(self.device, dtype=self.runtime_autocast_dtype) if self.device == "cuda" else tensor.to(self.device)
385
+ print(f"[DEBUG] Cond shape={tuple(out.shape)} dtype={out.dtype} device={out.device}")
386
+ return out
387
 
 
388
  def _decode_latents_to_video(self, latents: torch.Tensor, output_video_path: str, frame_rate: int,
389
  padding_values, progress_callback=None):
390
+ print(f"[DEBUG] Decodificando latentes → vídeo: {output_video_path}")
391
  pad_left, pad_right, pad_top, pad_bottom = padding_values
392
+ T = latents.shape[2]
393
+ print(f"[DEBUG] Latentes shape={tuple(latents.shape)} frames={T}")
394
+ start = time.perf_counter()
395
  with imageio.get_writer(output_video_path, fps=frame_rate, codec="libx264", quality=8) as writer:
 
396
  for i in range(T):
397
  latent_chw = latents[0, :, i].to(self.device)
398
  with torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext():
399
  pixel_bchw = None
400
  if hasattr(self.pipeline, "decode_latents"):
401
  pixel_bchw = self.pipeline.decode_latents(latent_chw.unsqueeze(0))
402
+ if i % self.frame_log_every == 0:
403
+ print(f"[DEBUG] decode_latents frame={i}")
404
  elif hasattr(self.pipeline, "vae") and hasattr(self.pipeline.vae, "decode"):
405
  pixel_bchw = self.pipeline.vae.decode(latent_chw.unsqueeze(0))
406
+ if i % self.frame_log_every == 0:
407
+ print(f"[DEBUG] vae.decode frame={i}")
408
  else:
409
+ raise RuntimeError("Pipeline não possui decode_latents/vae.decode.")
410
+ pixel_chw = pixel_bchw[0]
411
  if pixel_chw.min() < 0:
412
  pixel_chw = (pixel_chw.clamp(-1, 1) + 1.0) / 2.0
413
  else:
414
  pixel_chw = pixel_chw.clamp(0, 1)
415
+ H, W = pixel_chw.shape[1], pixel_chw.shape[2]
 
416
  h_end = H - pad_bottom if pad_bottom > 0 else H
417
  w_end = W - pad_right if pad_right > 0 else W
418
  pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]
 
424
  writer.append_data(frame_hwc_u8)
425
  if progress_callback:
426
  progress_callback(i + 1, T)
427
+ if i % self.frame_log_every == 0:
428
+ print(f"[DEBUG] frame {i}/{T} escrito.")
429
+ print(f"[DEBUG] Decodificação+escrita concluída em {time.perf_counter()-start:.3f}s")
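A minimal, self-contained sketch of the per-frame post-processing done inside the loop above (range normalization, padding crop, uint8 HWC conversion), using a dummy tensor in place of a real VAE output; the padding values are illustrative:

import torch

pixel_chw = torch.rand(3, 512, 768) * 2 - 1                 # dummy decoded frame in [-1, 1], shape (C, H, W)
pad_left, pad_right, pad_top, pad_bottom = 0, 64, 0, 32     # illustrative padding

pixel_chw = (pixel_chw.clamp(-1, 1) + 1.0) / 2.0            # [-1, 1] -> [0, 1]
H, W = pixel_chw.shape[1], pixel_chw.shape[2]
h_end = H - pad_bottom if pad_bottom > 0 else H
w_end = W - pad_right if pad_right > 0 else W
pixel_chw = pixel_chw[:, pad_top:h_end, pad_left:w_end]     # crop the padding added before inference
frame_hwc_u8 = (pixel_chw.permute(1, 2, 0) * 255).to(torch.uint8).cpu().numpy()
# frame_hwc_u8 has shape (480, 704, 3); this is what writer.append_data(...) receives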
430
 
431
  def generate(
432
  self,
 
449
  guidance_scale=3.0,
450
  improve_texture=True,
451
  progress_callback=None,
452
+ external_decode=True,
453
  ):
454
+ t_all = time.perf_counter()
455
+ print(f"[DEBUG] generate() begin mode={mode} external_decode={external_decode} improve_texture={improve_texture}")
456
  if self.device == "cuda":
457
  torch.cuda.empty_cache()
458
  torch.cuda.reset_peak_memory_stats()
 
465
 
466
  used_seed = random.randint(0, 2**32 - 1) if randomize_seed else int(seed)
467
  seed_everething(used_seed)
468
+ print(f"[DEBUG] Seed usado: {used_seed}")
469
 
470
  FPS = 24.0
471
  MAX_NUM_FRAMES = 257
472
  target_frames_rounded = round(duration * FPS)
473
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
474
  actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
475
+ print(f"[DEBUG] Frames alvo: {actual_num_frames} (dur={duration}s @ {FPS}fps)")
476
 
477
  height_padded = ((height - 1) // 32 + 1) * 32
478
  width_padded = ((width - 1) // 32 + 1) * 32
479
  padding_values = calculate_padding(height, width, height_padded, width_padded)
480
+ print(f"[DEBUG] Dimensões: ({height},{width}) -> pad ({height_padded},{width_padded}); padding={padding_values}")
481
 
482
  generator = torch.Generator(device=self.device).manual_seed(used_seed)
483
  conditioning_items = []
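A small worked check of the frame-count and padding arithmetic a few lines above, with illustrative inputs (not defaults from this file):

FPS = 24.0
MAX_NUM_FRAMES = 257
duration, height, width = 5.0, 480, 710                                # example request

target_frames_rounded = round(duration * FPS)                          # 120
n_val = round((float(target_frames_rounded) - 1.0) / 8.0)              # round(119 / 8) = 15
actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))    # 121, always of the form 8k + 1

height_padded = ((height - 1) // 32 + 1) * 32                          # 480 (already a multiple of 32)
width_padded = ((width - 1) // 32 + 1) * 32                            # 736 (710 rounded up to a multiple of 32)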
 
493
  end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
494
  last_frame_index = actual_num_frames - 1
495
  conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
496
+ print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
497
 
498
  call_kwargs = {
499
  "prompt": prompt,
 
503
  "num_frames": actual_num_frames,
504
  "frame_rate": int(FPS),
505
  "generator": generator,
506
+ "output_type": "latent" if external_decode else "pt",
507
  "conditioning_items": conditioning_items if conditioning_items else None,
508
  "media_items": None,
509
  "decode_timestep": self.config["decode_timestep"],
 
517
  "enhance_prompt": False,
518
  "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
519
  }
520
+ print(f"[DEBUG] call_kwargs.output_type={call_kwargs['output_type']} skip_layer_strategy={call_kwargs['skip_layer_strategy']}")
521
 
522
  if mode == "video-to-video":
523
+ media = load_media_file(
524
  media_path=input_video_filepath,
525
  height=height,
526
  width=width,
527
  max_frames=int(frames_to_use),
528
  padding=padding_values,
529
  ).to(self.device)
530
+ call_kwargs["media_items"] = media
531
+ print(f"[DEBUG] media_items shape={tuple(media.shape)}")
532
 
533
  latents = None
534
  result_tensor = None
535
  multi_scale_pipeline = None
536
 
537
+ try:
538
+ if improve_texture:
539
+ if not self.latent_upsampler:
540
+ raise ValueError("Upscaler espacial não carregado.")
541
+ print("[DEBUG] Multi-escala: construindo pipeline...")
542
+ multi_scale_pipeline = LTXMultiScalePipeline(self.pipeline, self.latent_upsampler)
543
+ first_pass_args = self.config.get("first_pass", {}).copy()
544
+ first_pass_args["guidance_scale"] = float(guidance_scale)
545
+ second_pass_args = self.config.get("second_pass", {}).copy()
546
+ second_pass_args["guidance_scale"] = float(guidance_scale)
547
+
548
+ multi_scale_call_kwargs = call_kwargs.copy()
549
+ multi_scale_call_kwargs.update(
550
+ {
551
+ "downscale_factor": self.config["downscale_factor"],
552
+ "first_pass": first_pass_args,
553
+ "second_pass": second_pass_args,
554
+ }
555
+ )
556
+ print("[DEBUG] Chamando multi_scale_pipeline...")
557
+ t_ms = time.perf_counter()
558
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
559
+ with ctx:
560
+ result = multi_scale_pipeline(**multi_scale_call_kwargs)
561
+ print(f"[DEBUG] multi_scale_pipeline tempo={time.perf_counter()-t_ms:.3f}s")
562
+
563
+ if external_decode:
564
+ if hasattr(result, "latents"):
565
+ latents = result.latents
566
+ elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
567
+ latents = result.images
568
+ else:
569
+ latents = result
570
+ print(f"[DEBUG] Latentes obtidos (multi-escala): shape={tuple(latents.shape)}")
571
+ else:
572
+ result_tensor = result.images if hasattr(result, "images") else result
573
+ print(f"[DEBUG] Pixels obtidos (multi-escala): shape={tuple(result_tensor.shape)}")
574
+ log_tensor_info(result_tensor, "Saída Multi-Scale (pixel)")
575
  else:
576
+ single_pass_kwargs = call_kwargs.copy()
577
+ first_pass_config = self.config.get("first_pass", {})
578
+ single_pass_kwargs.update(
579
+ {
580
+ "guidance_scale": float(guidance_scale),
581
+ "stg_scale": first_pass_config.get("stg_scale"),
582
+ "rescaling_scale": first_pass_config.get("rescaling_scale"),
583
+ "skip_block_list": first_pass_config.get("skip_block_list"),
584
+ }
585
+ )
586
+ schedule = first_pass_config.get("timesteps")
587
+ if schedule is None:
588
+ schedule = first_pass_config.get("guidance_timesteps")
589
+ if mode == "video-to-video":
590
+ schedule = [0.7]
591
+ print("[INFO] Modo video-to-video (etapa única): timesteps=[0.7]")
592
+ if isinstance(schedule, (list, tuple)) and len(schedule) > 0:
593
+ single_pass_kwargs["timesteps"] = schedule
594
+ single_pass_kwargs["guidance_timesteps"] = schedule
595
+ print(f"[DEBUG] Single-pass: timesteps_len={len(schedule) if schedule else 0}")
596
+
597
+ print("\n[INFO] Executando pipeline de etapa única...")
598
+ t_sp = time.perf_counter()
599
+ ctx = torch.autocast(device_type="cuda", dtype=self.runtime_autocast_dtype) if self.device == "cuda" else contextlib.nullcontext()
600
+ with ctx:
601
+ result = self.pipeline(**single_pass_kwargs)
602
+ print(f"[DEBUG] single-pass tempo={time.perf_counter()-t_sp:.3f}s")
603
+
604
+ if external_decode:
605
+ if hasattr(result, "latents"):
606
+ latents = result.latents
607
+ elif hasattr(result, "images") and isinstance(result.images, torch.Tensor):
608
+ latents = result.images
609
+ else:
610
+ latents = result
611
+ print(f"[DEBUG] Latentes obtidos (single-pass): shape={tuple(latents.shape)}")
612
+ else:
613
+ result_tensor = result.images if hasattr(result, "images") else result
614
+ print(f"[DEBUG] Pixels obtidos (single-pass): shape={tuple(result_tensor.shape)}")
615
 
616
+ temp_dir = tempfile.mkdtemp(prefix="ltxv_")
617
+ self._register_tmp_dir(temp_dir)
618
+ results_dir = "/app/output"
619
+ os.makedirs(results_dir, exist_ok=True)
620
+ output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
621
+ final_output_path = None
622
 
 
 
 
623
  if external_decode:
624
+ print("[DEBUG] Iniciando decodificação de latentes MP4...")
625
  self._decode_latents_to_video(
626
  latents=latents,
627
  output_video_path=output_video_path,
 
630
  progress_callback=progress_callback,
631
  )
632
  else:
633
+ print("[DEBUG] Escrevendo vídeo a partir de pixels (sem latentes)...")
634
  with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8) as writer:
635
+ T = result_tensor.shape[2]
636
  for i in range(T):
637
  frame_chw = result_tensor[0, :, i]
638
  frame_hwc_u8 = (frame_chw.permute(1, 2, 0)
 
644
  writer.append_data(frame_hwc_u8)
645
  if progress_callback:
646
  progress_callback(i + 1, T)
647
+ if i % self.frame_log_every == 0:
648
+ print(f"[DEBUG] frame {i}/{T} escrito (pixel).")
649
 
650
  candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
651
  try:
652
  shutil.move(output_video_path, candidate_final)
653
  final_output_path = candidate_final
654
+ print(f"[DEBUG] MP4 movido para {final_output_path}")
655
+ except Exception as e:
656
  final_output_path = output_video_path
657
+ print(f"[DEBUG] Falha no move; usando tmp como final: {e}")
658
 
659
+ self._register_tmp_file(output_video_path)
660
  self._log_gpu_memory("Fim da Geração")
661
+ print(f"[DEBUG] generate() fim ok. total_time={time.perf_counter()-t_all:.3f}s")
662
  return final_output_path, used_seed
663
+
664
+ except Exception as e:
665
+ print("[DEBUG] EXCEÇÃO NA GERAÇÃO:")
666
+ print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
667
+ raise
668
  finally:
669
  try:
670
  del latents
 
687
  torch.cuda.ipc_collect()
688
  except Exception:
689
  pass
690
+ except Exception as e:
691
+ print(f"[DEBUG] Limpeza GPU no finally falhou: {e}")
692
 
693
  try:
694
+ self.finalize(keep_paths=[])
695
+ except Exception as e:
696
+ print(f"[DEBUG] finalize() no finally falhou: {e}")
697
 
698
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
699
  video_generation_service = VideoService()
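Finally, a hedged usage sketch of the service after this change; the keyword names are those visible in the diff above, the values are placeholders, and parameters elided from the hunks are omitted:

# Hypothetical call; exact defaults and the full signature are not shown in this commit.
path, seed = video_generation_service.generate(
    prompt="a red fox running through snow at dawn",
    mode="text-to-video",            # assumption: any mode other than "video-to-video" skips media_items
    duration=5.0,                    # seconds; mapped to 121 frames at 24 fps (see the arithmetic in generate)
    height=480,
    width=704,
    seed=42,
    randomize_seed=False,
    guidance_scale=3.0,
    improve_texture=True,            # multi-scale path; requires the latent upsampler
    external_decode=True,            # new in this commit: request latents and decode them with the VAE here
)
print(f"MP4 written to {path} (seed={seed})")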