EuuIia committed on
Commit
bd507dd
·
verified ·
1 Parent(s): 86c2fc6

Update video_service.py

Browse files
Files changed (1) hide show
  1. video_service.py +201 -103
video_service.py CHANGED
@@ -14,6 +14,8 @@ import tempfile
14
  from huggingface_hub import hf_hub_download
15
  import sys
16
  import subprocess
 
 
17
 
18
  # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
19
 
@@ -23,7 +25,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
23
  import pynvml as nvml
24
  nvml.nvmlInit()
25
  handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
26
- # Try v3, then fall back to the generic name if binding differs
27
  try:
28
  procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
29
  except Exception:
@@ -33,7 +34,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
33
  pid = int(p.pid)
34
  used_mb = None
35
  try:
36
- # NVML returns bytes; some bindings may use NVML_VALUE_NOT_AVAILABLE
37
  if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
38
  used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
39
  except Exception:
@@ -53,7 +53,6 @@ def _query_gpu_processes_via_nvml(device_index: int) -> List[Dict]:
53
  return []
54
 
55
  def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
56
- # CSV, no header, no units gives lines: "PID,process_name,used_memory"
57
  cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
58
  try:
59
  out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
@@ -82,7 +81,6 @@ def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
82
  def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
83
  if not processes:
84
  return " - Processos ativos: (nenhum)\n"
85
- # sort by used_mb desc, then pid
86
  processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
87
  lines = [" - Processos ativos (PID | USER | NAME | VRAM MB):"]
88
  for p in processes:
@@ -91,36 +89,6 @@ def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
91
  lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
92
  return "\n".join(lines) + "\n"
93
 
94
- # Integração no método existente:
95
- def _log_gpu_memory(self, stage_name: str):
96
- import torch
97
- if self.device != "cuda":
98
- return
99
- device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
100
- current_reserved_b = torch.cuda.memory_reserved(device_index)
101
- current_reserved_mb = current_reserved_b / (1024 ** 2)
102
- total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
103
- total_memory_mb = total_memory_b / (1024 ** 2)
104
- peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
105
- delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
106
-
107
- # Coleta de processos: tenta NVML, depois fallback para nvidia-smi
108
- processes = _query_gpu_processes_via_nvml(device_index)
109
- if not processes:
110
- processes = _query_gpu_processes_via_nvidiasmi(device_index)
111
-
112
- print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
113
- print(f" - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
114
- print(f" - Variação desde o último log: {delta_mb:+.2f} MB")
115
- if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
116
- print(f" - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
117
- # Imprime tabela de processos
118
- print(_gpu_process_table(processes, os.getpid()), end="")
119
- print("--------------------------------------------------\n")
120
- self.last_memory_reserved_mb = current_reserved_mb
121
-
122
-
123
-
124
  def run_setup():
125
  """Executa o script setup.py para clonar as dependências necessárias."""
126
  setup_script_path = "setup.py"
@@ -151,9 +119,12 @@ add_deps_to_path()
151
 
152
  # --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
153
  from inference import (
154
- create_ltx_video_pipeline, create_latent_upsampler,
155
- load_image_to_tensor_with_resize_and_crop, seed_everething,
156
- calculate_padding, load_media_file
 
 
 
157
  )
158
  from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
159
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
@@ -175,15 +146,13 @@ def log_tensor_info(tensor, name="Tensor"):
175
  print(" - O tensor está vazio, sem estatísticas.")
176
  print("------------------------------------------\n")
177
 
178
-
179
-
180
  # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
181
  class VideoService:
182
  def __init__(self):
183
  print("Inicializando VideoService...")
184
  self.config = self._load_config()
185
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
186
- self.last_memory_reserved_mb = 0
187
  self._tmp_dirs = set()
188
  self._tmp_files = set()
189
  self._last_outputs = []
@@ -196,25 +165,53 @@ class VideoService:
196
  torch.cuda.empty_cache()
197
  self._log_gpu_memory("Após carregar modelos")
198
  print("VideoService pronto para uso.")
199
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  def _register_tmp_dir(self, d: str):
201
- if d and os.path.isdir(d):
202
- self._tmp_dirs.add(d)
 
 
 
203
 
204
  def _register_tmp_file(self, f: str):
205
- if f and os.path.isfile(f):
206
- self._tmp_files.add(f)
 
 
 
207
 
208
  def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
209
  """
210
- Remove temporários e coleta memória.
211
  keep_paths: caminhos que não devem ser removidos (ex.: vídeo final).
212
  extra_paths: caminhos adicionais para tentar remover (opcional).
213
  """
214
  keep = set(keep_paths or [])
215
  extras = set(extra_paths or [])
216
 
217
- # Remoção de arquivos
218
  for f in list(self._tmp_files | extras):
219
  try:
220
  if f not in keep and os.path.isfile(f):
@@ -224,7 +221,6 @@ class VideoService:
224
  finally:
225
  self._tmp_files.discard(f)
226
 
227
- # Remoção de diretórios
228
  for d in list(self._tmp_dirs):
229
  try:
230
  if d not in keep and os.path.isdir(d):
@@ -234,13 +230,10 @@ class VideoService:
234
  finally:
235
  self._tmp_dirs.discard(d)
236
 
237
- # Coleta de GC e limpeza de VRAM
238
  gc.collect()
239
  try:
240
- import torch
241
  if clear_gpu and torch.cuda.is_available():
242
  torch.cuda.empty_cache()
243
- # Limpa buffers de IPC quando aplicável
244
  try:
245
  torch.cuda.ipc_collect()
246
  except Exception:
@@ -248,13 +241,11 @@ class VideoService:
248
  except Exception:
249
  pass
250
 
251
- # Log opcional pós-limpeza
252
  try:
253
  self._log_gpu_memory("Após finalize")
254
  except Exception:
255
  pass
256
-
257
-
258
  def _load_config(self):
259
  config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled.yaml"
260
  with open(config_file_path, "r") as file:
@@ -262,28 +253,68 @@ class VideoService:
262
 
263
  def _load_models(self):
264
  LTX_REPO = "Lightricks/LTX-Video"
265
- distilled_model_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["checkpoint_path"], local_dir=os.getenv("HF_HOME"), cache_dir=os.getenv("HF_HOME_CACHE"), token=os.getenv("HF_TOKEN"))
 
 
 
 
 
 
266
  self.config["checkpoint_path"] = distilled_model_path
267
- spatial_upscaler_path = hf_hub_download(repo_id=LTX_REPO, filename=self.config["spatial_upscaler_model_path"], local_dir=os.getenv("HF_HOME"), cache_dir=os.getenv("HF_HOME_CACHE"), token=os.getenv("HF_TOKEN"))
 
 
 
 
 
 
 
268
  self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
269
- pipeline = create_ltx_video_pipeline(ckpt_path=self.config["checkpoint_path"], precision=self.config["precision"], text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"], sampler=self.config["sampler"], device="cpu", enhance_prompt=False, prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"], prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"])
 
 
 
 
 
 
 
 
 
 
 
270
  latent_upsampler = None
271
  if self.config.get("spatial_upscaler_model_path"):
272
  latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
 
273
  return pipeline, latent_upsampler
274
-
275
  def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
276
  tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
277
  tensor = torch.nn.functional.pad(tensor, padding_values)
278
  return tensor.to(self.device)
279
 
280
- def generate(self, prompt, negative_prompt, mode="text-to-video",
281
- start_image_filepath=None,
282
- middle_image_filepath=None, middle_frame_number=None, middle_image_weight=1.0,
283
- end_image_filepath=None, end_image_weight=1.0,
284
- input_video_filepath=None, height=512, width=704, duration=2.0,
285
- frames_to_use=9, seed=42, randomize_seed=True, guidance_scale=3.0,
286
- improve_texture=True, progress_callback=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  if self.device == "cuda":
288
  torch.cuda.empty_cache()
289
  torch.cuda.reset_peak_memory_stats()
@@ -302,14 +333,14 @@ class VideoService:
302
  target_frames_rounded = round(duration * FPS)
303
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
304
  actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
305
-
306
  height_padded = ((height - 1) // 32 + 1) * 32
307
  width_padded = ((width - 1) // 32 + 1) * 32
308
  padding_values = calculate_padding(height, width, height_padded, width_padded)
309
-
310
  generator = torch.Generator(device=self.device).manual_seed(used_seed)
311
-
312
  conditioning_items = []
 
313
  if mode == "image-to-video":
314
  start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
315
  conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
@@ -323,22 +354,41 @@ class VideoService:
323
  conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
324
 
325
  call_kwargs = {
326
- "prompt": prompt, "negative_prompt": negative_prompt, "height": height_padded, "width": width_padded,
327
- "num_frames": actual_num_frames, "frame_rate": int(FPS), "generator": generator, "output_type": "pt",
328
- "conditioning_items": conditioning_items if conditioning_items else None,
 
 
 
 
 
 
329
  "media_items": None,
330
- "decode_timestep": self.config["decode_timestep"], "decode_noise_scale": self.config["decode_noise_scale"],
331
- "stochastic_sampling": self.config["stochastic_sampling"], "image_cond_noise_scale": 0.15,
332
- "is_video": True, "vae_per_channel_normalize": True,
 
 
 
333
  "mixed_precision": (self.config["precision"] == "mixed_precision"),
334
- "offload_to_cpu": False, "enhance_prompt": False,
335
- "skip_layer_strategy": SkipLayerStrategy.AttentionValues
 
336
  }
337
 
338
  if mode == "video-to-video":
339
- call_kwargs["media_items"] = load_media_file(media_path=input_video_filepath, height=height, width=width, max_frames=int(frames_to_use), padding=padding_values).to(self.device)
 
 
 
 
 
 
340
 
341
  result_tensor = None
 
 
 
342
  if improve_texture:
343
  if not self.latent_upsampler:
344
  raise ValueError("Upscaler espacial não carregado.")
@@ -347,53 +397,101 @@ class VideoService:
347
  first_pass_args["guidance_scale"] = float(guidance_scale)
348
  second_pass_args = self.config.get("second_pass", {}).copy()
349
  second_pass_args["guidance_scale"] = float(guidance_scale)
 
350
  multi_scale_call_kwargs = call_kwargs.copy()
351
- multi_scale_call_kwargs.update({"downscale_factor": self.config["downscale_factor"], "first_pass": first_pass_args, "second_pass": second_pass_args})
 
 
 
 
 
 
352
  result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
353
  log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
354
  else:
355
  single_pass_kwargs = call_kwargs.copy()
356
  first_pass_config = self.config.get("first_pass", {})
357
- single_pass_kwargs.update({
358
- "guidance_scale": float(guidance_scale),
359
- "stg_scale": first_pass_config.get("stg_scale"),
360
- "rescaling_scale": first_pass_config.get("rescaling_scale"),
361
- "skip_block_list": first_pass_config.get("skip_block_list"),
362
- })
363
-
364
- # --- <INÍCIO DA CORREÇÃO> ---
365
  if mode == "video-to-video":
366
- single_pass_kwargs["timesteps"] = [0.7] # CORRIGIDO: Passar como uma lista
367
  print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
368
  else:
369
  single_pass_kwargs["timesteps"] = first_pass_config.get("timesteps")
370
- # --- <FIM DA CORREÇÃO> ---
371
-
372
  print("\n[INFO] Executando pipeline de etapa única...")
373
  result_tensor = self.pipeline(**single_pass_kwargs).images
374
-
375
  pad_left, pad_right, pad_top, pad_bottom = padding_values
376
  slice_h_end = -pad_bottom if pad_bottom > 0 else None
377
  slice_w_end = -pad_right if pad_right > 0 else None
378
-
379
  result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
380
  log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")
381
 
382
  video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
383
- temp_dir = tempfile.mkdtemp()
 
 
 
 
 
 
384
  output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
- with imageio.get_writer(output_video_path, fps=call_kwargs["frame_rate"], codec='libx264', quality=8) as writer:
387
- total_frames = len(video_np)
388
- for i, frame in enumerate(video_np):
389
- writer.append_data(frame)
390
- if progress_callback:
391
- progress_callback(i + 1, total_frames)
392
-
393
- self._log_gpu_memory("Fim da Geração")
 
 
394
 
395
- finalize()
396
- return output_video_path, used_seed
 
 
397
 
398
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
399
- video_generation_service = VideoService()
 
14
  from huggingface_hub import hf_hub_download
15
  import sys
16
  import subprocess
17
+ import gc
18
+ import shutil
19
 
20
  # --- 2. GERENCIAMENTO DE DEPENDÊNCIAS E SETUP ---
21
 
 
25
  import pynvml as nvml
26
  nvml.nvmlInit()
27
  handle = nvml.nvmlDeviceGetHandleByIndex(device_index)
 
28
  try:
29
  procs = nvml.nvmlDeviceGetComputeRunningProcesses_v3(handle)
30
  except Exception:
 
34
  pid = int(p.pid)
35
  used_mb = None
36
  try:
 
37
  if getattr(p, "usedGpuMemory", None) is not None and p.usedGpuMemory not in (0,):
38
  used_mb = max(0, int(p.usedGpuMemory) // (1024 * 1024))
39
  except Exception:
 
53
  return []
54
 
55
  def _query_gpu_processes_via_nvidiasmi(device_index: int) -> List[Dict]:
 
56
  cmd = f"nvidia-smi -i {device_index} --query-compute-apps=pid,process_name,used_memory --format=csv,noheader,nounits"
57
  try:
58
  out = subprocess.check_output(shlex.split(cmd), stderr=subprocess.STDOUT, text=True, timeout=2.0)
 
81
  def _gpu_process_table(processes: List[Dict], current_pid: int) -> str:
82
  if not processes:
83
  return " - Processos ativos: (nenhum)\n"
 
84
  processes = sorted(processes, key=lambda x: (x.get("used_mb") or 0), reverse=True)
85
  lines = [" - Processos ativos (PID | USER | NAME | VRAM MB):"]
86
  for p in processes:
 
89
  lines.append(f" {star} {p['pid']} | {p['user']} | {p['name']} | {used_str}")
90
  return "\n".join(lines) + "\n"
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def run_setup():
93
  """Executa o script setup.py para clonar as dependências necessárias."""
94
  setup_script_path = "setup.py"
 
119
 
120
  # --- 3. IMPORTAÇÕES ESPECÍFICAS DO MODELO ---
121
  from inference import (
122
+ create_ltx_video_pipeline,
123
+ create_latent_upsampler,
124
+ load_image_to_tensor_with_resize_and_crop,
125
+ seed_everething,
126
+ calculate_padding,
127
+ load_media_file,
128
  )
129
  from ltx_video.pipelines.pipeline_ltx_video import ConditioningItem, LTXMultiScalePipeline
130
  from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 
146
  print(" - O tensor está vazio, sem estatísticas.")
147
  print("------------------------------------------\n")
148
 
 
 
149
  # --- 5. CLASSE PRINCIPAL DO SERVIÇO ---
150
  class VideoService:
151
  def __init__(self):
152
  print("Inicializando VideoService...")
153
  self.config = self._load_config()
154
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
155
+ self.last_memory_reserved_mb = 0.0
156
  self._tmp_dirs = set()
157
  self._tmp_files = set()
158
  self._last_outputs = []
 
165
  torch.cuda.empty_cache()
166
  self._log_gpu_memory("Após carregar modelos")
167
  print("VideoService pronto para uso.")
168
+
169
+ # Método de log de GPU como parte da classe
170
+ def _log_gpu_memory(self, stage_name: str):
171
+ if self.device != "cuda":
172
+ return
173
+ device_index = torch.cuda.current_device() if torch.cuda.is_available() else 0
174
+ current_reserved_b = torch.cuda.memory_reserved(device_index)
175
+ current_reserved_mb = current_reserved_b / (1024 ** 2)
176
+ total_memory_b = torch.cuda.get_device_properties(device_index).total_memory
177
+ total_memory_mb = total_memory_b / (1024 ** 2)
178
+ peak_reserved_mb = torch.cuda.max_memory_reserved(device_index) / (1024 ** 2)
179
+ delta_mb = current_reserved_mb - getattr(self, "last_memory_reserved_mb", 0.0)
180
+ processes = _query_gpu_processes_via_nvml(device_index)
181
+ if not processes:
182
+ processes = _query_gpu_processes_via_nvidiasmi(device_index)
183
+ print(f"\n--- [LOG DE MEMÓRIA GPU] - {stage_name} (cuda:{device_index}) ---")
184
+ print(f" - Uso Atual (Reservado): {current_reserved_mb:.2f} MB / {total_memory_mb:.2f} MB")
185
+ print(f" - Variação desde o último log: {delta_mb:+.2f} MB")
186
+ if peak_reserved_mb > getattr(self, "last_memory_reserved_mb", 0.0):
187
+ print(f" - Pico de Uso (nesta operação): {peak_reserved_mb:.2f} MB")
188
+ print(_gpu_process_table(processes, os.getpid()), end="")
189
+ print("--------------------------------------------------\n")
190
+ self.last_memory_reserved_mb = current_reserved_mb
191
+
192
  def _register_tmp_dir(self, d: str):
193
+ try:
194
+ if d and os.path.isdir(d):
195
+ self._tmp_dirs.add(d)
196
+ except Exception:
197
+ pass
198
 
199
  def _register_tmp_file(self, f: str):
200
+ try:
201
+ if f and os.path.isfile(f):
202
+ self._tmp_files.add(f)
203
+ except Exception:
204
+ pass
205
 
206
  def finalize(self, keep_paths=None, extra_paths=None, clear_gpu=True):
207
  """
208
+ Remove temporários e coleta memória.
209
  keep_paths: caminhos que não devem ser removidos (ex.: vídeo final).
210
  extra_paths: caminhos adicionais para tentar remover (opcional).
211
  """
212
  keep = set(keep_paths or [])
213
  extras = set(extra_paths or [])
214
 
 
215
  for f in list(self._tmp_files | extras):
216
  try:
217
  if f not in keep and os.path.isfile(f):
 
221
  finally:
222
  self._tmp_files.discard(f)
223
 
 
224
  for d in list(self._tmp_dirs):
225
  try:
226
  if d not in keep and os.path.isdir(d):
 
230
  finally:
231
  self._tmp_dirs.discard(d)
232
 
 
233
  gc.collect()
234
  try:
 
235
  if clear_gpu and torch.cuda.is_available():
236
  torch.cuda.empty_cache()
 
237
  try:
238
  torch.cuda.ipc_collect()
239
  except Exception:
 
241
  except Exception:
242
  pass
243
 
 
244
  try:
245
  self._log_gpu_memory("Após finalize")
246
  except Exception:
247
  pass
248
+
 
249
  def _load_config(self):
250
  config_file_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled.yaml"
251
  with open(config_file_path, "r") as file:
 
253
 
254
  def _load_models(self):
255
  LTX_REPO = "Lightricks/LTX-Video"
256
+ distilled_model_path = hf_hub_download(
257
+ repo_id=LTX_REPO,
258
+ filename=self.config["checkpoint_path"],
259
+ local_dir=os.getenv("HF_HOME"),
260
+ cache_dir=os.getenv("HF_HOME_CACHE"),
261
+ token=os.getenv("HF_TOKEN"),
262
+ )
263
  self.config["checkpoint_path"] = distilled_model_path
264
+
265
+ spatial_upscaler_path = hf_hub_download(
266
+ repo_id=LTX_REPO,
267
+ filename=self.config["spatial_upscaler_model_path"],
268
+ local_dir=os.getenv("HF_HOME"),
269
+ cache_dir=os.getenv("HF_HOME_CACHE"),
270
+ token=os.getenv("HF_TOKEN"),
271
+ )
272
  self.config["spatial_upscaler_model_path"] = spatial_upscaler_path
273
+
274
+ pipeline = create_ltx_video_pipeline(
275
+ ckpt_path=self.config["checkpoint_path"],
276
+ precision=self.config["precision"],
277
+ text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
278
+ sampler=self.config["sampler"],
279
+ device="cpu",
280
+ enhance_prompt=False,
281
+ prompt_enhancer_image_caption_model_name_or_path=self.config["prompt_enhancer_image_caption_model_name_or_path"],
282
+ prompt_enhancer_llm_model_name_or_path=self.config["prompt_enhancer_llm_model_name_or_path"],
283
+ )
284
+
285
  latent_upsampler = None
286
  if self.config.get("spatial_upscaler_model_path"):
287
  latent_upsampler = create_latent_upsampler(self.config["spatial_upscaler_model_path"], device="cpu")
288
+
289
  return pipeline, latent_upsampler
290
+
291
  def _prepare_conditioning_tensor(self, filepath, height, width, padding_values):
292
  tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
293
  tensor = torch.nn.functional.pad(tensor, padding_values)
294
  return tensor.to(self.device)
295
 
296
+ def generate(
297
+ self,
298
+ prompt,
299
+ negative_prompt,
300
+ mode="text-to-video",
301
+ start_image_filepath=None,
302
+ middle_image_filepath=None,
303
+ middle_frame_number=None,
304
+ middle_image_weight=1.0,
305
+ end_image_filepath=None,
306
+ end_image_weight=1.0,
307
+ input_video_filepath=None,
308
+ height=512,
309
+ width=704,
310
+ duration=2.0,
311
+ frames_to_use=9,
312
+ seed=42,
313
+ randomize_seed=True,
314
+ guidance_scale=3.0,
315
+ improve_texture=True,
316
+ progress_callback=None,
317
+ ):
318
  if self.device == "cuda":
319
  torch.cuda.empty_cache()
320
  torch.cuda.reset_peak_memory_stats()
 
333
  target_frames_rounded = round(duration * FPS)
334
  n_val = round((float(target_frames_rounded) - 1.0) / 8.0)
335
  actual_num_frames = max(9, min(MAX_NUM_FRAMES, int(n_val * 8 + 1)))
336
+
337
  height_padded = ((height - 1) // 32 + 1) * 32
338
  width_padded = ((width - 1) // 32 + 1) * 32
339
  padding_values = calculate_padding(height, width, height_padded, width_padded)
340
+
341
  generator = torch.Generator(device=self.device).manual_seed(used_seed)
 
342
  conditioning_items = []
343
+
344
  if mode == "image-to-video":
345
  start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
346
  conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
 
354
  conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
355
 
356
  call_kwargs = {
357
+ "prompt": prompt,
358
+ "negative_prompt": negative_prompt,
359
+ "height": height_padded,
360
+ "width": width_padded,
361
+ "num_frames": actual_num_frames,
362
+ "frame_rate": int(FPS),
363
+ "generator": generator,
364
+ "output_type": "pt",
365
+ "conditioning_items": conditioning_items if conditioning_items else None,
366
  "media_items": None,
367
+ "decode_timestep": self.config["decode_timestep"],
368
+ "decode_noise_scale": self.config["decode_noise_scale"],
369
+ "stochastic_sampling": self.config["stochastic_sampling"],
370
+ "image_cond_noise_scale": 0.15,
371
+ "is_video": True,
372
+ "vae_per_channel_normalize": True,
373
  "mixed_precision": (self.config["precision"] == "mixed_precision"),
374
+ "offload_to_cpu": False,
375
+ "enhance_prompt": False,
376
+ "skip_layer_strategy": SkipLayerStrategy.AttentionValues,
377
  }
378
 
379
  if mode == "video-to-video":
380
+ call_kwargs["media_items"] = load_media_file(
381
+ media_path=input_video_filepath,
382
+ height=height,
383
+ width=width,
384
+ max_frames=int(frames_to_use),
385
+ padding=padding_values,
386
+ ).to(self.device)
387
 
388
  result_tensor = None
389
+ video_np = None
390
+ multi_scale_pipeline = None
391
+
392
  if improve_texture:
393
  if not self.latent_upsampler:
394
  raise ValueError("Upscaler espacial não carregado.")
 
397
  first_pass_args["guidance_scale"] = float(guidance_scale)
398
  second_pass_args = self.config.get("second_pass", {}).copy()
399
  second_pass_args["guidance_scale"] = float(guidance_scale)
400
+
401
  multi_scale_call_kwargs = call_kwargs.copy()
402
+ multi_scale_call_kwargs.update(
403
+ {
404
+ "downscale_factor": self.config["downscale_factor"],
405
+ "first_pass": first_pass_args,
406
+ "second_pass": second_pass_args,
407
+ }
408
+ )
409
  result_tensor = multi_scale_pipeline(**multi_scale_call_kwargs).images
410
  log_tensor_info(result_tensor, "Resultado da Etapa 2 (Saída do Pipeline Multi-Scale)")
411
  else:
412
  single_pass_kwargs = call_kwargs.copy()
413
  first_pass_config = self.config.get("first_pass", {})
414
+ single_pass_kwargs.update(
415
+ {
416
+ "guidance_scale": float(guidance_scale),
417
+ "stg_scale": first_pass_config.get("stg_scale"),
418
+ "rescaling_scale": first_pass_config.get("rescaling_scale"),
419
+ "skip_block_list": first_pass_config.get("skip_block_list"),
420
+ }
421
+ )
422
  if mode == "video-to-video":
423
+ single_pass_kwargs["timesteps"] = [0.7]
424
  print("[INFO] Modo video-to-video (etapa única): definindo timesteps (força) para [0.7]")
425
  else:
426
  single_pass_kwargs["timesteps"] = first_pass_config.get("timesteps")
427
+
 
428
  print("\n[INFO] Executando pipeline de etapa única...")
429
  result_tensor = self.pipeline(**single_pass_kwargs).images
430
+
431
  pad_left, pad_right, pad_top, pad_bottom = padding_values
432
  slice_h_end = -pad_bottom if pad_bottom > 0 else None
433
  slice_w_end = -pad_right if pad_right > 0 else None
 
434
  result_tensor = result_tensor[:, :, :actual_num_frames, pad_top:slice_h_end, pad_left:slice_w_end]
435
  log_tensor_info(result_tensor, "Tensor Final (Após Pós-processamento, Antes de Salvar)")
436
 
437
  video_np = (result_tensor[0].permute(1, 2, 3, 0).cpu().float().numpy() * 255).astype(np.uint8)
438
+
439
+ temp_dir = tempfile.mkdtemp(prefix="ltxv_")
440
+ self._register_tmp_dir(temp_dir)
441
+ results_dir = "/data/results"
442
+ os.makedirs(results_dir, exist_ok=True)
443
+
444
+ final_output_path = None
445
  output_video_path = os.path.join(temp_dir, f"output_{used_seed}.mp4")
446
+ try:
447
+ with imageio.get_writer(
448
+ output_video_path, fps=call_kwargs["frame_rate"], codec="libx264", quality=8
449
+ ) as writer:
450
+ total_frames = len(video_np)
451
+ for i, frame in enumerate(video_np):
452
+ writer.append_data(frame)
453
+ if progress_callback:
454
+ progress_callback(i + 1, total_frames)
455
+
456
+ candidate_final = os.path.join(results_dir, f"output_{used_seed}.mp4")
457
+ try:
458
+ shutil.move(output_video_path, candidate_final)
459
+ final_output_path = candidate_final
460
+ except Exception:
461
+ final_output_path = output_video_path
462
+ self._register_tmp_file(output_video_path)
463
+
464
+ self._log_gpu_memory("Fim da Geração")
465
+ return final_output_path, used_seed
466
+ finally:
467
+ try:
468
+ del result_tensor
469
+ except Exception:
470
+ pass
471
+ try:
472
+ del video_np
473
+ except Exception:
474
+ pass
475
+ try:
476
+ del multi_scale_pipeline
477
+ except Exception:
478
+ pass
479
 
480
+ gc.collect()
481
+ try:
482
+ if self.device == "cuda":
483
+ torch.cuda.empty_cache()
484
+ try:
485
+ torch.cuda.ipc_collect()
486
+ except Exception:
487
+ pass
488
+ except Exception:
489
+ pass
490
 
491
+ try:
492
+ self.finalize(keep_paths=[final_output_path] if final_output_path else [])
493
+ except Exception:
494
+ pass
495
 
496
  print("Criando instância do VideoService. O carregamento do modelo começará agora...")
497
+ video_generation_service = VideoService()