eeuuia committed (verified)
Commit e13ea4b · 1 Parent(s): 8569f9a

Upload 5 files
api/ltx/ltx_aduc_manager.py CHANGED
@@ -1,7 +1,6 @@
  # FILE: api/ltx/ltx_aduc_manager.py
- # DESCRIPTION: A simplified, robust pool manager for a unified LTX worker.
- # This worker handles all tasks, including Transformer generation and VAE operations,
- # while still respecting the GPU separation defined by the GPUManager.

  import logging
  import torch
@@ -10,14 +9,11 @@ from pathlib import Path
  import threading
  import queue
  import time
- import yaml
- import os
- from huggingface_hub import hf_hub_download
- from typing import List, Optional, Callable, Any, Tuple, Dict

- # --- Imports the GPU manager and the low-level builder ---
  from managers.gpu_manager import gpu_manager
- from api.ltx.ltx_utils import build_complete_pipeline_on_cpu, create_transformer

  # --- Adds the LTX-Video path for type imports ---
  LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
@@ -28,138 +24,168 @@ def add_deps_to_path():
  add_deps_to_path()

  from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline

  # ==============================================================================
- # --- BUILD ORCHESTRATION FUNCTION (internal to the manager) ---
  # ==============================================================================

- def get_complete_pipeline() -> LTXVideoPipeline:
-     """
-     Orchestrates the construction of the COMPLETE LTX pipeline, including the VAE, on the CPU.
-     """
-     config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
-     with open(config_path, "r") as file:
-         config = yaml.safe_load(file)
-
-     ckpt_path = hf_hub_download(
-         repo_id="Lightricks/LTX-Video",
-         filename=config["checkpoint_path"],
-         cache_dir=os.environ.get("HF_HOME")
-     )
-     return build_complete_pipeline_on_cpu(ckpt_path, config)
-
- # ==============================================================================
- # --- UNIFIED WORKER CLASS ---
- # ==============================================================================
-
- class LTXWorker(threading.Thread):
-     """
-     A unified worker that manages a complete instance of the LTX pipeline.
-     It loads the model and distributes its components (Transformer/VAE) to the correct GPUs.
-     """
-     def __init__(self, worker_id: int):
          super().__init__()
          self.worker_id = worker_id
-         self.pipeline: Optional[LTXVideoPipeline] = None
          self.is_healthy = False
          self.is_busy = False
-         self.daemon = True
-         self.autocast_dtype: torch.dtype = torch.float32

      def run(self):
-         """Initializes the worker: loads the pipeline and moves it to the GPUs."""
          try:
-             self.pipeline = get_complete_pipeline()
-             self._set_precision_policy()
-
-             main_device = gpu_manager.get_ltx_device()
-             vae_device = gpu_manager.get_ltx_vae_device()
-
-             logging.info(f"[LTXWorker-{self.worker_id}] Moving components -> Main: {main_device}, VAE: {vae_device}")
-             self.pipeline.to(main_device)  # Move everything to the main GPU first
-             self.pipeline.vae.to(vae_device)  # Then move just the VAE to its dedicated GPU
-
              self.is_healthy = True
-             logging.info(f"✅ LTXWorker {self.worker_id} is healthy. Main on {main_device}, VAE on {vae_device}.")
          except Exception:
              self.is_healthy = False
-             logging.error(f"❌ LTXWorker {self.worker_id} FAILED to initialize!", exc_info=True)

-     def _set_precision_policy(self):
-         """Sets the precision policy for autocast operations."""
          try:
-             config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
-             with open(config_path, "r") as file: config = yaml.safe_load(file)
-             precision = str(config.get("precision", "bfloat16")).lower()
-             if precision in ["float8_e4m3fn", "bfloat16"]: self.autocast_dtype = torch.bfloat16
-             elif precision == "mixed_precision": self.autocast_dtype = torch.float16
-         except Exception:
-             logging.warning(f"[LTXWorker-{self.worker_id}] Could not set precision policy, defaulting to float32.", exc_info=True)

      def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
          self.is_busy = True
          try:
-             # The job receives the complete pipeline and the dtype to use for autocast
-             result = job_func(self.pipeline, self.autocast_dtype, *args, **kwargs)
              return result
-         except Exception:
              self.is_healthy = False
              raise
          finally:
              self.is_busy = False

  # ==============================================================================
- # --- THE POOL MANAGER (SINGLETON) ---
  # ==============================================================================
  class LTXAducManager:
      _instance = None
      _initialized = False

      def __new__(cls, *args, **kwargs):
-         if cls._instance is None: cls._instance = super().__new__(cls)
          return cls._instance

      def __init__(self):
          if self._initialized: return

-         logging.info("🏭 Initializing Simplified Pool Manager for LTX...")

-         self.workers: List[LTXWorker] = []
-         self.job_queue = queue.Queue()
          self.pool_lock = threading.Lock()

          self._initialize_workers()

-         self.dispatcher = threading.Thread(target=self._dispatch_jobs, daemon=True)
          self.health_monitor = threading.Thread(target=self._health_check_loop, daemon=True)
-         self.dispatcher.start()
          self.health_monitor.start()

          self._initialized = True
-         logging.info("✅ Simplified Pool Manager is running.")

      def _initialize_workers(self):
-         with self.pool_lock:
-             # For now, we create a single unified worker.
-             # In the future, this loop can create multiple workers if more GPUs are available.
-             worker = LTXWorker(worker_id=0)
-             self.workers.append(worker)
-             worker.start()

-     def _get_available_worker(self) -> Optional[LTXWorker]:
          with self.pool_lock:
-             for worker in self.workers:
-                 if worker.is_healthy and not worker.is_busy:
                      return worker
          return None

-     def _dispatch_jobs(self):
          while True:
-             job_func, args, kwargs, future = self.job_queue.get()
              worker = None
              while worker is None:
-                 worker = self._get_available_worker()
-                 if worker is None: time.sleep(0.1)
              try:
                  result = worker.execute(job_func, args, kwargs)
                  future.put(result)
@@ -167,22 +193,51 @@ class LTXAducManager:
                  future.put(e)

      def _health_check_loop(self):
          while True:
              time.sleep(30)
              with self.pool_lock:
-                 for i, worker in enumerate(self.workers):
                      if not worker.is_alive() or not worker.is_healthy:
-                         logging.warning(f"LTX Worker {worker.worker_id} is UNHEALTHY. Restarting...")
-                         new_worker = LTXWorker(worker_id=worker.worker_id)
-                         self.workers[i] = new_worker
                          new_worker.start()

-     def submit_job(self, job_func: Callable, *args, **kwargs) -> Any:
-         future = queue.Queue(1)
-         self.job_queue.put((job_func, args, kwargs, future))
          result = future.get()
-         if isinstance(result, Exception): raise result
          return result

  # --- GLOBAL INSTANTIATION ---
- ltx_aduc_manager = LTXAducManager()
  # FILE: api/ltx/ltx_aduc_manager.py
+ # DESCRIPTION: An advanced, fault-tolerant pool manager for LTX and VAE workers.
+ # It handles job queuing, load balancing, and health monitoring for production-grade stability.

  import logging
  import torch
  import threading
  import queue
  import time
+ from typing import List, Optional, Callable, Any, Tuple

+ # Imports for the builders and the gpu_manager
+ from api.ltx.ltx_utils import get_main_ltx_pipeline, get_main_vae
  from managers.gpu_manager import gpu_manager

  # --- Adds the LTX-Video path for type imports ---
  LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
  add_deps_to_path()

  from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
+ from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder

  # ==============================================================================
+ # --- WORKER CLASSES (task specialists) ---
  # ==============================================================================

+ class BaseWorker(threading.Thread):
+     """Base class for our workers, with state and health management."""
+     def __init__(self, worker_id: int, device: torch.device):
          super().__init__()
          self.worker_id = worker_id
+         self.device = device
          self.is_healthy = False
          self.is_busy = False
+         self.daemon = True  # Lets the main program exit

      def run(self):
+         """The worker's lifecycle loop, responsible for loading the models."""
          try:
+             self._load_models()
              self.is_healthy = True
+             logging.info(f"✅ Worker {self.worker_id} ({self.__class__.__name__}) on {self.device} is healthy and ready.")
          except Exception:
              self.is_healthy = False
+             logging.error(f"❌ Worker {self.worker_id} on {self.device} FAILED to initialize!", exc_info=True)
+
+     def _load_models(self):
+         """Method to be implemented by subclasses."""
+         raise NotImplementedError

+     def get_status(self) -> Tuple[bool, bool]:
+         """Returns (is_healthy, is_busy)."""
+         return self.is_healthy, self.is_busy
+
+ class LTXMainWorker(BaseWorker):
+     """Specialist worker for the main LTX pipeline."""
+     def __init__(self, worker_id: int, device: torch.device):
+         super().__init__(worker_id, device)
+         self.pipeline: Optional[LTXVideoPipeline] = None
+
+     def _load_models(self):
+         logging.info(f"[LTXWorker-{self.worker_id}] Loading models to CPU...")
+         self.pipeline = get_main_ltx_pipeline()
+         logging.info(f"[LTXWorker-{self.worker_id}] Moving pipeline to {self.device}...")
+         self.pipeline.to(self.device)
+
+     def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
+         """Executes a job, managing the 'busy' state."""
+         self.is_busy = True
+         logging.info(f"Worker {self.worker_id} (LTX) starting job: {job_func.__name__}")
          try:
+             result = job_func(self.pipeline, *args, **kwargs)
+             logging.info(f"Worker {self.worker_id} (LTX) finished job successfully.")
+             return result
+         except Exception as e:
+             logging.error(f"Worker {self.worker_id} (LTX) job failed!", exc_info=True)
+             self.is_healthy = False  # A failed job marks the worker as unhealthy
+             raise
+         finally:
+             self.is_busy = False
+
+ class VAEWorker(BaseWorker):
+     """Specialist worker for the VAE model."""
+     def __init__(self, worker_id: int, device: torch.device):
+         super().__init__(worker_id, device)
+         self.vae: Optional[CausalVideoAutoencoder] = None
+
+     def _load_models(self):
+         logging.info(f"[VAEWorker-{self.worker_id}] Loading VAE model to CPU...")
+         self.vae = get_main_vae()
+         logging.info(f"[VAEWorker-{self.worker_id}] Moving VAE to {self.device}...")
+         self.vae.to(self.device)
+         self.vae.eval()

      def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
+         """Executes a job, managing the 'busy' state."""
          self.is_busy = True
+         logging.info(f"Worker {self.worker_id} (VAE) starting job: {job_func.__name__}")
          try:
+             result = job_func(self.vae, *args, **kwargs)
+             logging.info(f"Worker {self.worker_id} (VAE) finished job successfully.")
              return result
+         except Exception as e:
+             logging.error(f"Worker {self.worker_id} (VAE) job failed!", exc_info=True)
              self.is_healthy = False
              raise
          finally:
              self.is_busy = False

  # ==============================================================================
+ # --- THE ADVANCED POOL MANAGER (SINGLETON) ---
  # ==============================================================================
  class LTXAducManager:
      _instance = None
      _initialized = False

      def __new__(cls, *args, **kwargs):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
          return cls._instance

      def __init__(self):
          if self._initialized: return

+         logging.info("🏭 Initializing Advanced Pool Manager for LTX...")

+         self.ltx_workers: List[LTXMainWorker] = []
+         self.vae_workers: List[VAEWorker] = []
+         self.ltx_job_queue = queue.Queue()
+         self.vae_job_queue = queue.Queue()
          self.pool_lock = threading.Lock()

          self._initialize_workers()

+         # Start consumer threads to process the queues
+         self.ltx_dispatcher = threading.Thread(target=self._dispatch_jobs, args=(self.ltx_job_queue, self.ltx_workers), daemon=True)
+         self.vae_dispatcher = threading.Thread(target=self._dispatch_jobs, args=(self.vae_job_queue, self.vae_workers), daemon=True)
          self.health_monitor = threading.Thread(target=self._health_check_loop, daemon=True)
+
+         self.ltx_dispatcher.start()
+         self.vae_dispatcher.start()
          self.health_monitor.start()

          self._initialized = True
+         logging.info("✅ Advanced Pool Manager is running with all threads started.")

      def _initialize_workers(self):
+         """Creates and starts the workers based on the allocated GPUs."""
+         # Assumes gpu_manager exposes getters for the LTX and VAE devices
+         ltx_gpus = gpu_manager.get_ltx_device()  # Adjust if the name differs
+         vae_gpus = gpu_manager.get_ltx_vae_device()  # Adjust if the name differs

          with self.pool_lock:
+             for i, device_id in enumerate([ltx_gpus]):  # Assuming a list is returned
+                 worker = LTXMainWorker(worker_id=i, device=torch.device(f"cuda:{device_id}"))
+                 self.ltx_workers.append(worker)
+                 worker.start()
+
+             for i, device_id in enumerate([vae_gpus]):  # Assuming a list is returned
+                 worker = VAEWorker(worker_id=i, device=torch.device(f"cuda:{device_id}"))
+                 self.vae_workers.append(worker)
+                 worker.start()
+
+     def _get_available_worker(self, worker_pool: List[BaseWorker]) -> Optional[BaseWorker]:
+         """Finds a healthy, idle worker in the pool."""
+         with self.pool_lock:
+             for worker in worker_pool:
+                 healthy, busy = worker.get_status()
+                 if healthy and not busy:
                      return worker
          return None

+     def _dispatch_jobs(self, job_queue: queue.Queue, worker_pool: List[BaseWorker]):
+         """Consumer-thread loop that takes jobs from the queue and dispatches them."""
          while True:
+             job_func, args, kwargs, future = job_queue.get()
              worker = None
              while worker is None:
+                 worker = self._get_available_worker(worker_pool)
+                 if worker is None:
+                     time.sleep(0.1)  # Wait for a worker to become free
+
              try:
                  result = worker.execute(job_func, args, kwargs)
                  future.put(result)
                  future.put(e)

      def _health_check_loop(self):
+         """Thread that periodically checks for and restarts unhealthy workers."""
          while True:
              time.sleep(30)
+             logging.debug("Running health check on all workers...")
              with self.pool_lock:
+                 for i, worker in enumerate(self.ltx_workers):
                      if not worker.is_alive() or not worker.is_healthy:
+                         logging.warning(f"LTX Worker {worker.worker_id} on {worker.device} is UNHEALTHY. Restarting...")
+                         new_worker = LTXMainWorker(worker.worker_id, worker.device)
+                         self.ltx_workers[i] = new_worker
                          new_worker.start()
+                 # Repeat the loop for the VAE workers
+                 for i, worker in enumerate(self.vae_workers):
+                     if not worker.is_alive() or not worker.is_healthy:
+                         logging.warning(f"VAE Worker {worker.worker_id} on {worker.device} is UNHEALTHY. Restarting...")
+                         new_worker = VAEWorker(worker.worker_id, worker.device)
+                         self.vae_workers[i] = new_worker
+                         new_worker.start()
+
+     def submit_job(self, job_type: str, job_func: Callable, *args, **kwargs) -> Any:
+         """
+         Public entry point for submitting a job to the pool.
+         This function is synchronous: it waits for the result.
+         """
+         if job_type not in ['ltx', 'vae']:
+             raise ValueError("Invalid job_type. Must be 'ltx' or 'vae'.")
+
+         job_queue = self.ltx_job_queue if job_type == 'ltx' else self.vae_job_queue
+         future = queue.Queue()  # A queue serves as a 'future' to get the result back

+         job_queue.put((job_func, args, kwargs, future))
+
+         # Block and wait for the dispatcher to put the result into the 'future'
          result = future.get()
+
+         if isinstance(result, Exception):
+             raise result  # If the job failed, re-raise the exception in the calling thread
+
          return result

+ # ==============================================================================
  # --- GLOBAL INSTANTIATION ---
+ # ==============================================================================
+ try:
+     ltx_aduc_manager = LTXAducManager()
+ except Exception as e:
+     logging.critical("CRITICAL ERROR: Failed to initialize the LTXAducManager pool.", exc_info=True)
+     ltx_aduc_manager = None
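
The queue-as-"future" mechanism behind submit_job and _dispatch_jobs can be exercised without GPUs or models. A minimal, self-contained sketch of the same pattern (standard library only; the toy job function is hypothetical and not part of this commit):

import queue
import threading

job_queue: queue.Queue = queue.Queue()

def dispatcher():
    # Consumer loop: take a job, run it, and push the result (or the
    # exception object) into the per-job 'future' queue.
    while True:
        job_func, args, kwargs, future = job_queue.get()
        try:
            future.put(job_func(*args, **kwargs))
        except Exception as e:
            future.put(e)  # Errors travel back through the same channel

threading.Thread(target=dispatcher, daemon=True).start()

def submit_job(job_func, *args, **kwargs):
    future: queue.Queue = queue.Queue(1)  # One-slot queue acts as a future
    job_queue.put((job_func, args, kwargs, future))
    result = future.get()  # Synchronous: blocks until the dispatcher answers
    if isinstance(result, Exception):
        raise result  # Re-raise in the caller's thread, as the manager does
    return result

print(submit_job(lambda a, b: a + b, 2, 3))  # -> 5

Because the result channel carries exception objects as ordinary values, a crashed job surfaces in the submitting thread instead of dying silently inside the dispatcher.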
api/ltx/ltx_aduc_orchestrator.py CHANGED
@@ -1,26 +1,21 @@
  # FILE: api/ltx_aduc_orchestrator.py
  # DESCRIPTION: The main workflow orchestrator for the ADUC-SDR LTX suite.
- # In this simplified architecture, it coordinates a single unified client (LtxAducPipeline)
- # to execute the complete video generation pipeline from prompt to MP4.

  import logging
  import time
- import yaml
- import os
- import sys
  from PIL import Image
- from typing import Optional, Dict, Union

- # The orchestrator imports the UNIFIED CLIENT that it will coordinate.
  from api.ltx.ltx_aduc_pipeline import ltx_aduc_pipeline

  # The orchestrator imports the TOOLS it needs for the final tasks.
  from tools.video_encode_tool import video_encode_tool_singleton

- # Imports Path to load the configuration.
- from pathlib import Path
- LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
-
  # ==============================================================================
  # --- THE ORCHESTRATOR CLASS (brain of the workflow) ---
  # ==============================================================================
@@ -28,26 +23,16 @@ LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
  class LtxAducOrchestrator:
      """
      Orchestrates the complete video generation workflow,
-     coordinating the unified LTX client. It is the main entry point for the UI.
      """
      def __init__(self):
          """
-         Initializes the orchestrator, loading the base configuration only once.
          """
-         self.output_dir = "/app/output"
-         self.base_config = self._load_base_config()
          logging.info("✅ LTX ADUC Orchestrator initialized and ready.")

-     def _load_base_config(self) -> Dict:
-         """Loads the base configuration from the YAML file, which holds the default parameters."""
-         try:
-             config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
-             with open(config_path, "r") as file:
-                 return yaml.safe_load(file)
-         except Exception as e:
-             logging.error(f"Failed to load base config file. Orchestrator may not function correctly. Error: {e}")
-             return {}
-
      def __call__(
          self,
          prompt: str,
@@ -59,80 +44,87 @@ class LtxAducOrchestrator:
          output_filename_base: str = "ltx_aduc_video"
      ) -> Optional[str]:
          """
-         Main entry point of the orchestrator. Runs the complete video generation pipeline.

          Args:
-             prompt (str): The full text prompt, where each new line is a scene.
-             initial_image (Optional[Image.Image]): A PIL image to condition the first scene.
              height (int): Height of the final video.
              width (int): Width of the final video.
              duration_in_seconds (float): Desired total duration of the video.
-             ltx_configs (Optional[Dict]): Advanced UI settings that override the defaults.
-             output_filename_base (str): Base name for the output video file.

          Returns:
-             Optional[str]: The path to the generated .mp4 file, or None on failure.
          """
          t0 = time.time()
          logging.info(f"Orchestrator starting new job for prompt: '{prompt.splitlines()[0]}...'")

          try:
              # =================================================================
-             # --- STEP 1: PREPARING THE INPUTS AND SETTINGS ---
              # =================================================================
              prompt_list = [line.strip() for line in prompt.splitlines() if line.strip()]
              if not prompt_list:
                  raise ValueError("The prompt is empty or contains no valid lines.")

              initial_conditioning_items = []
              if initial_image:
-                 logging.info("Orchestrator delegating: create conditioning item.")
-                 conditioning_params = [(0, 1.0)]  # (frame_number, strength)
-                 initial_conditioning_items = ltx_aduc_pipeline.encode_to_conditioning_items(
-                     media_list=[initial_image],
-                     params=conditioning_params,
-                     resolution=(height, width)
                  )
-
-             common_ltx_args = self.base_config.get("first_pass", {}).copy()
-             common_ltx_args.update({
-                 'negative_prompt': "blurry, low quality, bad anatomy, deformed",
-                 'height': height,
-                 'width': width
-             })
-             if ltx_configs:
-                 common_ltx_args.update(ltx_configs)

              # =================================================================
-             # --- STEP 2: DELEGATE LATENT VIDEO GENERATION ---
              # =================================================================
-             logging.info("Orchestrator delegating: generate latent video.")
-             final_latents, used_seed = ltx_aduc_pipeline.generate_latents(
                  prompt_list=prompt_list,
                  duration_in_seconds=duration_in_seconds,
-                 common_ltx_args=common_ltx_args,
-                 initial_conditioning_items=initial_conditioning_items
              )
              if final_latents is None:
                  raise RuntimeError("LTX client failed to generate a latent tensor.")
-             logging.info(f"Orchestrator received latent tensor with shape: {final_latents.shape}")

              # =================================================================
-             # --- STEP 3: DELEGATE DECODING TO PIXELS ---
              # =================================================================
-             logging.info("Orchestrator delegating: decode latent to pixels.")
-             pixel_tensor = ltx_aduc_pipeline.decode_to_pixels(final_latents)
              if pixel_tensor is None:
-                 raise RuntimeError("LTX client failed to decode the latent tensor.")
-             logging.info(f"Orchestrator received pixel tensor with shape: {pixel_tensor.shape}")

              # =================================================================
-             # --- STEP 4: FINAL TASK - ENCODE TO MP4 ---
              # =================================================================
              video_filename = f"{output_filename_base}_{int(time.time())}_{used_seed}.mp4"
              output_path = f"{self.output_dir}/{video_filename}"

-             logging.info(f"Orchestrator executing final task: saving tensor to MP4 at {output_path}")
              video_encode_tool_singleton.save_video_from_tensor(
                  pixel_5d=pixel_tensor,
                  path=output_path,
@@ -150,5 +142,10 @@ class LtxAducOrchestrator:

  # ==============================================================================
  # --- SINGLETON INSTANCE OF THE ORCHESTRATOR ---
  # ==============================================================================
- ltx_aduc_orchestrator = LtxAducOrchestrator()
  # FILE: api/ltx_aduc_orchestrator.py
  # DESCRIPTION: The main workflow orchestrator for the ADUC-SDR LTX suite.
+ # It acts as the primary entry point for the UI, coordinating the specialized
+ # LTX and VAE clients to execute a complete video generation pipeline from prompt to MP4.

  import logging
  import time
  from PIL import Image
+ from typing import Optional, Dict

+ # The orchestrator imports the specialist CLIENTS it will coordinate.
+ # These clients are responsible for submitting the jobs to the worker pool.
  from api.ltx.ltx_aduc_pipeline import ltx_aduc_pipeline
+ from api.ltx.vae_aduc_pipeline import vae_aduc_pipeline

  # The orchestrator imports the TOOLS it needs for the final tasks.
  from tools.video_encode_tool import video_encode_tool_singleton

  # ==============================================================================
  # --- THE ORCHESTRATOR CLASS (brain of the workflow) ---
  # ==============================================================================
  class LtxAducOrchestrator:
      """
      Orchestrates the complete video generation workflow,
+     coordinating the LTX and VAE clients. It is the main entry point for the UI.
      """
      def __init__(self):
          """
+         Initializes the orchestrator. Initialization is lightweight, since the
+         heavy models are managed by the LTXAducManager in the background.
          """
+         self.output_dir = "/app/output"  # Default directory for saving the videos
          logging.info("✅ LTX ADUC Orchestrator initialized and ready.")

      def __call__(
          self,
          prompt: str,
          output_filename_base: str = "ltx_aduc_video"
      ) -> Optional[str]:
          """
+         Main entry point of the orchestrator. Runs the complete pipeline.

          Args:
+             prompt (str): The full text prompt. Each new line is treated as a scene.
+             initial_image (Optional[Image.Image]): A PIL image to condition the first scene.
              height (int): Height of the final video.
              width (int): Width of the final video.
              duration_in_seconds (float): Desired total duration of the video.
+             ltx_configs (Optional[Dict]): Advanced settings for the LTX generation (steps, guidance, etc.).
+             output_filename_base (str): The base name for the final video file.

          Returns:
+             Optional[str]: The path of the generated .mp4 video file, or None on failure.
          """
          t0 = time.time()
          logging.info(f"Orchestrator starting new job for prompt: '{prompt.splitlines()[0]}...'")

          try:
              # =================================================================
+             # --- STEP 1: INPUT PREPARATION ---
              # =================================================================
+             # Converts the prompt string into a list of scenes.
              prompt_list = [line.strip() for line in prompt.splitlines() if line.strip()]
              if not prompt_list:
                  raise ValueError("The prompt is empty or contains no valid lines.")

+             # Prepares the initial conditioning item, if an image was provided.
              initial_conditioning_items = []
              if initial_image:
+                 logging.info("Preparing initial conditioning item via VAE client...")
+                 # Set the parameters: apply at frame 0 with full strength (1.0).
+                 conditioning_params = [(0, 1.0)]
+                 # Call the VAE client to do the heavy lifting of turning the image into a LatentConditioningItem.
+                 initial_conditioning_items = vae_aduc_pipeline(
+                     media=[initial_image],
+                     task='create_conditioning_items',
+                     target_resolution=(height, width),
+                     conditioning_params=conditioning_params
                  )
+                 logging.info(f"Successfully created {len(initial_conditioning_items)} conditioning item(s).")

              # =================================================================
+             # --- STEP 2: LATENT VIDEO GENERATION ---
              # =================================================================
+             logging.info("Submitting job to LTX client for latent video generation...")
+             # Call the LTX client to generate the complete latent tensor.
+             final_latents, used_seed = ltx_aduc_pipeline(
                  prompt_list=prompt_list,
+                 initial_conditioning_items=initial_conditioning_items,
+                 height=height,
+                 width=width,
                  duration_in_seconds=duration_in_seconds,
+                 ltx_configs=ltx_configs
              )
+
              if final_latents is None:
                  raise RuntimeError("LTX client failed to generate a latent tensor.")
+             logging.info(f"LTX client returned latent tensor with shape: {final_latents.shape}")

              # =================================================================
+             # --- STEP 3: DECODING THE LATENT INTO PIXELS ---
              # =================================================================
+             logging.info("Submitting job to VAE client for latent-to-pixel decoding...")
+             # Call the VAE client to convert the result into a visible video (pixel tensor).
+             pixel_tensor = vae_aduc_pipeline(
+                 media=final_latents,
+                 task='decode'
+             )
+
              if pixel_tensor is None:
+                 raise RuntimeError("VAE client failed to decode the latent tensor.")
+             logging.info(f"VAE client returned pixel tensor with shape: {pixel_tensor.shape}")

              # =================================================================
+             # --- STEP 4: ENCODING TO AN MP4 VIDEO FILE ---
              # =================================================================
              video_filename = f"{output_filename_base}_{int(time.time())}_{used_seed}.mp4"
              output_path = f"{self.output_dir}/{video_filename}"

+             logging.info(f"Submitting job to VideoEncodeTool to save final MP4 to: {output_path}")
+             # Use the video tool to save the pixel tensor to the final file.
              video_encode_tool_singleton.save_video_from_tensor(
                  pixel_5d=pixel_tensor,
                  path=output_path,

  # ==============================================================================
  # --- SINGLETON INSTANCE OF THE ORCHESTRATOR ---
+ # This is the main entry point that the UI (app.py) will call.
  # ==============================================================================
+ try:
+     ltx_aduc_orchestrator = LtxAducOrchestrator()
+ except Exception as e:
+     logging.critical("CRITICAL: Failed to initialize the LtxAducOrchestrator.", exc_info=True)
+     ltx_aduc_orchestrator = None
@@ -1,130 +1,142 @@
1
  # FILE: api/ltx/ltx_aduc_pipeline.py
2
- # DESCRIPTION: A unified high-level client for submitting ALL LTX-related jobs (generation and VAE)
3
- # to the LTXAducManager pool.
 
4
 
5
  import logging
6
  import time
7
  import torch
8
  import random
9
- from typing import List, Optional, Tuple, Dict
10
- from PIL import Image
11
- from dataclasses import dataclass
12
- from pathlib import Path
13
- import sys
14
 
15
- from api.ltx.ltx_utils import load_image_to_tensor_with_resize_and_crop # Importa o helper de ltx_utils
16
-
17
- # O cliente importa o MANAGER para submeter todos os trabalhos.
18
  from api.ltx.ltx_aduc_manager import ltx_aduc_manager
19
 
20
- # Adiciona o path do LTX-Video para importações de baixo nível e tipos.
21
- LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
22
- def add_deps_to_path():
23
- repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
24
- if repo_path not in sys.path:
25
- sys.path.insert(0, repo_path)
26
- add_deps_to_path()
27
-
28
- from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
29
- from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
30
-
31
 
32
- # ==============================================================================
33
- # --- DEFINIÇÕES DE ESTRUTURA ---
34
- # ==============================================================================
35
 
36
- @dataclass
37
- class LatentConditioningItem:
38
- """Estrutura de dados para passar latentes condicionados ao job de geração."""
39
- latent_tensor: torch.Tensor
40
- media_frame_number: int
41
- conditioning_strength: float
 
42
 
43
  # ==============================================================================
44
  # --- FUNÇÕES DE TRABALHO (Jobs a serem executados no Pool LTX) ---
45
  # ==============================================================================
46
 
47
- def _job_encode_media(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, pixel_tensor: torch.Tensor) -> torch.Tensor:
48
- """Job que usa o VAE do pipeline para codificar um tensor de pixel."""
49
- vae = pipeline.vae
50
- pixel_tensor_gpu = pixel_tensor.to(vae.device, dtype=vae.dtype)
51
- latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
52
- return latents.cpu()
53
-
54
- def _job_decode_latent(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, latent_tensor: torch.Tensor) -> torch.Tensor:
55
- """Job que usa o VAE do pipeline para decodificar um tensor latente."""
56
- vae = pipeline.vae
57
- latent_tensor_gpu = latent_tensor.to(vae.device, dtype=vae.dtype)
58
- pixels = vae_decode(latent_tensor_gpu, vae, is_video=True, vae_per_channel_normalize=True)
59
- return pixels.cpu()
60
-
61
- def _job_generate_latent_chunk(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, **kwargs) -> torch.Tensor:
62
- """Job que usa o pipeline principal para gerar um chunk de vídeo latente."""
63
- generator = torch.Generator(device=pipeline.device).manual_seed(kwargs['seed'])
64
- pipeline_kwargs = {"generator": generator, "output_type": "latent", **kwargs}
65
 
66
- with torch.autocast(device_type=pipeline.device.type, dtype=autocast_dtype):
67
- latents_raw = pipeline(**pipeline_kwargs).images
68
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  return latents_raw.cpu()
70
 
 
71
  # ==============================================================================
72
- # --- A CLASSE CLIENTE UNIFICADA ---
73
  # ==============================================================================
74
 
75
  class LtxAducPipeline:
76
  """
77
- Cliente unificado para orquestrar todas as tarefas LTX, incluindo geração e VAE.
 
78
  """
79
  def __init__(self):
80
- logging.info("✅ Unified LTX/VAE ADUC Pipeline (Client) initialized.")
 
81
  self.FRAMES_ALIGNMENT = 8
 
82
 
83
  def _get_random_seed(self) -> int:
 
84
  return random.randint(0, 2**32 - 1)
85
 
86
  def _align(self, dim: int, alignment: int = 8) -> int:
 
87
  return ((dim + alignment - 1) // alignment) * alignment
88
 
89
- # --- Métodos de API para o Orquestrador ---
90
-
91
- def encode_to_conditioning_items(self, media_list: List, params: List, resolution: Tuple[int, int]) -> List[LatentConditioningItem]:
92
- """Converte uma lista de imagens em uma lista de LatentConditioningItem."""
93
- pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, resolution[0], resolution[1]) for m in media_list]
94
- items = []
95
- for i, pt in enumerate(pixel_tensors):
96
- latent_tensor = ltx_aduc_manager.submit_job(_job_encode_media, pixel_tensor=pt)
97
- frame_number, strength = params[i]
98
- items.append(LatentConditioningItem(
99
- latent_tensor=latent_tensor,
100
- media_frame_number=frame_number,
101
- conditioning_strength=strength
102
- ))
103
- return items
104
-
105
- def decode_to_pixels(self, latent_tensor: torch.Tensor) -> torch.Tensor:
106
- """Decodifica um tensor latente em um tensor de pixels."""
107
- return ltx_aduc_manager.submit_job(_job_decode_latent, latent_tensor=latent_tensor)
108
-
109
- def generate_latents(
110
  self,
111
  prompt_list: List[str],
112
- duration_in_seconds: float,
113
- common_ltx_args: Dict,
114
- initial_conditioning_items: Optional[List[LatentConditioningItem]] = None
 
 
115
  ) -> Tuple[Optional[torch.Tensor], Optional[int]]:
116
- """Gera um vídeo latente completo a partir de uma lista de prompts."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  t0 = time.time()
118
- logging.info(f"LTX Client received a generation job for {len(prompt_list)} scenes.")
 
 
 
 
119
  used_seed = self._get_random_seed()
 
120
 
 
121
  num_chunks = len(prompt_list)
122
  total_frames = self._align(int(duration_in_seconds * 24))
123
- frames_per_chunk_base = total_frames // num_chunks if num_chunks > 0 else total_frames
124
  overlap_frames = self._align(9) if num_chunks > 1 else 0
125
 
126
  final_latents_list = []
127
- overlap_condition_item = None
128
 
129
  for i, chunk_prompt in enumerate(prompt_list):
130
  current_conditions = []
@@ -133,43 +145,58 @@ class LtxAducPipeline:
133
  if overlap_condition_item:
134
  current_conditions.append(overlap_condition_item)
135
 
 
136
  num_frames_for_chunk = frames_per_chunk_base
137
- if i == num_chunks - 1:
138
  processed_frames = sum(f.shape[2] for f in final_latents_list)
139
  num_frames_for_chunk = total_frames - processed_frames
140
- num_frames_for_chunk = self._align(num_frames_for_chunk)
141
- if num_frames_for_chunk <= 0: continue
142
-
143
- job_specific_args = {
144
- "prompt": chunk_prompt,
145
- "num_frames": num_frames_for_chunk,
146
- "seed": used_seed + i,
147
- "conditioning_items": current_conditions
148
- }
149
- final_job_args = {**common_ltx_args, **job_specific_args}
150
 
151
- chunk_latents = ltx_aduc_manager.submit_job(_job_generate_latent_chunk, **final_job_args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
  if chunk_latents is None:
154
- logging.error(f"Failed to generate latents for scene {i+1}. Aborting.")
155
  return None, used_seed
156
 
 
157
  if i < num_chunks - 1:
 
158
  overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
159
  overlap_condition_item = LatentConditioningItem(
160
- latent_tensor=overlap_latents, media_frame_number=0, conditioning_strength=1.0)
 
 
 
 
161
  final_latents_list.append(chunk_latents[:, :, :-overlap_frames, :, :])
162
  else:
 
163
  final_latents_list.append(chunk_latents)
164
 
165
- if not final_latents_list:
166
- logging.warning("No latent chunks were generated.")
167
- return None, used_seed
168
-
169
  final_latents = torch.cat(final_latents_list, dim=2)
 
170
  logging.info(f"LTX Client job finished in {time.time() - t0:.2f}s. Final latent shape: {final_latents.shape}")
171
 
172
  return final_latents, used_seed
173
 
174
  # --- INSTÂNCIA SINGLETON DO CLIENTE ---
175
- ltx_aduc_pipeline = LtxAducPipeline()
 
 
 
 
 
  # FILE: api/ltx/ltx_aduc_pipeline.py
+ # DESCRIPTION: A high-level client for submitting LTX video generation jobs to the pool manager.
+ # Its sole responsibility is to orchestrate the generation of a final LATENT tensor from prompts
+ # and initial conditions, without handling pixel decoding.

  import logging
  import time
  import torch
  import random
+ import json
+ import sys
+ from pathlib import Path
+ from typing import List, Optional, Tuple, Union, Dict

+ # The client imports the MANAGER to submit jobs
  from api.ltx.ltx_aduc_manager import ltx_aduc_manager

+ # The client needs the definition of LatentConditioningItem for its inputs
+ from api.ltx.vae_aduc_pipeline import LatentConditioningItem

+ DEPS_DIR = Path("/data")
+ LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+ repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+ if repo_path not in sys.path:
+     sys.path.insert(0, repo_path)
+ print(f"[DEBUG] Repo added to sys.path: {repo_path}")
+ from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline

  # ==============================================================================
  # --- JOB FUNCTIONS (jobs to be executed in the LTX pool) ---
  # ==============================================================================

+ def _job_generate_latent_chunk(
+     pipeline: LTXVideoPipeline,
+     prompt: str,
+     negative_prompt: str,
+     height: int,
+     width: int,
+     num_frames: int,
+     seed: int,
+     conditioning_items: Optional[List[LatentConditioningItem]],
+     ltx_configs: Dict
+ ) -> torch.Tensor:
+     """
+     Job function that generates a single chunk (scene) of latent video.
+     This function runs INSIDE an LTXMainWorker.
+     """
+     generator = torch.Generator(device=pipeline.device).manual_seed(seed)

+     # Assemble the arguments for the pipeline call
+     pipeline_kwargs = {
+         "prompt": prompt,
+         "negative_prompt": negative_prompt,
+         "height": height,
+         "width": width,
+         "num_frames": num_frames,
+         "frame_rate": 24,  # Default; can be parameterized if needed
+         "generator": generator,
+         "output_type": "latent",  # Key point: we always ask for latents
+         "conditioning_items": conditioning_items if conditioning_items else None,
+         **ltx_configs  # Apply advanced settings (guidance, steps, etc.)
+     }
+
+     logging.info(f"[LTX Job] Generating a chunk of {num_frames} frames for prompt: '{prompt[:50]}...'")
+
+     # The pipeline is already on the correct GPU inside the worker
+     with torch.autocast(device_type=pipeline.device.type, dtype=torch.bfloat16):
+         latents_raw = pipeline(**pipeline_kwargs).images
+
+     # Return the latent tensor on the CPU to free the worker's VRAM for the next job
      return latents_raw.cpu()

+
  # ==============================================================================
+ # --- THE CLIENT CLASS (public interface for latent video generation) ---
  # ==============================================================================

  class LtxAducPipeline:
      """
+     High-level client for orchestrating latent video generation.
+     Submits video-chunk generation jobs to the LTXAducManager.
      """
      def __init__(self):
+         logging.info("✅ LTX ADUC Pipeline (Client) initialized and ready to submit jobs.")
+         # __init__ stays clean; no models are loaded here.
          self.FRAMES_ALIGNMENT = 8

      def _get_random_seed(self) -> int:
+         """Always generates and returns a fresh random seed."""
          return random.randint(0, 2**32 - 1)

      def _align(self, dim: int, alignment: int = 8) -> int:
+         """Aligns a dimension to the nearest multiple."""
          return ((dim + alignment - 1) // alignment) * alignment

+     def __call__(
          self,
          prompt_list: List[str],
+         initial_conditioning_items: Optional[List[LatentConditioningItem]] = None,
+         height: int = 432,
+         width: int = 768,
+         duration_in_seconds: float = 4.0,
+         ltx_configs: Optional[Dict] = None
      ) -> Tuple[Optional[torch.Tensor], Optional[int]]:
+         """
+         Main entry point for generating a complete latent video.
+
+         Args:
+             prompt_list: List of prompts, where each prompt is one scene.
+             initial_conditioning_items: List of `LatentConditioningItem` used to
+                 condition the first scene.
+             height: Video height.
+             width: Video width.
+             duration_in_seconds: Desired total duration of the video.
+             ltx_configs: Dictionary of advanced settings for the LTX pipeline
+                 (guidance_scale, num_inference_steps, etc.).
+
+         Returns:
+             A tuple containing:
+             - The complete final latent tensor (on the CPU).
+             - The main seed used for the generation.
+         """
          t0 = time.time()
+         logging.info(f"LTX Client received a generation job with {len(prompt_list)} scenes.")
+
+         if not prompt_list:
+             raise ValueError("The prompt list cannot be empty.")
+
          used_seed = self._get_random_seed()
+         logging.info(f"Generation seed set to: {used_seed}")

+         # --- Chunk-splitting and overlap logic ---
          num_chunks = len(prompt_list)
          total_frames = self._align(int(duration_in_seconds * 24))
+         frames_per_chunk_base = total_frames // num_chunks
          overlap_frames = self._align(9) if num_chunks > 1 else 0

          final_latents_list = []
+         overlap_condition_item: Optional[LatentConditioningItem] = None

          for i, chunk_prompt in enumerate(prompt_list):
              current_conditions = []
              if overlap_condition_item:
                  current_conditions.append(overlap_condition_item)

+             # Compute the number of frames for the current chunk
              num_frames_for_chunk = frames_per_chunk_base
+             if i == num_chunks - 1:  # The last chunk takes the remainder
                  processed_frames = sum(f.shape[2] for f in final_latents_list)
                  num_frames_for_chunk = total_frames - processed_frames

+             num_frames_for_chunk = self._align(num_frames_for_chunk)
+
+             # --- Submit the job for the current chunk ---
+             chunk_latents = ltx_aduc_manager.submit_job(
+                 job_type='ltx',
+                 job_func=_job_generate_latent_chunk,
+                 # Pass along every argument the job function needs
+                 prompt=chunk_prompt,
+                 negative_prompt="blurry, low quality, bad anatomy, deformed",  # Can be parameterized
+                 height=height,
+                 width=width,
+                 num_frames=num_frames_for_chunk,
+                 seed=used_seed + i,  # A different seed per chunk, for variety
+                 conditioning_items=current_conditions,
+                 ltx_configs=ltx_configs or {}
+             )

              if chunk_latents is None:
+                 logging.error(f"Failed to generate latents for scene {i+1}. Aborting generation.")
                  return None, used_seed

+             # --- "Kinetic echo" (overlap) management ---
              if i < num_chunks - 1:
+                 # Keep the last frames of the current chunk to condition the next one
                  overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
                  overlap_condition_item = LatentConditioningItem(
+                     latent_tensor=overlap_latents,
+                     media_frame_number=0,  # Always conditions the start of the next chunk
+                     conditioning_strength=1.0  # Strong conditioning
+                 )
+                 # Append the current chunk without the overlap
                  final_latents_list.append(chunk_latents[:, :, :-overlap_frames, :, :])
              else:
+                 # Append the final chunk in full
                  final_latents_list.append(chunk_latents)

+         # Concatenate all the latent chunks into a single tensor
          final_latents = torch.cat(final_latents_list, dim=2)
+
          logging.info(f"LTX Client job finished in {time.time() - t0:.2f}s. Final latent shape: {final_latents.shape}")

          return final_latents, used_seed

  # --- SINGLETON INSTANCE OF THE CLIENT ---
+ try:
+     ltx_aduc_pipeline = LtxAducPipeline()
+ except Exception as e:
+     logging.critical("CRITICAL: Failed to initialize the LtxAducPipeline client.", exc_info=True)
+     ltx_aduc_pipeline = None
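
The chunk-splitting arithmetic above is easy to sanity-check in isolation. Below is a minimal sketch that mirrors the client's frame accounting (pure Python, no torch; plan_chunks is a hypothetical helper, and it treats latent frames as 1:1 with requested frames, whereas the client actually sums latent-tensor time steps via f.shape[2]):

def align(dim: int, alignment: int = 8) -> int:
    """Round up to the nearest multiple, as LtxAducPipeline._align does."""
    return ((dim + alignment - 1) // alignment) * alignment

def plan_chunks(num_scenes: int, duration_in_seconds: float):
    total_frames = align(int(duration_in_seconds * 24))  # 24 fps
    base = total_frames // num_scenes
    overlap = align(9) if num_scenes > 1 else 0          # "kinetic echo" frames
    kept = 0   # frames that survive into the final concatenation
    plan = []
    for i in range(num_scenes):
        # The last chunk takes whatever is still missing from the total
        n = base if i < num_scenes - 1 else total_frames - kept
        n = align(n)
        plan.append(n)
        # Every chunk except the last is trimmed by `overlap` before concatenation
        kept += n - overlap if i < num_scenes - 1 else n
    return total_frames, overlap, plan, kept

print(plan_chunks(3, 10.0))  # -> (240, 16, [80, 80, 112], 240)

The trimmed overlap is not lost: it is re-injected as a strength-1.0 conditioning item at frame 0 of the next chunk, which is what stitches consecutive scenes together.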
api/ltx/ltx_utils.py CHANGED
@@ -1,263 +1,165 @@
  # FILE: api/ltx/ltx_utils.py
- # DESCRIPTION: A pure utility library for the LTX ecosystem.
- # Contains the official low-level builder function for the complete pipeline
- # and other stateless helper functions.

  import os
  import random
  import json
  import logging
  import sys
  from pathlib import Path
- from typing import Dict, Tuple, Union
- import torchvision.transforms.functional as TVF
- from PIL import Image

  import torch
  from safetensors import safe_open
  from transformers import T5EncoderModel, T5Tokenizer

  # ==============================================================================
- # --- PATH SETUP AND LTX LIBRARY IMPORTS ---
  # ==============================================================================

  LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")

  def add_deps_to_path():
-     """Adds the LTX repository directory to sys.path so its libraries can be imported."""
      repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
      if repo_path not in sys.path:
          sys.path.insert(0, repo_path)
          logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")

  add_deps_to_path()

  try:
      from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
      from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
      from ltx_video.models.transformers.transformer3d import Transformer3DModel
      from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
      from ltx_video.schedulers.rf import RectifiedFlowScheduler
  except ImportError as e:
-     logging.critical("Failed to import a core LTX-Video library component.", exc_info=True)
-     raise ImportError(f"Could not import from LTX-Video library. Check repo integrity at '{LTX_VIDEO_REPO_DIR}'. Error: {e}")

  # ==============================================================================
- # --- 'create_transformer' HELPER FUNCTION (essential) ---
  # ==============================================================================

- def create_transformer(ckpt_path: str, precision: str) -> Transformer3DModel:
-     """
-     Creates and loads the Transformer3D model with the correct precision logic,
-     including support for the float8_e4m3fn optimization.
-     """
-     if precision == "float8_e4m3fn":
-         try:
-             from q8_kernels.integration.patch_transformer import patch_diffusers_transformer as patch_transformer_for_q8_kernels
-             transformer = Transformer3DModel.from_pretrained(ckpt_path, dtype=torch.float8_e4m3fn)
-             patch_transformer_for_q8_kernels(transformer)
-             return transformer
-         except ImportError:
-             raise ValueError("Q8-Kernels not found. To use FP8 checkpoint, please install Q8 kernels from the project's wheels.")
-     elif precision == "bfloat16":
-         return Transformer3DModel.from_pretrained(ckpt_path).to(torch.bfloat16)
-     else:
-         return Transformer3DModel.from_pretrained(ckpt_path)

- # ==============================================================================
- # --- OFFICIAL LOW-LEVEL BUILDER ---
- # ==============================================================================

- def build_complete_pipeline_on_cpu(checkpoint_path: str, config: Dict) -> LTXVideoPipeline:
-     """
-     Builds the COMPLETE LTX pipeline, including the VAE, and keeps it on the CPU.
-     This is the fundamental build function used by the LTXAducManager.
-     """
-     logging.info(f"Building complete LTX pipeline from checkpoint: {Path(checkpoint_path).name}")

-     with safe_open(checkpoint_path, framework="pt") as f:
          metadata = f.metadata() or {}
          config_str = metadata.get("config", "{}")
-         allowed_inference_steps = json.loads(config_str).get("allowed_inference_steps")

-     precision = config.get("precision", "bfloat16")

-     # Use the correct helper function to create the transformer
-     transformer = create_transformer(checkpoint_path, precision).to("cpu")
-
-     scheduler = RectifiedFlowScheduler.from_pretrained(checkpoint_path)
-     text_encoder = T5EncoderModel.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="text_encoder").to("cpu")
-     tokenizer = T5Tokenizer.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="tokenizer")
      patchifier = SymmetricPatchifier(patch_size=1)
-     vae = CausalVideoAutoencoder.from_pretrained(checkpoint_path).to("cpu")

      if precision == "bfloat16":
-         text_encoder.to(torch.bfloat16)
          vae.to(torch.bfloat16)
-     # The transformer was already converted to bfloat16 inside create_transformer, when applicable
-
      pipeline = LTXVideoPipeline(
-         transformer=transformer,
-         patchifier=patchifier,
-         text_encoder=text_encoder,
-         tokenizer=tokenizer,
-         scheduler=scheduler,
-         vae=vae,  # The VAE is included so the pipeline can be self-sufficient
          allowed_inference_steps=allowed_inference_steps,
-         prompt_enhancer_image_caption_model=None,
-         prompt_enhancer_image_caption_processor=None,
-         prompt_enhancer_llm_model=None,
-         prompt_enhancer_llm_tokenizer=None,
      )
-
-     return pipeline

- # ==============================================================================
- # --- GENERIC HELPER FUNCTIONS ---
- # ==============================================================================
-
-
- # # FILE: api/ltx/ltx_utils.py
- # DESCRIPTION: A pure utility library for the LTX ecosystem.
- # Contains the official low-level builder function for the complete pipeline
- # and other stateless helper functions.
-
- import os
- import random
- import json
- import logging
- import sys
- from pathlib import Path
- from typing import Dict, Tuple
-
- import torch
- from safetensors import safe_open
- from transformers import T5EncoderModel, T5Tokenizer
-
- # ==============================================================================
- # --- PATH SETUP AND LTX LIBRARY IMPORTS ---
- # ==============================================================================
-
- LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
-
- def add_deps_to_path():
-     """Adds the LTX repository directory to sys.path so its libraries can be imported."""
-     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-     if repo_path not in sys.path:
-         sys.path.insert(0, repo_path)
-         logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")
-
- add_deps_to_path()
-
- try:
-     from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
-     from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
-     from ltx_video.models.transformers.transformer3d import Transformer3DModel
-     from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
-     from ltx_video.schedulers.rf import RectifiedFlowScheduler
- except ImportError as e:
-     logging.critical("Failed to import a core LTX-Video library component.", exc_info=True)
-     raise ImportError(f"Could not import from LTX-Video library. Check repo integrity at '{LTX_VIDEO_REPO_DIR}'. Error: {e}")
-
- # ==============================================================================
- # --- 'create_transformer' HELPER FUNCTION (essential) ---
- # ==============================================================================
-
- def create_transformer(ckpt_path: str, precision: str) -> Transformer3DModel:
-     """
-     Creates and loads the Transformer3D model with the correct precision logic,
-     including support for the float8_e4m3fn optimization.
-     """
-     if precision == "float8_e4m3fn":
-         try:
-             from q8_kernels.integration.patch_transformer import patch_diffusers_transformer as patch_transformer_for_q8_kernels
-             transformer = Transformer3DModel.from_pretrained(ckpt_path, dtype=torch.float8_e4m3fn)
-             patch_transformer_for_q8_kernels(transformer)
-             return transformer
-         except ImportError:
-             raise ValueError("Q8-Kernels not found. To use FP8 checkpoint, please install Q8 kernels from the project's wheels.")
-     elif precision == "bfloat16":
-         return Transformer3DModel.from_pretrained(ckpt_path).to(torch.bfloat16)
-     else:
-         return Transformer3DModel.from_pretrained(ckpt_path)
-
- # ==============================================================================
- # --- OFFICIAL LOW-LEVEL BUILDER ---
- # ==============================================================================
-
- def build_complete_pipeline_on_cpu(checkpoint_path: str, config: Dict) -> LTXVideoPipeline:
-     """
-     Builds the COMPLETE LTX pipeline, including the VAE, and keeps it on the CPU.
-     This is the fundamental build function used by the LTXAducManager.
-     """
-     logging.info(f"Building complete LTX pipeline from checkpoint: {Path(checkpoint_path).name}")
-
-     with safe_open(checkpoint_path, framework="pt") as f:
-         metadata = f.metadata() or {}
-         config_str = metadata.get("config", "{}")
-         allowed_inference_steps = json.loads(config_str).get("allowed_inference_steps")
-
-     precision = config.get("precision", "bfloat16")
-
-     # Use the correct helper function to create the transformer
-     transformer = create_transformer(checkpoint_path, precision).to("cpu")
-
-     scheduler = RectifiedFlowScheduler.from_pretrained(checkpoint_path)
-     text_encoder = T5EncoderModel.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="text_encoder").to("cpu")
-     tokenizer = T5Tokenizer.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="tokenizer")
-     patchifier = SymmetricPatchifier(patch_size=1)
-     vae = CausalVideoAutoencoder.from_pretrained(checkpoint_path).to("cpu")

      if precision == "bfloat16":
-         text_encoder.to(torch.bfloat16)
          vae.to(torch.bfloat16)
-     # The transformer was already converted to bfloat16 inside create_transformer, when applicable

-     pipeline = LTXVideoPipeline(
-         transformer=transformer,
-         patchifier=patchifier,
-         text_encoder=text_encoder,
-         tokenizer=tokenizer,
-         scheduler=scheduler,
-         vae=vae,  # The VAE is included so the pipeline can be self-sufficient
-         allowed_inference_steps=allowed_inference_steps,
-         prompt_enhancer_image_caption_model=None,
-         prompt_enhancer_image_caption_processor=None,
-         prompt_enhancer_llm_model=None,
-         prompt_enhancer_llm_tokenizer=None,
-     )
-
-     return pipeline

  # ==============================================================================
- # --- GENERIC HELPER FUNCTIONS ---
  # ==============================================================================

  def seed_everything(seed: int):
-     """
-     Seeds PyTorch, NumPy and Python to guarantee reproducibility.
-     """
      random.seed(seed)
      os.environ['PYTHONHASHSEED'] = str(seed)
      np.random.seed(seed)
      torch.manual_seed(seed)
      torch.cuda.manual_seed_all(seed)
      torch.backends.cudnn.deterministic = True
-     torch.backends.cudnn.benchmark = Fals
-
  def load_image_to_tensor_with_resize_and_crop(
      image_input: Union[str, Image.Image],
      target_height: int,
      target_width: int,
  ) -> torch.Tensor:
-     """
-     Loads, resizes, crops and processes an image into a 5D pixel tensor,
-     normalized to [-1, 1], ready to be sent to the VAE for encoding.
-     """
      if isinstance(image_input, str):
          image = Image.open(image_input).convert("RGB")
      elif isinstance(image_input, Image.Image):
-         image = image_input.convert("RGB")
      else:
          raise ValueError("image_input must be a file path or a PIL Image object")

@@ -267,38 +169,22 @@ def load_image_to_tensor_with_resize_and_crop(

      if aspect_ratio_frame > aspect_ratio_target:
          new_width, new_height = int(input_height * aspect_ratio_target), input_height
-         x_start = (input_width - new_width) // 2
-         image = image.crop((x_start, 0, x_start + new_width, new_height))
      else:
-         new_height = int(input_width / aspect_ratio_target)
-         y_start = (input_height - new_height) // 2
-         image = image.crop((0, y_start, input_width, y_start + new_height))
-
      image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

-     frame_tensor = TVF.to_tensor(image)
-
-     # This part depends on 'crf_compressor', so it must be imported here as well
-     try:
-         from ltx_video.pipelines import crf_compressor
-         frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
-         frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
-         frame_tensor = frame_tensor_hwc.permute(2, 0, 1)
-     except ImportError:
-         logging.warning("CRF Compressor not found. Skipping compression step.")
-
      frame_tensor = (frame_tensor * 2.0) - 1.0
-     return frame_tensor.unsqueeze(0).unsqueeze(2)
-
-
- def seed_everything(seed: int):
-     """
-     Seeds PyTorch, NumPy and Python to guarantee reproducibility.
-     """
-     random.seed(seed)
-     os.environ['PYTHONHASHSEED'] = str(seed)
300
- np.random.seed(seed)
301
- torch.manual_seed(seed)
302
- torch.cuda.manual_seed_all(seed)
303
- torch.backends.cudnn.deterministic = True
304
- torch.backends.cudnn.benchmark = False
 
  # FILE: api/ltx/ltx_utils.py
+ # DESCRIPTION: Comprehensive, self-contained utility module for the LTX pipeline.
+ # Handles dependency path injection, model loading, pipeline creation, and tensor preparation.
  
  import os
  import random
  import json
  import logging
+ import time
  import sys
  from pathlib import Path
+ from typing import Dict, Optional, Tuple, Union
+ from huggingface_hub import hf_hub_download
  
+ import numpy as np
  import torch
+ import torchvision.transforms.functional as TVF
+ from PIL import Image
  from safetensors import safe_open
  from transformers import T5EncoderModel, T5Tokenizer
  
  # ==============================================================================
+ # --- CRITICAL: DEPENDENCY PATH INJECTION ---
  # ==============================================================================
  
+ # Path to the cloned LTX-Video repository
  LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
+ LTX_REPO_ID = "Lightricks/LTX-Video"
+ CACHE_DIR = os.environ.get("HF_HOME")
+ 
  
  def add_deps_to_path():
+     """
+     Adds the LTX repository directory to sys.path to guarantee that its
+     libraries can be imported.
+     """
      repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
      if repo_path not in sys.path:
          sys.path.insert(0, repo_path)
          logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")
  
+ # Run the function immediately so the environment is configured before any LTX import below.
  add_deps_to_path()
  
+ 
+ # ==============================================================================
+ # --- LTX-VIDEO LIBRARY IMPORTS (after path setup) ---
+ # ==============================================================================
  try:
      from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
+     from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
      from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
      from ltx_video.models.transformers.transformer3d import Transformer3DModel
      from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
      from ltx_video.schedulers.rf import RectifiedFlowScheduler
+     import ltx_video.pipelines.crf_compressor as crf_compressor
  except ImportError as e:
+     raise ImportError(f"Could not import from LTX-Video library even after setting sys.path. Check repo integrity at '{LTX_VIDEO_REPO_DIR}'. Error: {e}")
+ 
  # ==============================================================================
+ # --- MODEL AND PIPELINE BUILD FUNCTIONS ---
  # ==============================================================================
  
+ def create_latent_upsampler(latent_upsampler_model_path: str, device: str) -> LatentUpsampler:
+     """Loads the Latent Upsampler model from a checkpoint path."""
+     logging.info(f"Loading Latent Upsampler from: {latent_upsampler_model_path} to device: {device}")
+     latent_upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
+     latent_upsampler.to(device)
+     latent_upsampler.eval()
+     return latent_upsampler
  
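A minimal usage sketch for create_latent_upsampler(); the checkpoint path below is hypothetical, and the call assumes the LTX-Video repo is already importable:

    # Hypothetical local checkpoint path; the helper returns the model in eval mode.
    upsampler = create_latent_upsampler(
        "/data/checkpoints/spatial_upsampler.safetensors",  # hypothetical path
        device="cpu",
    )
    assert not upsampler.training  # .eval() was applied before returning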
+ def build_ltx_pipeline_on_cpu(config: Dict) -> Tuple[LTXVideoPipeline, Optional[LatentUpsampler], CausalVideoAutoencoder]:
+     """Builds the complete LTX pipeline, the optional latent upsampler, and a standalone VAE on the CPU."""
+     t0 = time.perf_counter()
+     logging.info("Building LTX pipeline on CPU...")
+ 
+     ckpt_path_str = hf_hub_download(repo_id=LTX_REPO_ID, filename=config["checkpoint_path"], cache_dir=CACHE_DIR)
+     ckpt_path = Path(ckpt_path_str)
+     if not ckpt_path.is_file():
+         raise FileNotFoundError(f"Main checkpoint file not found: {ckpt_path}")
+ 
+     logging.info(f"Building LTX pipeline ckpt: {ckpt_path_str}")
+ 
+     with safe_open(ckpt_path, framework="pt") as f:
          metadata = f.metadata() or {}
          config_str = metadata.get("config", "{}")
+         configs = json.loads(config_str)
+         allowed_inference_steps = configs.get("allowed_inference_steps")
+ 
+     vae = CausalVideoAutoencoder.from_pretrained(ckpt_path).to("cpu")
+     transformer = Transformer3DModel.from_pretrained(ckpt_path).to("cpu")
+     scheduler = RectifiedFlowScheduler.from_pretrained(ckpt_path)
+ 
+     text_encoder_path = config["text_encoder_model_name_or_path"]
+     text_encoder = T5EncoderModel.from_pretrained(text_encoder_path, subfolder="text_encoder").to("cpu")
+     tokenizer = T5Tokenizer.from_pretrained(text_encoder_path, subfolder="tokenizer")
  
      patchifier = SymmetricPatchifier(patch_size=1)
  
+     precision = config.get("precision", "bfloat16")
      if precision == "bfloat16":
          vae.to(torch.bfloat16)
+         transformer.to(torch.bfloat16)
+         text_encoder.to(torch.bfloat16)
+ 
      pipeline = LTXVideoPipeline(
+         transformer=transformer, patchifier=patchifier, text_encoder=text_encoder,
+         tokenizer=tokenizer, scheduler=scheduler, vae=vae,
          allowed_inference_steps=allowed_inference_steps,
+         prompt_enhancer_image_caption_model=None, prompt_enhancer_image_caption_processor=None,
+         prompt_enhancer_llm_model=None, prompt_enhancer_llm_tokenizer=None,
      )
  
+     # Load a second, standalone VAE instance to return alongside the pipeline
+     # (kept independent of pipeline.vae so it can live on its own device).
+     vae = CausalVideoAutoencoder.from_pretrained(ckpt_path).to("cpu")
      if precision == "bfloat16":
          vae.to(torch.bfloat16)
  
+     latent_upsampler = None
+     if config.get("spatial_upscaler_model_path"):
+         spatial_path_str = hf_hub_download(repo_id=LTX_REPO_ID, filename=config["spatial_upscaler_model_path"], cache_dir=CACHE_DIR)
+         spatial_path = Path(spatial_path_str)
+         if not spatial_path.is_file():
+             raise FileNotFoundError(f"Spatial upscaler checkpoint file not found: {spatial_path_str}")
+         logging.info(f"Building UPSCALER ckpt: {spatial_path_str}")
+         latent_upsampler = create_latent_upsampler(spatial_path, device="cpu")
+         if precision == "bfloat16":
+             latent_upsampler.to(torch.bfloat16)
+ 
+     logging.info(f"LTX pipeline built on CPU in {time.perf_counter() - t0:.2f}s")
+     return pipeline, latent_upsampler, vae
+ 
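A minimal usage sketch for build_ltx_pipeline_on_cpu(); the config keys are the ones the function reads above, but the values shown are illustrative, not the shipped YAML defaults:

    # Illustrative config; real values come from the LTX YAML config file.
    config = {
        "checkpoint_path": "ltxv-13b-0.9.8-distilled-fp8.safetensors",  # illustrative filename
        "text_encoder_model_name_or_path": "Lightricks/LTX-Video",      # assumed repo id
        "precision": "bfloat16",
        "spatial_upscaler_model_path": None,  # falsy -> the upsampler branch is skipped
    }
    pipeline, latent_upsampler, vae = build_ltx_pipeline_on_cpu(config)
    # All components are still on the CPU; a worker can now shard them, e.g.
    # pipeline.to(main_device) and vae.to(vae_device).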
 
  # ==============================================================================
+ # --- HELPER FUNCTIONS (seeding, image preparation) ---
  # ==============================================================================
  
  def seed_everything(seed: int):
+     """Sets the seed for reproducibility."""
      random.seed(seed)
      os.environ['PYTHONHASHSEED'] = str(seed)
      np.random.seed(seed)
      torch.manual_seed(seed)
      torch.cuda.manual_seed_all(seed)
      torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+ 
  def load_image_to_tensor_with_resize_and_crop(
      image_input: Union[str, Image.Image],
      target_height: int,
      target_width: int,
  ) -> torch.Tensor:
+     """Loads and processes an image into a 5D pixel tensor compatible with the LTX pipeline."""
      if isinstance(image_input, str):
          image = Image.open(image_input).convert("RGB")
      elif isinstance(image_input, Image.Image):
+         image = image_input
      else:
          raise ValueError("image_input must be a file path or a PIL Image object")
  
      if aspect_ratio_frame > aspect_ratio_target:
          new_width, new_height = int(input_height * aspect_ratio_target), input_height
+         x_start, y_start = (input_width - new_width) // 2, 0
      else:
+         new_width, new_height = input_width, int(input_width / aspect_ratio_target)
+         x_start, y_start = 0, (input_height - new_height) // 2
+ 
+     image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
      image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)
+ 
+     frame_tensor = TVF.to_tensor(image)  # PIL -> tensor (C, H, W) in the [0, 1] range
+     frame_tensor = TVF.gaussian_blur(frame_tensor, kernel_size=(3, 3))
  
+     frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
+     frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
+     frame_tensor = frame_tensor_hwc.permute(2, 0, 1)
+     # Normalize to the [-1, 1] range, which the VAE expects for encoding
      frame_tensor = (frame_tensor * 2.0) - 1.0
+ 
+     # Create a 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
+     return frame_tensor.unsqueeze(0).unsqueeze(2)
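A quick shape check of the 5D contract documented above, using a solid-color stand-in image:

    from PIL import Image

    test_image = Image.new("RGB", (1280, 720), color=(128, 128, 128))  # stand-in media
    tensor = load_image_to_tensor_with_resize_and_crop(test_image, target_height=512, target_width=768)
    assert tensor.shape == (1, 3, 1, 512, 768)  # (batch, channels, frames, height, width)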
 
api/ltx/vae_aduc_pipeline.py CHANGED
@@ -5,39 +5,36 @@
  import logging
  import time
  import torch
- import os
  import torchvision.transforms.functional as TVF
  from PIL import Image
- from typing import List, Union, Tuple, Literal, Optional
  from dataclasses import dataclass
- from pathlib import Path
  import sys
  
- # The client imports the MANAGER to submit jobs to the worker pool.
  from api.ltx.ltx_aduc_manager import ltx_aduc_manager
  
- # --- Add the LTX-Video path for low-level imports ---
- LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
- def add_deps_to_path():
-     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-     if repo_path not in sys.path:
-         sys.path.insert(0, repo_path)
- add_deps_to_path()
-
- # Imports for type annotations and for the job functions.
- from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
- from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
- import ltx_video.pipelines.crf_compressor as crf_compressor
  
  # ==============================================================================
- # --- STRUCTURE DEFINITIONS AND HELPERS ---
  # ==============================================================================
  
  @dataclass
  class LatentConditioningItem:
      """
      Data structure for passing conditioned latents between services.
-     The latent tensor is kept on the CPU to save VRAM between steps.
      """
      latent_tensor: torch.Tensor
      media_frame_number: int
@@ -50,32 +47,32 @@ def load_image_to_tensor_with_resize_and_crop(
  ) -> torch.Tensor:
      """
      Loads and processes an image into a 5D pixel tensor, normalized to [-1, 1],
-     ready to be sent to the VAE for encoding.
      """
      if isinstance(image_input, str):
          image = Image.open(image_input).convert("RGB")
      elif isinstance(image_input, Image.Image):
-         image = image_input.convert("RGB")
      else:
          raise ValueError("image_input must be a file path or a PIL Image object")
  
-     # Crop and resize logic to preserve the aspect ratio
      input_width, input_height = image.size
      aspect_ratio_target = target_width / target_height
      aspect_ratio_frame = input_width / input_height
      if aspect_ratio_frame > aspect_ratio_target:
          new_width, new_height = int(input_height * aspect_ratio_target), input_height
-         x_start = (input_width - new_width) // 2
-         image = image.crop((x_start, 0, x_start + new_width, new_height))
      else:
-         new_height = int(input_width / aspect_ratio_target)
-         y_start = (input_height - new_height) // 2
-         image = image.crop((0, y_start, input_width, y_start + new_height))
-
      image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)
-
-     # Convert to tensor and normalize
      frame_tensor = TVF.to_tensor(image)
  
      frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
      frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
      frame_tensor = frame_tensor_hwc.permute(2, 0, 1)
83
  frame_tensor = (frame_tensor * 2.0) - 1.0
84
  return frame_tensor.unsqueeze(0).unsqueeze(2)
85
 
 
86
  # ==============================================================================
87
- # --- FUNÇÕES DE TRABALHO (Jobs a serem executados no Pool de VAE) ---
88
  # ==============================================================================
89
 
90
  def _job_encode_media(vae: CausalVideoAutoencoder, pixel_tensor: torch.Tensor) -> torch.Tensor:
91
- """Job que codifica um tensor de pixel em um tensor latente."""
92
  device = vae.device
93
  dtype = vae.dtype
94
  pixel_tensor_gpu = pixel_tensor.to(device, dtype=dtype)
95
  latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
96
  return latents.cpu()
97
 
98
- def _job_decode_latent(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor) -> torch.Tensor:
99
- """Job que decodifica um tensor latente em um tensor de pixels."""
100
  device = vae.device
101
  dtype = vae.dtype
102
  latent_tensor_gpu = latent_tensor.to(device, dtype=dtype)
@@ -108,17 +106,14 @@ def _job_decode_latent(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor)
  # ==============================================================================
  
  class VaeAducPipeline:
-     """
-     High-level client for orchestrating every VAE-related task.
-     It defines the business logic and submits the jobs to the LTXAducManager.
-     """
      def __init__(self):
          logging.info("✅ VAE ADUC Pipeline (Client) initialized and ready to submit jobs.")
          pass
  
      def __call__(
          self,
-         media: Union[torch.Tensor, List[Union[Image.Image, str]]],
          task: Literal['encode', 'decode', 'create_conditioning_items'],
          target_resolution: Optional[Tuple[int, int]] = (512, 512),
          conditioning_params: Optional[List[Tuple[int, float]]] = None
@@ -131,7 +126,7 @@ class VaeAducPipeline:
          task: The task to run ('encode', 'decode', 'create_conditioning_items').
          target_resolution: The (height, width) resolution used for pre-processing.
          conditioning_params: For 'create_conditioning_items', a list of
-             (frame_number, strength) tuples for each media item.
  
      Returns:
          The result of the task, always on the CPU.
@@ -142,13 +137,16 @@ class VaeAducPipeline:
      if task == 'encode':
          if not isinstance(media, list): media = [media]
          pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, target_resolution[0], target_resolution[1]) for m in media]
-         results = [ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_encode_media, pixel_tensor=pt) for pt in pixel_tensors]
          return results
  
      elif task == 'decode':
          if not isinstance(media, torch.Tensor):
-             raise TypeError("For the 'decode' task, 'media' must be a single latent tensor.")
-         return ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_decode_latent, latent_tensor=media)
  
      elif task == 'create_conditioning_items':
          if not isinstance(media, list) or not isinstance(conditioning_params, list) or len(media) != len(conditioning_params):
  
  import logging
  import time
  import torch
  
  import torchvision.transforms.functional as TVF
  from PIL import Image
+ from typing import List, Union, Tuple, Literal, Optional
  from dataclasses import dataclass
+ import os
+ import subprocess
  import sys
+ from pathlib import Path
  
  from api.ltx.ltx_aduc_manager import ltx_aduc_manager
  
+ DEPS_DIR = Path("/data")
+ LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+ repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+ if repo_path not in sys.path:
+     sys.path.insert(0, repo_path)
+     logging.debug(f"LTX-Video repository added to sys.path: {repo_path}")
+ from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
+ from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
+ import ltx_video.pipelines.crf_compressor as crf_compressor
  
  # ==============================================================================
+ # --- STRUCTURE DEFINITIONS AND HELPERS (imported or moved here) ---
  # ==============================================================================
  
  @dataclass
  class LatentConditioningItem:
      """
      Data structure for passing conditioned latents between services.
+     The latent tensor is kept on the CPU to save VRAM.
      """
      latent_tensor: torch.Tensor
      media_frame_number: int
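A construction sketch using only the two fields visible in this hunk; the dataclass may declare further fields beyond the diff context, and the latent shape is hypothetical:

    item = LatentConditioningItem(
        latent_tensor=torch.zeros(1, 128, 1, 16, 16),  # hypothetical latent shape, kept on the CPU
        media_frame_number=0,
    )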
 
  ) -> torch.Tensor:
      """
      Loads and processes an image into a 5D pixel tensor, normalized to [-1, 1],
+     ready to be sent to the VAE.
      """
      if isinstance(image_input, str):
          image = Image.open(image_input).convert("RGB")
      elif isinstance(image_input, Image.Image):
+         image = image_input
      else:
          raise ValueError("image_input must be a file path or a PIL Image object")
  
      input_width, input_height = image.size
      aspect_ratio_target = target_width / target_height
      aspect_ratio_frame = input_width / input_height
+ 
      if aspect_ratio_frame > aspect_ratio_target:
          new_width, new_height = int(input_height * aspect_ratio_target), input_height
+         x_start, y_start = (input_width - new_width) // 2, 0
      else:
+         new_width, new_height = input_width, int(input_width / aspect_ratio_target)
+         x_start, y_start = 0, (input_height - new_height) // 2
+ 
+     image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
      image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)
+ 
      frame_tensor = TVF.to_tensor(image)
+     frame_tensor = TVF.gaussian_blur(frame_tensor, kernel_size=(3, 3))
+ 
      frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
      frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
      frame_tensor = frame_tensor_hwc.permute(2, 0, 1)
  
      frame_tensor = (frame_tensor * 2.0) - 1.0
      return frame_tensor.unsqueeze(0).unsqueeze(2)
  
+ 
  # ==============================================================================
+ # --- JOB FUNCTIONS (jobs executed on the pool) ---
  # ==============================================================================
  
  def _job_encode_media(vae: CausalVideoAutoencoder, pixel_tensor: torch.Tensor) -> torch.Tensor:
+     """Generic job function for encoding a pixel tensor."""
      device = vae.device
      dtype = vae.dtype
      pixel_tensor_gpu = pixel_tensor.to(device, dtype=dtype)
      latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
      return latents.cpu()
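These job functions are meant to be handed to the manager rather than called directly; a minimal hand-off sketch, using the same submit_job signature the __call__ dispatcher uses below:

    pixel_tensor = torch.zeros(1, 3, 1, 512, 512)  # hypothetical preprocessed media
    latent = ltx_aduc_manager.submit_job(
        job_type='vae',
        job_func=_job_encode_media,  # the pool, not the caller, binds the `vae` argument
        pixel_tensor=pixel_tensor,
    )
    # `latent` comes back on the CPU, per the job contract above.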
  
+ def _job_decode_latent_to_pixels(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor) -> torch.Tensor:
+     """Job function for decoding a latent tensor."""
      device = vae.device
      dtype = vae.dtype
      latent_tensor_gpu = latent_tensor.to(device, dtype=dtype)
 
  # ==============================================================================
  
  class VaeAducPipeline:
+     """High-level client for orchestrating every VAE task."""
      def __init__(self):
          logging.info("✅ VAE ADUC Pipeline (Client) initialized and ready to submit jobs.")
          pass
  
      def __call__(
          self,
+         media: Union[torch.Tensor, List[Union[Image.Image, torch.Tensor]]],
          task: Literal['encode', 'decode', 'create_conditioning_items'],
          target_resolution: Optional[Tuple[int, int]] = (512, 512),
          conditioning_params: Optional[List[Tuple[int, float]]] = None
  
          task: The task to run ('encode', 'decode', 'create_conditioning_items').
          target_resolution: The (height, width) resolution used for pre-processing.
          conditioning_params: For 'create_conditioning_items', a list of
+             (frame_number, strength) tuples corresponding to each media item.
  
      Returns:
          The result of the task, always on the CPU.
 
      if task == 'encode':
          if not isinstance(media, list): media = [media]
          pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, target_resolution[0], target_resolution[1]) for m in media]
+         results = []
+         for pt in pixel_tensors:
+             latent = ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_encode_media, pixel_tensor=pt)
+             results.append(latent)
          return results
  
      elif task == 'decode':
          if not isinstance(media, torch.Tensor):
+             raise TypeError("For 'decode', 'media' must be a single latent tensor.")
+         return ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_decode_latent_to_pixels, latent_tensor=media)
  
      elif task == 'create_conditioning_items':
          if not isinstance(media, list) or not isinstance(conditioning_params, list) or len(media) != len(conditioning_params):
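To close, a minimal end-to-end sketch of the client's three tasks as dispatched above; the stand-in image and the (frame_number, strength) values are placeholders:

    from PIL import Image

    vae_pipeline = VaeAducPipeline()
    img = Image.new("RGB", (768, 768))  # stand-in media

    # 1) encode: media list -> list of CPU latents
    latents = vae_pipeline(media=[img], task='encode', target_resolution=(512, 512))

    # 2) decode: a single latent tensor -> pixel tensor on the CPU
    pixels = vae_pipeline(media=latents[0], task='decode')

    # 3) conditioning items: one (frame_number, strength) tuple per media item
    items = vae_pipeline(
        media=[img],
        task='create_conditioning_items',
        conditioning_params=[(0, 1.0)],
    )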