Upload 5 files
- api/ltx/ltx_aduc_manager.py +93 -148
- api/ltx/ltx_aduc_orchestrator.py +61 -58
- api/ltx/ltx_aduc_pipeline.py +101 -128
- api/ltx/ltx_utils.py +216 -102
- api/ltx/vae_aduc_pipeline.py +42 -40
api/ltx/ltx_aduc_manager.py
CHANGED
@@ -1,6 +1,7 @@
 # FILE: api/ltx/ltx_aduc_manager.py
-# DESCRIPTION:
-#
+# DESCRIPTION: A simplified, robust pool manager for a unified LTX worker.
+# This worker handles all tasks, including Transformer generation and VAE operations,
+# while still respecting the GPU separation defined by the GPUManager.
 
 import logging
 import torch
@@ -9,11 +10,14 @@ from pathlib import Path
 import threading
 import queue
 import time
-
+import yaml
+import os
+from huggingface_hub import hf_hub_download
+from typing import List, Optional, Callable, Any, Tuple, Dict
 
-#
-from api.ltx.ltx_utils import get_main_ltx_pipeline, get_main_vae
+# --- Import the GPU manager and the low-level builder ---
 from managers.gpu_manager import gpu_manager
+from api.ltx.ltx_utils import build_complete_pipeline_on_cpu, create_transformer
 
 # --- Add the LTX-Video repo to the path so its types can be imported ---
 LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
@@ -24,168 +28,138 @@ def add_deps_to_path():
 add_deps_to_path()
 
 from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
-from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
 
 # ==============================================================================
-# ---
+# --- PIPELINE CONSTRUCTION ORCHESTRATION (internal to the Manager) ---
 # ==============================================================================
 
-
-"""
-
+def get_complete_pipeline() -> LTXVideoPipeline:
+    """
+    Orchestrates construction of the COMPLETE LTX pipeline, including the VAE, on the CPU.
+    """
+    config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
+    with open(config_path, "r") as file:
+        config = yaml.safe_load(file)
+
+    ckpt_path = hf_hub_download(
+        repo_id="Lightricks/LTX-Video",
+        filename=config["checkpoint_path"],
+        cache_dir=os.environ.get("HF_HOME")
+    )
+    return build_complete_pipeline_on_cpu(ckpt_path, config)
+
+# ==============================================================================
+# --- UNIFIED WORKER CLASS ---
+# ==============================================================================
+
+class LTXWorker(threading.Thread):
+    """
+    A unified worker that manages one complete LTX pipeline instance.
+    It loads the model and distributes its components (Transformer/VAE) to the correct GPUs.
+    """
+    def __init__(self, worker_id: int):
         super().__init__()
         self.worker_id = worker_id
-        self.
+        self.pipeline: Optional[LTXVideoPipeline] = None
         self.is_healthy = False
         self.is_busy = False
         self.daemon = True
+        self.autocast_dtype: torch.dtype = torch.float32
 
     def run(self):
-        """
+        """Initializes the worker: loads the pipeline and moves it onto the GPUs."""
         try:
-            self.
+            self.pipeline = get_complete_pipeline()
+            self._set_precision_policy()
+
+            main_device = gpu_manager.get_ltx_device()
+            vae_device = gpu_manager.get_ltx_vae_device()
+
+            logging.info(f"[LTXWorker-{self.worker_id}] Moving components -> Main: {main_device}, VAE: {vae_device}")
+            self.pipeline.to(main_device)     # Move everything to the main GPU first
+            self.pipeline.vae.to(vae_device)  # Then move the VAE specifically to its dedicated GPU
+
             self.is_healthy = True
-            logging.info(f"✅
+            logging.info(f"✅ LTXWorker {self.worker_id} is healthy. Main on {main_device}, VAE on {vae_device}.")
         except Exception:
             self.is_healthy = False
-            logging.error(f"❌
-
-    def _load_models(self):
-        """Method to be implemented by the child classes."""
-        raise NotImplementedError
-
-    def
-        """
-        return self.is_healthy, self.is_busy
-
-class LTXMainWorker(BaseWorker):
-    """Specialist worker for the main LTX pipeline."""
-    def __init__(self, worker_id: int, device: torch.device):
-        super().__init__(worker_id, device)
-        self.pipeline: Optional[LTXVideoPipeline] = None
-
-    def _load_models(self):
-        logging.info(f"[LTXWorker-{self.worker_id}] Loading models to CPU...")
-        self.pipeline = get_main_ltx_pipeline()
-        logging.info(f"[LTXWorker-{self.worker_id}] Moving pipeline to {self.device}...")
-        self.pipeline.to(self.device)
-
-    def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
-        """Executes a job, managing the 'busy' state."""
-        self.is_busy = True
-        logging.info(f"Worker {self.worker_id} (LTX) starting job: {job_func.__name__}")
-        try:
-
-        finally:
-            self.is_busy = False
-
-class VAEWorker(BaseWorker):
-    """Specialist worker for the VAE model."""
-    def __init__(self, worker_id: int, device: torch.device):
-        super().__init__(worker_id, device)
-        self.vae: Optional[CausalVideoAutoencoder] = None
-
-    def _load_models(self):
-        logging.info(f"[VAEWorker-{self.worker_id}] Loading VAE model to CPU...")
-        self.vae = get_main_vae()
-        logging.info(f"[VAEWorker-{self.worker_id}] Moving VAE to {self.device}...")
-        self.vae.to(self.device)
-        self.vae.eval()
+            logging.error(f"❌ LTXWorker {self.worker_id} FAILED to initialize!", exc_info=True)
+
+    def _set_precision_policy(self):
+        """Defines the precision policy for autocast operations."""
+        try:
+            config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
+            with open(config_path, "r") as file: config = yaml.safe_load(file)
+            precision = str(config.get("precision", "bfloat16")).lower()
+            if precision in ["float8_e4m3fn", "bfloat16"]: self.autocast_dtype = torch.bfloat16
+            elif precision == "mixed_precision": self.autocast_dtype = torch.float16
+        except Exception:
+            logging.warning(f"[LTXWorker-{self.worker_id}] Could not set precision policy, defaulting to float32.", exc_info=True)
 
     def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
-        """Executes a job, managing the 'busy' state."""
         self.is_busy = True
-        logging.info(f"Worker {self.worker_id} (VAE) starting job: {job_func.__name__}")
         try:
+            # The job receives the complete pipeline and the dtype to use for autocast
+            result = job_func(self.pipeline, self.autocast_dtype, *args, **kwargs)
             return result
-        except Exception
-            logging.error(f"Worker {self.worker_id} (VAE) job failed!", exc_info=True)
+        except Exception:
             self.is_healthy = False
             raise
         finally:
             self.is_busy = False
 
 # ==============================================================================
-# --- THE POOL MANAGER
+# --- THE POOL MANAGER (SINGLETON) ---
 # ==============================================================================
 class LTXAducManager:
     _instance = None
     _initialized = False
 
     def __new__(cls, *args, **kwargs):
-        if cls._instance is None:
-            cls._instance = super().__new__(cls)
+        if cls._instance is None: cls._instance = super().__new__(cls)
         return cls._instance
 
     def __init__(self):
         if self._initialized: return
 
-        logging.info("🏭 Initializing
+        logging.info("🏭 Initializing Simplified Pool Manager for LTX...")
 
-        self.
-        self.
-        self.ltx_job_queue = queue.Queue()
-        self.vae_job_queue = queue.Queue()
+        self.workers: List[LTXWorker] = []
+        self.job_queue = queue.Queue()
         self.pool_lock = threading.Lock()
 
         self._initialize_workers()
 
-        self.ltx_dispatcher = threading.Thread(target=self._dispatch_jobs, args=(self.ltx_job_queue, self.ltx_workers), daemon=True)
-        self.vae_dispatcher = threading.Thread(target=self._dispatch_jobs, args=(self.vae_job_queue, self.vae_workers), daemon=True)
+        self.dispatcher = threading.Thread(target=self._dispatch_jobs, daemon=True)
         self.health_monitor = threading.Thread(target=self._health_check_loop, daemon=True)
-        self.ltx_dispatcher.start()
-        self.vae_dispatcher.start()
+        self.dispatcher.start()
         self.health_monitor.start()
 
         self._initialized = True
-        logging.info("✅
+        logging.info("✅ Simplified Pool Manager is running.")
 
     def _initialize_workers(self):
-        """Creates and starts the workers based on the allocated GPUs."""
-        # Assumes gpu_manager now has get_ltx_devices() and get_seedvr_devices() that return lists
-        ltx_gpus = gpu_manager.get_ltx_device()      # Adjust if the name differs
-        vae_gpus = gpu_manager.get_ltx_vae_device()  # Adjust if the name differs
-
         with self.pool_lock:
-
-            self.vae_workers.append(worker)
-            worker.start()
-
-    def _get_available_worker(self, worker_pool: List[BaseWorker]) -> Optional[BaseWorker]:
-        """Finds a healthy, idle worker in the pool."""
+            # For now, a single unified worker is created.
+            # In the future this loop can create multiple workers if more GPUs are available.
+            worker = LTXWorker(worker_id=0)
+            self.workers.append(worker)
+            worker.start()
+
+    def _get_available_worker(self) -> Optional[LTXWorker]:
         with self.pool_lock:
-            for worker in
-                if healthy and not busy:
+            for worker in self.workers:
+                if worker.is_healthy and not worker.is_busy:
                     return worker
         return None
 
-    def _dispatch_jobs(self
-        """Consumer-thread loop that pulls jobs from the queue and dispatches them."""
+    def _dispatch_jobs(self):
         while True:
-            job_func, args, kwargs, future = job_queue.get()
+            job_func, args, kwargs, future = self.job_queue.get()
             worker = None
             while worker is None:
-                worker = self._get_available_worker(
-                if worker is None:
-                    time.sleep(0.1)  # Wait for a worker to become free
-
+                worker = self._get_available_worker()
+                if worker is None: time.sleep(0.1)
             try:
                 result = worker.execute(job_func, args, kwargs)
                 future.put(result)
@@ -193,51 +167,22 @@ class LTXAducManager:
                future.put(e)
 
    def _health_check_loop(self):
-        """Thread that periodically checks for and restarts unhealthy workers."""
        while True:
            time.sleep(30)
-            logging.debug("Running health check on all workers...")
            with self.pool_lock:
-                for i, worker in enumerate(self.
+                for i, worker in enumerate(self.workers):
                    if not worker.is_alive() or not worker.is_healthy:
-                        logging.warning(f"LTX Worker {worker.worker_id}
-                        new_worker =
-                        self.
+                        logging.warning(f"LTX Worker {worker.worker_id} is UNHEALTHY. Restarting...")
+                        new_worker = LTXWorker(worker_id=worker.worker_id)
+                        self.workers[i] = new_worker
                        new_worker.start()
-                # Repeat the loop for the VAE workers
-                for i, worker in enumerate(self.vae_workers):
-                    if not worker.is_alive() or not worker.is_healthy:
-                        logging.warning(f"VAE Worker {worker.worker_id} on {worker.device} is UNHEALTHY. Restarting...")
-                        new_worker = VAEWorker(worker.worker_id, worker.device)
-                        self.vae_workers[i] = new_worker
-                        new_worker.start()
-
-    def submit_job(self, job_type: str, job_func: Callable, *args, **kwargs) -> Any:
-        """
-        Public entry point for submitting a job to the pool.
-        This function is synchronous: it waits for the result.
-        """
-        if job_type not in ['ltx', 'vae']:
-            raise ValueError("Invalid job_type. Must be 'ltx' or 'vae'.")
-
-        job_queue = self.ltx_job_queue if job_type == 'ltx' else self.vae_job_queue
-        future = queue.Queue()  # A queue is used as a 'future' to get the result back
+    def submit_job(self, job_func: Callable, *args, **kwargs) -> Any:
+        future = queue.Queue(1)
+        self.job_queue.put((job_func, args, kwargs, future))
        result = future.get()
-
-        if isinstance(result, Exception):
-            raise result  # If the job failed, re-raise the exception in the main thread
-
+        if isinstance(result, Exception): raise result
        return result
 
-# ==============================================================================
 # --- GLOBAL INSTANTIATION ---
-
-try:
-    ltx_aduc_manager = LTXAducManager()
-except Exception as e:
-    logging.critical("CRITICAL ERROR: Failed to initialize the LTXAducManager pool.", exc_info=True)
-    ltx_aduc_manager = None
+ltx_aduc_manager = LTXAducManager()
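For reference, a job submitted through the new manager is just a callable whose first two parameters are the worker's pipeline and its autocast dtype; submit_job blocks until the result (or the job's exception) comes back. A minimal sketch — _job_report_devices is a hypothetical job written for illustration, not part of this commit:

from api.ltx.ltx_aduc_manager import ltx_aduc_manager

# Hypothetical job: report where the worker placed the pipeline components.
def _job_report_devices(pipeline, autocast_dtype, note: str) -> dict:
    return {
        "note": note,
        "autocast_dtype": str(autocast_dtype),
        "main_device": str(pipeline.device),
        "vae_device": str(pipeline.vae.device),
    }

# Blocks until the single unified worker has run the job; re-raises job exceptions.
info = ltx_aduc_manager.submit_job(_job_report_devices, note="smoke test")
print(info)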
api/ltx/ltx_aduc_orchestrator.py
CHANGED
@@ -1,21 +1,26 @@
 # FILE: api/ltx_aduc_orchestrator.py
 # DESCRIPTION: The main workflow orchestrator for the ADUC-SDR LTX suite.
-#
-#
+# In this simplified architecture, it coordinates a single unified client (LtxAducPipeline)
+# to execute the complete video generation pipeline from prompt to MP4.
 
 import logging
 import time
+import yaml
+import os
+import sys
 from PIL import Image
-from typing import Optional, Dict
+from typing import Optional, Dict, Union
 
-# The Orchestrator imports
-# These clients are responsible for submitting the jobs to the worker pool.
+# The Orchestrator imports the UNIFIED CLIENT that it coordinates.
 from api.ltx.ltx_aduc_pipeline import ltx_aduc_pipeline
-from api.ltx.vae_aduc_pipeline import vae_aduc_pipeline
 
 # The Orchestrator imports the TOOLS it needs for the final tasks.
 from tools.video_encode_tool import video_encode_tool_singleton
 
+# Import Path to load the configuration.
+from pathlib import Path
+LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
+
 # ==============================================================================
 # --- THE ORCHESTRATOR CLASS (the workflow's brain) ---
 # ==============================================================================
@@ -23,16 +28,26 @@ from tools.video_encode_tool import video_encode_tool_singleton
 class LtxAducOrchestrator:
     """
     Orchestrates the complete video generation workflow,
-    coordinating
+    coordinating the unified LTX client. It is the main entry point for the UI.
     """
     def __init__(self):
         """
-        Initializes the orchestrator
-        heavy models are managed by the LTXAducManager in the background.
+        Initializes the orchestrator, loading the base configuration a single time.
         """
         self.output_dir = "/app/output"
+        self.base_config = self._load_base_config()
         logging.info("✅ LTX ADUC Orchestrator initialized and ready.")
 
+    def _load_base_config(self) -> Dict:
+        """Loads the base configuration from the YAML file, which holds the default parameters."""
+        try:
+            config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
+            with open(config_path, "r") as file:
+                return yaml.safe_load(file)
+        except Exception as e:
+            logging.error(f"Failed to load base config file. Orchestrator may not function correctly. Error: {e}")
+            return {}
+
     def __call__(
         self,
         prompt: str,
@@ -44,87 +59,80 @@ class LtxAducOrchestrator:
         output_filename_base: str = "ltx_aduc_video"
     ) -> Optional[str]:
         """
-        Main entry point of the Orchestrator. Runs the complete pipeline.
+        Main entry point of the Orchestrator. Runs the complete video generation pipeline.
 
         Args:
-            prompt (str): The full text prompt
-            initial_image (Optional[Image.Image]):
+            prompt (str): The full text prompt, where each new line is a scene.
+            initial_image (Optional[Image.Image]): PIL image used to condition the first scene.
             height (int): Height of the final video.
             width (int): Width of the final video.
             duration_in_seconds (float): Desired total duration of the video.
-            ltx_configs (Optional[Dict]): Advanced settings
-            output_filename_base (str):
+            ltx_configs (Optional[Dict]): Advanced UI settings that override the defaults.
+            output_filename_base (str): Base name for the output video file.
 
         Returns:
-            Optional[str]: The path
+            Optional[str]: The path to the generated .mp4 file, or None on failure.
         """
         t0 = time.time()
         logging.info(f"Orchestrator starting new job for prompt: '{prompt.splitlines()[0]}...'")
 
         try:
             # =================================================================
-            # --- STEP 1: PREPARATION
+            # --- STEP 1: PREPARE THE INPUTS AND SETTINGS ---
             # =================================================================
-            # Converts the prompt string into a list of scenes.
             prompt_list = [line.strip() for line in prompt.splitlines() if line.strip()]
             if not prompt_list:
                 raise ValueError("O prompt está vazio ou não contém linhas válidas.")
 
-            # Prepares the initial conditioning item, if an image was provided.
             initial_conditioning_items = []
             if initial_image:
-                logging.info("
-
-                    task='create_conditioning_items',
-                    target_resolution=(height, width),
-                    conditioning_params=conditioning_params
+                logging.info("Orchestrator delegating: create conditioning item.")
+                conditioning_params = [(0, 1.0)]  # (frame_number, strength)
+                initial_conditioning_items = ltx_aduc_pipeline.encode_to_conditioning_items(
+                    media_list=[initial_image],
+                    params=conditioning_params,
+                    resolution=(height, width)
                 )
-
+
+            common_ltx_args = self.base_config.get("first_pass", {}).copy()
+            common_ltx_args.update({
+                'negative_prompt': "blurry, low quality, bad anatomy, deformed",
+                'height': height,
+                'width': width
+            })
+            if ltx_configs:
+                common_ltx_args.update(ltx_configs)
 
             # =================================================================
-            # --- STEP 2: LATENT VIDEO GENERATION ---
+            # --- STEP 2: DELEGATE LATENT VIDEO GENERATION ---
             # =================================================================
-            logging.info("
-
-            final_latents, used_seed = ltx_aduc_pipeline(
+            logging.info("Orchestrator delegating: generate latent video.")
+            final_latents, used_seed = ltx_aduc_pipeline.generate_latents(
                 prompt_list=prompt_list,
-                initial_conditioning_items=initial_conditioning_items,
-                height=height,
-                width=width,
                 duration_in_seconds=duration_in_seconds,
-
+                common_ltx_args=common_ltx_args,
+                initial_conditioning_items=initial_conditioning_items
             )
-
             if final_latents is None:
                 raise RuntimeError("LTX client failed to generate a latent tensor.")
-            logging.info(f"
+            logging.info(f"Orchestrator received latent tensor with shape: {final_latents.shape}")
 
             # =================================================================
-            # --- STEP 3: DECODING
+            # --- STEP 3: DELEGATE DECODING TO PIXELS ---
             # =================================================================
-            logging.info("
-
-            pixel_tensor = vae_aduc_pipeline(
-                media=final_latents,
-                task='decode'
-            )
-
+            logging.info("Orchestrator delegating: decode latent to pixels.")
+            pixel_tensor = ltx_aduc_pipeline.decode_to_pixels(final_latents)
             if pixel_tensor is None:
-                raise RuntimeError("
-            logging.info(f"
+                raise RuntimeError("LTX client failed to decode the latent tensor.")
+            logging.info(f"Orchestrator received pixel tensor with shape: {pixel_tensor.shape}")
 
             # =================================================================
-            # --- STEP 4:
+            # --- STEP 4: FINAL TASK - ENCODE TO MP4 ---
             # =================================================================
             video_filename = f"{output_filename_base}_{int(time.time())}_{used_seed}.mp4"
             output_path = f"{self.output_dir}/{video_filename}"
 
-            logging.info(f"
-            # Uses the video tool to save the pixel tensor into the final file.
+            logging.info(f"Orchestrator executing final task: saving tensor to MP4 at {output_path}")
             video_encode_tool_singleton.save_video_from_tensor(
                 pixel_5d=pixel_tensor,
                 path=output_path,
@@ -142,10 +150,5 @@ class LtxAducOrchestrator:
 
 # ==============================================================================
 # --- ORCHESTRATOR SINGLETON INSTANCE ---
-# This is the main entry point that the UI (app.py) will call.
 # ==============================================================================
-
-try:
-    ltx_aduc_orchestrator = LtxAducOrchestrator()
-except Exception as e:
-    logging.critical("CRITICAL: Failed to initialize the LtxAducOrchestrator.", exc_info=True)
-    ltx_aduc_orchestrator = None
+ltx_aduc_orchestrator = LtxAducOrchestrator()
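Taken together, the orchestrator's public surface stays a single call. A usage sketch under the assumptions visible above — the import path follows the file list (api/ltx/ltx_aduc_orchestrator.py), the argument names come from the docstring, and the input image path and the "num_inference_steps" override key are illustrative only:

from PIL import Image
from api.ltx.ltx_aduc_orchestrator import ltx_aduc_orchestrator

# One scene per prompt line; the optional image conditions the first scene.
prompt = "A lighthouse at dawn\nWaves crashing against the rocks"

video_path = ltx_aduc_orchestrator(
    prompt=prompt,
    initial_image=Image.open("/app/input/first_frame.png"),  # illustrative path
    height=512,
    width=768,
    duration_in_seconds=4.0,
    ltx_configs={"num_inference_steps": 8},  # assumed override key, merged into common_ltx_args
    output_filename_base="demo",
)
print(video_path)  # e.g. /app/output/demo_<timestamp>_<seed>.mp4, or None on failure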
api/ltx/ltx_aduc_pipeline.py
CHANGED
@@ -1,142 +1,130 @@
 # FILE: api/ltx/ltx_aduc_pipeline.py
-# DESCRIPTION: A high-level client for submitting LTX
-#
-# and initial conditions, without handling pixel decoding.
+# DESCRIPTION: A unified high-level client for submitting ALL LTX-related jobs (generation and VAE)
+# to the LTXAducManager pool.
 
 import logging
 import time
 import torch
 import random
-import
-from
+from typing import List, Optional, Tuple, Dict
+from PIL import Image
+from dataclasses import dataclass
+from pathlib import Path
+import sys
 
-
+from api.ltx.ltx_utils import load_image_to_tensor_with_resize_and_crop  # Helper from ltx_utils
+
+# The client imports the MANAGER in order to submit all jobs.
 from api.ltx.ltx_aduc_manager import ltx_aduc_manager
 
-#
-
+# Add the LTX-Video path for low-level imports and types.
+LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
+def add_deps_to_path():
+    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+    if repo_path not in sys.path:
+        sys.path.insert(0, repo_path)
+add_deps_to_path()
 
-
-from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
+from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
+from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
+
+# ==============================================================================
+# --- STRUCTURE DEFINITIONS ---
+# ==============================================================================
+
+@dataclass
+class LatentConditioningItem:
+    """Data structure for passing conditioning latents to the generation job."""
+    latent_tensor: torch.Tensor
+    media_frame_number: int
+    conditioning_strength: float
 
 # ==============================================================================
 # --- JOB FUNCTIONS (jobs executed on the LTX pool) ---
 # ==============================================================================
 
-def
-    pipeline
-
-    pipeline_kwargs = {
-        "prompt": prompt,
-        "negative_prompt": negative_prompt,
-        "height": height,
-        "width": width,
-        "num_frames": num_frames,
-        "frame_rate": 24,  # Default; can be parameterized if needed
-        "generator": generator,
-        "output_type": "latent",  # Key point: we always request latents
-        "conditioning_items": conditioning_items if conditioning_items else None,
-        **ltx_configs  # Applies advanced settings (guidance, steps, etc.)
-    }
-
-    with torch.autocast(device_type=pipeline.device.type, dtype=torch.bfloat16):
-        latents_raw = pipeline(**pipeline_kwargs).images
-
-    # Returns the latent tensor on the CPU to free the worker's VRAM for the next job
+def _job_encode_media(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, pixel_tensor: torch.Tensor) -> torch.Tensor:
+    """Job that uses the pipeline's VAE to encode a pixel tensor."""
+    vae = pipeline.vae
+    pixel_tensor_gpu = pixel_tensor.to(vae.device, dtype=vae.dtype)
+    latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
+    return latents.cpu()
+
+def _job_decode_latent(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, latent_tensor: torch.Tensor) -> torch.Tensor:
+    """Job that uses the pipeline's VAE to decode a latent tensor."""
+    vae = pipeline.vae
+    latent_tensor_gpu = latent_tensor.to(vae.device, dtype=vae.dtype)
+    pixels = vae_decode(latent_tensor_gpu, vae, is_video=True, vae_per_channel_normalize=True)
+    return pixels.cpu()
+
+def _job_generate_latent_chunk(pipeline: LTXVideoPipeline, autocast_dtype: torch.dtype, **kwargs) -> torch.Tensor:
+    """Job that uses the main pipeline to generate a latent video chunk."""
+    generator = torch.Generator(device=pipeline.device).manual_seed(kwargs['seed'])
+    pipeline_kwargs = {"generator": generator, "output_type": "latent", **kwargs}
+
+    with torch.autocast(device_type=pipeline.device.type, dtype=autocast_dtype):
+        latents_raw = pipeline(**pipeline_kwargs).images
+
     return latents_raw.cpu()
 
 # ==============================================================================
-# --- THE CLIENT CLASS
+# --- THE UNIFIED CLIENT CLASS ---
 # ==============================================================================
 
 class LtxAducPipeline:
     """
-    Client
-    Submits video-chunk generation jobs to the LTXAducManager.
+    Unified client that orchestrates all LTX tasks, including generation and VAE work.
     """
     def __init__(self):
-        logging.info("✅ LTX ADUC Pipeline (Client) initialized
-        # __init__ is clean; no models are loaded here.
+        logging.info("✅ Unified LTX/VAE ADUC Pipeline (Client) initialized.")
         self.FRAMES_ALIGNMENT = 8
-        pass
 
     def _get_random_seed(self) -> int:
-        """Always generates and returns a new random seed."""
         return random.randint(0, 2**32 - 1)
 
     def _align(self, dim: int, alignment: int = 8) -> int:
-        """Aligns a dimension to the nearest multiple."""
         return ((dim + alignment - 1) // alignment) * alignment
 
+    # --- API methods for the Orchestrator ---
+
+    def encode_to_conditioning_items(self, media_list: List, params: List, resolution: Tuple[int, int]) -> List[LatentConditioningItem]:
+        """Converts a list of images into a list of LatentConditioningItem."""
+        pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, resolution[0], resolution[1]) for m in media_list]
+        items = []
+        for i, pt in enumerate(pixel_tensors):
+            latent_tensor = ltx_aduc_manager.submit_job(_job_encode_media, pixel_tensor=pt)
+            frame_number, strength = params[i]
+            items.append(LatentConditioningItem(
+                latent_tensor=latent_tensor,
+                media_frame_number=frame_number,
+                conditioning_strength=strength
+            ))
+        return items
+
+    def decode_to_pixels(self, latent_tensor: torch.Tensor) -> torch.Tensor:
+        """Decodes a latent tensor into a pixel tensor."""
+        return ltx_aduc_manager.submit_job(_job_decode_latent, latent_tensor=latent_tensor)
+
+    def generate_latents(
         self,
         prompt_list: List[str],
-
-        duration_in_seconds: float = 4.0,
-        ltx_configs: Optional[Dict] = None
+        duration_in_seconds: float,
+        common_ltx_args: Dict,
+        initial_conditioning_items: Optional[List[LatentConditioningItem]] = None
     ) -> Tuple[Optional[torch.Tensor], Optional[int]]:
-        """
-        Main entry point for generating a complete latent video.
-
-        Args:
-            prompt_list: List of prompts, where each prompt is a scene.
-            initial_conditioning_items: List of `LatentConditioningItem` used to condition
-                the first scene.
-            height: Height of the video.
-            width: Width of the video.
-            duration_in_seconds: Desired total duration of the video.
-            ltx_configs: Dictionary of advanced settings for the LTX pipeline
-                (guidance_scale, num_inference_steps, etc.).
-
-        Returns:
-            A tuple containing:
-            - The final, complete latent tensor (on the CPU).
-            - The main seed used for the generation.
-        """
+        """Generates a complete latent video from a list of prompts."""
         t0 = time.time()
-        logging.info(f"LTX Client received a generation job
-
-        if not prompt_list:
-            raise ValueError("A lista de prompts não pode estar vazia.")
-
+        logging.info(f"LTX Client received a generation job for {len(prompt_list)} scenes.")
         used_seed = self._get_random_seed()
-        logging.info(f"Generation seed set to: {used_seed}")
 
-        # --- Chunk splitting and overlap logic ---
         num_chunks = len(prompt_list)
         total_frames = self._align(int(duration_in_seconds * 24))
-        frames_per_chunk_base = total_frames // num_chunks
+        frames_per_chunk_base = total_frames // num_chunks if num_chunks > 0 else total_frames
        overlap_frames = self._align(9) if num_chunks > 1 else 0
 
        final_latents_list = []
-        overlap_condition_item
+        overlap_condition_item = None
 
        for i, chunk_prompt in enumerate(prompt_list):
            current_conditions = []
@@ -145,58 +133,43 @@ class LtxAducPipeline:
            if overlap_condition_item:
                current_conditions.append(overlap_condition_item)
 
-            # Computes the number of frames for the current chunk
            num_frames_for_chunk = frames_per_chunk_base
            if i == num_chunks - 1:
                processed_frames = sum(f.shape[2] for f in final_latents_list)
                num_frames_for_chunk = total_frames - processed_frames
-
            num_frames_for_chunk = self._align(num_frames_for_chunk)
-
-                seed=used_seed + i,  # A different seed per chunk, for variety
-                conditioning_items=current_conditions,
-                ltx_configs=ltx_configs or {}
-            )
+            if num_frames_for_chunk <= 0: continue
+
+            job_specific_args = {
+                "prompt": chunk_prompt,
+                "num_frames": num_frames_for_chunk,
+                "seed": used_seed + i,
+                "conditioning_items": current_conditions
+            }
+            final_job_args = {**common_ltx_args, **job_specific_args}
+
+            chunk_latents = ltx_aduc_manager.submit_job(_job_generate_latent_chunk, **final_job_args)
 
            if chunk_latents is None:
-                logging.error(f"Failed to generate latents for scene {i+1}. Aborting
+                logging.error(f"Failed to generate latents for scene {i+1}. Aborting.")
                return None, used_seed
 
-            # --- "Kinetic echo" (overlap) management ---
            if i < num_chunks - 1:
-                # Keeps the last frames of the current chunk to condition the next one
                overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
                overlap_condition_item = LatentConditioningItem(
-                    latent_tensor=overlap_latents,
-                    media_frame_number=0,  # Always conditions the start of the next chunk
-                    conditioning_strength=1.0  # Strong conditioning
-                )
-                # Appends the current chunk without the overlap
+                    latent_tensor=overlap_latents, media_frame_number=0, conditioning_strength=1.0)
                final_latents_list.append(chunk_latents[:, :, :-overlap_frames, :, :])
            else:
-                # Appends the final chunk in full
                final_latents_list.append(chunk_latents)
 
+        if not final_latents_list:
+            logging.warning("No latent chunks were generated.")
+            return None, used_seed
+
        final_latents = torch.cat(final_latents_list, dim=2)
        logging.info(f"LTX Client job finished in {time.time() - t0:.2f}s. Final latent shape: {final_latents.shape}")
 
        return final_latents, used_seed
 
 # --- CLIENT SINGLETON INSTANCE ---
-try:
-    ltx_aduc_pipeline = LtxAducPipeline()
-except Exception as e:
-    logging.critical("CRITICAL: Failed to initialize the LtxAducPipeline client.", exc_info=True)
-    ltx_aduc_pipeline = None
+ltx_aduc_pipeline = LtxAducPipeline()
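The unified client can also be driven directly, without the orchestrator. A minimal sketch; common_ltx_args mirrors what the orchestrator assembles from the YAML's "first_pass" section, and the exact keys accepted depend on that config file:

from api.ltx.ltx_aduc_pipeline import ltx_aduc_pipeline

common_ltx_args = {
    "negative_prompt": "blurry, low quality, bad anatomy, deformed",
    "height": 512,
    "width": 768,
}

latents, seed = ltx_aduc_pipeline.generate_latents(
    prompt_list=["A forest in the fog", "Sunlight breaking through the trees"],
    duration_in_seconds=4.0,
    common_ltx_args=common_ltx_args,
)
if latents is not None:
    pixels = ltx_aduc_pipeline.decode_to_pixels(latents)  # 5D pixel tensor, returned on the CPU
    print(seed, latents.shape, pixels.shape)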
api/ltx/ltx_utils.py
CHANGED
|
@@ -1,165 +1,263 @@
|
|
| 1 |
# FILE: api/ltx/ltx_utils.py
|
| 2 |
-
# DESCRIPTION:
|
| 3 |
-
#
|
|
|
|
| 4 |
|
| 5 |
import os
|
| 6 |
import random
|
| 7 |
import json
|
| 8 |
import logging
|
| 9 |
-
import time
|
| 10 |
import sys
|
| 11 |
from pathlib import Path
|
| 12 |
-
from typing import Dict,
|
| 13 |
-
from huggingface_hub import hf_hub_download
|
| 14 |
-
|
| 15 |
-
import numpy as np
|
| 16 |
-
import torch
|
| 17 |
import torchvision.transforms.functional as TVF
|
| 18 |
from PIL import Image
|
|
|
|
|
|
|
| 19 |
from safetensors import safe_open
|
| 20 |
from transformers import T5EncoderModel, T5Tokenizer
|
| 21 |
|
| 22 |
# ==============================================================================
|
| 23 |
-
# ---
|
| 24 |
# ==============================================================================
|
| 25 |
|
| 26 |
-
# Define o caminho para o repositório clonado
|
| 27 |
LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
|
| 28 |
-
LTX_REPO_ID = "Lightricks/LTX-Video"
|
| 29 |
-
CACHE_DIR = os.environ.get("HF_HOME")
|
| 30 |
-
|
| 31 |
|
| 32 |
def add_deps_to_path():
|
| 33 |
-
"""
|
| 34 |
-
Adiciona o diretório do repositório LTX ao sys.path para garantir que suas
|
| 35 |
-
bibliotecas possam ser importadas.
|
| 36 |
-
"""
|
| 37 |
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
|
| 38 |
if repo_path not in sys.path:
|
| 39 |
sys.path.insert(0, repo_path)
|
| 40 |
logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")
|
| 41 |
|
| 42 |
-
# Executa a função imediatamente para configurar o ambiente antes de qualquer importação.
|
| 43 |
add_deps_to_path()
|
| 44 |
|
| 45 |
-
|
| 46 |
-
# ==============================================================================
|
| 47 |
-
# --- IMPORTAÇÕES DA BIBLIOTECA LTX-VIDEO (Após configuração do path) ---
|
| 48 |
-
# ==============================================================================
|
| 49 |
try:
|
| 50 |
from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
|
| 51 |
-
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
|
| 52 |
from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
|
| 53 |
from ltx_video.models.transformers.transformer3d import Transformer3DModel
|
| 54 |
from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
|
| 55 |
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
| 56 |
-
import ltx_video.pipelines.crf_compressor as crf_compressor
|
| 57 |
except ImportError as e:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
|
| 61 |
# ==============================================================================
|
| 62 |
-
# ---
|
| 63 |
# ==============================================================================
|
| 64 |
|
| 65 |
-
def
|
| 66 |
-
"""
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
raise FileNotFoundError(f"Main checkpoint file not found: {ckpt_path}")
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
with safe_open(
|
| 89 |
metadata = f.metadata() or {}
|
| 90 |
config_str = metadata.get("config", "{}")
|
| 91 |
-
|
| 92 |
-
allowed_inference_steps = configs.get("allowed_inference_steps")
|
| 93 |
|
| 94 |
-
|
| 95 |
-
vae = CausalVideoAutoencoder.from_pretrained(ckpt_path).to("cpu")
|
| 96 |
-
transformer = Transformer3DModel.from_pretrained(ckpt_path).to("cpu")
|
| 97 |
-
scheduler = RectifiedFlowScheduler.from_pretrained(ckpt_path)
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
| 102 |
patchifier = SymmetricPatchifier(patch_size=1)
|
|
|
|
| 103 |
|
| 104 |
-
precision = config.get("precision", "bfloat16")
|
| 105 |
if precision == "bfloat16":
|
| 106 |
-
vae.to(torch.bfloat16)
|
| 107 |
-
transformer.to(torch.bfloat16)
|
| 108 |
text_encoder.to(torch.bfloat16)
|
| 109 |
-
|
|
|
|
|
|
|
| 110 |
pipeline = LTXVideoPipeline(
|
| 111 |
-
transformer=transformer,
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
allowed_inference_steps=allowed_inference_steps,
|
| 114 |
-
prompt_enhancer_image_caption_model=None,
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
)
|
|
|
|
|
|
|
| 117 |
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
vae = CausalVideoAutoencoder.from_pretrained(ckpt_path).to("cpu")
|
| 120 |
-
if precision == "bfloat16":
|
| 121 |
-
vae.to(torch.bfloat16)
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
spatial_path = Path(spatial_path_str)
|
| 128 |
-
if not spatial_path.is_file():
|
| 129 |
-
raise FileNotFoundError(f"Main checkpoint upscaler file not found: {spatial_path_str}")
|
| 130 |
-
logging.info(f"Building UPSCALER pipeline ckpt:{spatial_path_str}")
|
| 131 |
-
latent_upsampler = create_latent_upsampler(spatial_path, device="cpu")
|
| 132 |
-
if precision == "bfloat16":
|
| 133 |
-
latent_upsampler.to(torch.bfloat16)
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
# ==============================================================================
|
| 140 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
# ==============================================================================
|
| 142 |
|
| 143 |
def seed_everything(seed: int):
|
| 144 |
-
"""
|
|
|
|
|
|
|
| 145 |
random.seed(seed)
|
| 146 |
os.environ['PYTHONHASHSEED'] = str(seed)
|
| 147 |
np.random.seed(seed)
|
| 148 |
torch.manual_seed(seed)
|
| 149 |
torch.cuda.manual_seed_all(seed)
|
| 150 |
torch.backends.cudnn.deterministic = True
|
| 151 |
-
torch.backends.cudnn.benchmark =
|
| 152 |
-
|
| 153 |
def load_image_to_tensor_with_resize_and_crop(
|
| 154 |
image_input: Union[str, Image.Image],
|
| 155 |
target_height: int,
|
| 156 |
target_width: int,
|
| 157 |
) -> torch.Tensor:
|
| 158 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 159 |
if isinstance(image_input, str):
|
| 160 |
image = Image.open(image_input).convert("RGB")
|
| 161 |
elif isinstance(image_input, Image.Image):
|
| 162 |
-
image = image_input
|
| 163 |
else:
|
| 164 |
raise ValueError("image_input must be a file path or a PIL Image object")
|
| 165 |
|
|
@@ -169,22 +267,38 @@ def load_image_to_tensor_with_resize_and_crop(
|
|
| 169 |
|
| 170 |
if aspect_ratio_frame > aspect_ratio_target:
|
| 171 |
new_width, new_height = int(input_height * aspect_ratio_target), input_height
|
| 172 |
-
x_start
|
|
|
|
| 173 |
else:
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)
|
| 179 |
-
|
| 180 |
-
frame_tensor = TVF.to_tensor(image) # PIL -> tensor (C, H, W) in [0, 1] range
|
| 181 |
-
frame_tensor = TVF.gaussian_blur(frame_tensor, kernel_size=(3, 3))
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
-
|
| 190 |
-
return frame_tensor.unsqueeze(0).unsqueeze(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# FILE: api/ltx/ltx_utils.py
|
| 2 |
+
# DESCRIPTION: A pure utility library for the LTX ecosystem.
|
| 3 |
+
# Contains the official low-level builder function for the complete pipeline
|
| 4 |
+
# and other stateless helper functions.
|
| 5 |
|
| 6 |
import os
|
| 7 |
import random
|
| 8 |
import json
|
| 9 |
import logging
|
|
|
|
| 10 |
import sys
|
| 11 |
from pathlib import Path
|
| 12 |
+
from typing import Dict, Tuple, Union
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
import torchvision.transforms.functional as TVF
|
| 14 |
from PIL import Image
|
| 15 |
+
|
| 16 |
+
import torch
|
| 17 |
from safetensors import safe_open
|
| 18 |
from transformers import T5EncoderModel, T5Tokenizer
|
| 19 |
|
# ==============================================================================
# --- PATH SETUP AND LTX LIBRARY IMPORTS ---
# ==============================================================================

LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")

def add_deps_to_path():
    """Adds the LTX repository directory to sys.path so its libraries can be imported."""
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path not in sys.path:
        sys.path.insert(0, repo_path)
        logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")

add_deps_to_path()

try:
    from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
    from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
    from ltx_video.models.transformers.transformer3d import Transformer3DModel
    from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
    from ltx_video.schedulers.rf import RectifiedFlowScheduler
except ImportError as e:
    logging.critical("Failed to import a core LTX-Video library component.", exc_info=True)
    raise ImportError(f"Could not import from LTX-Video library. Check repo integrity at '{LTX_VIDEO_REPO_DIR}'. Error: {e}")

# ==============================================================================
# --- HELPER FUNCTION 'create_transformer' (Essential) ---
# ==============================================================================

def create_transformer(ckpt_path: str, precision: str) -> Transformer3DModel:
    """
    Creates and loads the Transformer3D model with the correct precision logic,
    including support for the float8_e4m3fn optimization.
    """
    if precision == "float8_e4m3fn":
        try:
            from q8_kernels.integration.patch_transformer import patch_diffusers_transformer as patch_transformer_for_q8_kernels
            transformer = Transformer3DModel.from_pretrained(ckpt_path, dtype=torch.float8_e4m3fn)
            patch_transformer_for_q8_kernels(transformer)
            return transformer
        except ImportError:
            raise ValueError("Q8-Kernels not found. To use an FP8 checkpoint, please install the Q8 kernels from the project's wheels.")
    elif precision == "bfloat16":
        return Transformer3DModel.from_pretrained(ckpt_path).to(torch.bfloat16)
    else:
        return Transformer3DModel.from_pretrained(ckpt_path)

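A minimal usage sketch for this helper; the checkpoint path below is an illustrative placeholder, not a path defined by this repo:

# Hypothetical call: load the transformer in bfloat16 from a local single-file checkpoint.
transformer = create_transformer("/data/checkpoints/ltx-video-2b.safetensors", precision="bfloat16")
# With precision="float8_e4m3fn", the same call additionally patches the model
# with Q8 kernels before returning it.
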
# ==============================================================================
# --- OFFICIAL LOW-LEVEL BUILDER ---
# ==============================================================================

def build_complete_pipeline_on_cpu(checkpoint_path: str, config: Dict) -> LTXVideoPipeline:
    """
    Builds the COMPLETE LTX pipeline, including the VAE, and keeps it on the CPU.
    This is the fundamental construction function used by the LTXAducManager.
    """
    logging.info(f"Building complete LTX pipeline from checkpoint: {Path(checkpoint_path).name}")

    with safe_open(checkpoint_path, framework="pt") as f:
        metadata = f.metadata() or {}
        config_str = metadata.get("config", "{}")
        allowed_inference_steps = json.loads(config_str).get("allowed_inference_steps")

    precision = config.get("precision", "bfloat16")

    # Use the proper helper function to create the transformer
    transformer = create_transformer(checkpoint_path, precision).to("cpu")

    scheduler = RectifiedFlowScheduler.from_pretrained(checkpoint_path)
    text_encoder = T5EncoderModel.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="text_encoder").to("cpu")
    tokenizer = T5Tokenizer.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="tokenizer")
    patchifier = SymmetricPatchifier(patch_size=1)
    vae = CausalVideoAutoencoder.from_pretrained(checkpoint_path).to("cpu")

    if precision == "bfloat16":
        text_encoder.to(torch.bfloat16)
        vae.to(torch.bfloat16)
        # The transformer was already converted to bfloat16 inside create_transformer, where applicable

    pipeline = LTXVideoPipeline(
        transformer=transformer,
        patchifier=patchifier,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        scheduler=scheduler,
        vae=vae,  # the VAE is included so the pipeline can be self-sufficient
        allowed_inference_steps=allowed_inference_steps,
        prompt_enhancer_image_caption_model=None,
        prompt_enhancer_image_caption_processor=None,
        prompt_enhancer_llm_model=None,
        prompt_enhancer_llm_tokenizer=None,
    )

    return pipeline

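For orientation, a hedged sketch of how a caller might drive this builder. The checkpoint path and text-encoder location are placeholders; only the 'precision' and 'text_encoder_model_name_or_path' keys are read by the function above:

# Hypothetical configuration for the builder.
config = {
    "precision": "bfloat16",
    "text_encoder_model_name_or_path": "/data/models/t5-text-encoder",  # placeholder
}
pipeline = build_complete_pipeline_on_cpu("/data/checkpoints/ltx-video-2b.safetensors", config)
# A worker would then move the whole pipeline to its assigned GPU, e.g. pipeline.to("cuda:0").
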
# ==============================================================================
# --- GENERIC HELPER FUNCTIONS ---
# ==============================================================================

def seed_everything(seed: int):
    """
    Sets the seed for Python, NumPy, and PyTorch to guarantee reproducibility.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

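Typical use is a single call at the start of a generation job, before any tensors are created:

# Deterministic run: identical seeds now yield identical noise/latents across runs,
# at the cost of disabling cuDNN autotuning.
seed_everything(42)
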
def load_image_to_tensor_with_resize_and_crop(
|
| 249 |
image_input: Union[str, Image.Image],
|
| 250 |
target_height: int,
|
| 251 |
target_width: int,
|
| 252 |
) -> torch.Tensor:
|
| 253 |
+
"""
|
| 254 |
+
Carrega, redimensiona, corta e processa uma imagem para um tensor de pixel 5D,
|
| 255 |
+
normalizado para [-1, 1], pronto para ser enviado ao VAE para encoding.
|
| 256 |
+
"""
|
| 257 |
if isinstance(image_input, str):
|
| 258 |
image = Image.open(image_input).convert("RGB")
|
| 259 |
elif isinstance(image_input, Image.Image):
|
| 260 |
+
image = image_input.convert("RGB")
|
| 261 |
else:
|
| 262 |
raise ValueError("image_input must be a file path or a PIL Image object")
|
| 263 |
|
|
|
|
| 267 |
|
| 268 |
if aspect_ratio_frame > aspect_ratio_target:
|
| 269 |
new_width, new_height = int(input_height * aspect_ratio_target), input_height
|
| 270 |
+
x_start = (input_width - new_width) // 2
|
| 271 |
+
image = image.crop((x_start, 0, x_start + new_width, new_height))
|
| 272 |
else:
|
| 273 |
+
new_height = int(input_width / aspect_ratio_target)
|
| 274 |
+
y_start = (input_height - new_height) // 2
|
| 275 |
+
image = image.crop((0, y_start, input_width, y_start + new_height))
|
| 276 |
+
|
| 277 |
image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
+
frame_tensor = TVF.to_tensor(image)
|
| 280 |
+
|
| 281 |
+
# Esta parte depende de 'crf_compressor', então precisamos importá-lo aqui também
|
| 282 |
+
try:
|
| 283 |
+
from ltx_video.pipelines import crf_compressor
|
| 284 |
+
frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
|
| 285 |
+
frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
|
| 286 |
+
frame_tensor = frame_tensor_hwc.permute(2, 0, 1)
|
| 287 |
+
except ImportError:
|
| 288 |
+
logging.warning("CRF Compressor not found. Skipping compression step.")
|
| 289 |
|
| 290 |
+
frame_tensor = (frame_tensor * 2.0) - 1.0
|
| 291 |
+
return frame_tensor.unsqueeze(0).unsqueeze(2)
|
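A small sketch of the resulting shape; the file path is a placeholder:

# Hypothetical call: prepare one reference frame for the VAE.
pixel_tensor = load_image_to_tensor_with_resize_and_crop("/tmp/reference.png", 512, 768)
# The 5D layout is (batch, channels, frames, height, width):
assert pixel_tensor.shape == (1, 3, 1, 512, 768)
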
api/ltx/vae_aduc_pipeline.py
CHANGED
@@ -5,36 +5,39 @@
 import logging
 import time
 import torch
 import torchvision.transforms.functional as TVF
 from PIL import Image
-from typing import List, Union, Tuple, Literal
 from dataclasses import dataclass
-import os
-import subprocess
-import sys
 from pathlib import Path

 from api.ltx.ltx_aduc_manager import ltx_aduc_manager

-LTX_VIDEO_REPO_DIR =
-sys.path

 # ==============================================================================
-# --- STRUCTURE DEFINITIONS AND HELPERS
 # ==============================================================================

 @dataclass
 class LatentConditioningItem:
     """
     Data structure for passing conditioned latents between services.
-    The latent tensor is kept on the CPU to save VRAM.
     """
     latent_tensor: torch.Tensor
     media_frame_number: int

@@ -47,32 +50,32 @@ def load_image_to_tensor_with_resize_and_crop(
 ) -> torch.Tensor:
     """
     Loads and processes an image into a 5D pixel tensor, normalized to [-1, 1],
-    ready to be sent to the VAE.
     """
     if isinstance(image_input, str):
         image = Image.open(image_input).convert("RGB")
     elif isinstance(image_input, Image.Image):
-        image = image_input
     else:
         raise ValueError("image_input must be a file path or a PIL Image object")

     input_width, input_height = image.size
     aspect_ratio_target = target_width / target_height
     aspect_ratio_frame = input_width / input_height
-
     if aspect_ratio_frame > aspect_ratio_target:
         new_width, new_height = int(input_height * aspect_ratio_target), input_height
-        x_start
     else:
-
     image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

-    frame_tensor = TVF.to_tensor(image)
-    frame_tensor = TVF.gaussian_blur(frame_tensor, kernel_size=(3, 3))

     frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
     frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
     frame_tensor = frame_tensor_hwc.permute(2, 0, 1)

@@ -80,21 +83,20 @@ def load_image_to_tensor_with_resize_and_crop(
     frame_tensor = (frame_tensor * 2.0) - 1.0
     return frame_tensor.unsqueeze(0).unsqueeze(2)

-
 # ==============================================================================
-# --- JOB FUNCTIONS (Jobs to be executed on the Pool) ---
 # ==============================================================================

 def _job_encode_media(vae: CausalVideoAutoencoder, pixel_tensor: torch.Tensor) -> torch.Tensor:
-    """
     device = vae.device
     dtype = vae.dtype
     pixel_tensor_gpu = pixel_tensor.to(device, dtype=dtype)
     latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
     return latents.cpu()

-def
-    """
     device = vae.device
     dtype = vae.dtype
     latent_tensor_gpu = latent_tensor.to(device, dtype=dtype)

@@ -106,14 +108,17 @@ def _job_decode_latent_to_pixels(vae: CausalVideoAutoencoder, latent_tensor: tor
 # ==============================================================================

 class VaeAducPipeline:
-    """
     def __init__(self):
         logging.info("✅ VAE ADUC Pipeline (Client) initialized and ready to submit jobs.")
         pass

     def __call__(
         self,
-        media: Union[torch.Tensor, List[Union[Image.Image,
         task: Literal['encode', 'decode', 'create_conditioning_items'],
         target_resolution: Optional[Tuple[int, int]] = (512, 512),
         conditioning_params: Optional[List[Tuple[int, float]]] = None

@@ -126,7 +131,7 @@ class VaeAducPipeline:
             task: The task to execute ('encode', 'decode', 'create_conditioning_items').
             target_resolution: The (height, width) resolution for preprocessing.
             conditioning_params: For 'create_conditioning_items', a list of
-                (frame_number, strength) tuples

         Returns:
             The task result, always on the CPU.

@@ -137,16 +142,13 @@
         if task == 'encode':
             if not isinstance(media, list): media = [media]
             pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, target_resolution[0], target_resolution[1]) for m in media]
-            results = []
-            for pt in pixel_tensors:
-                latent = ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_encode_media, pixel_tensor=pt)
-                results.append(latent)
             return results

         elif task == 'decode':
             if not isinstance(media, torch.Tensor):
-                raise TypeError("For 'decode', 'media' must be a single latent tensor.")
-                return ltx_aduc_manager.submit_job(job_type='vae', job_func=

         elif task == 'create_conditioning_items':
             if not isinstance(media, list) or not isinstance(conditioning_params, list) or len(media) != len(conditioning_params):

import logging
import time
import torch
import os
import torchvision.transforms.functional as TVF
from PIL import Image
from typing import List, Union, Tuple, Literal, Optional
from dataclasses import dataclass
from pathlib import Path
import sys

# The client imports the MANAGER to submit jobs to the worker pool.
from api.ltx.ltx_aduc_manager import ltx_aduc_manager

# --- Adds the LTX-Video path for low-level imports ---
LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
def add_deps_to_path():
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path not in sys.path:
        sys.path.insert(0, repo_path)
add_deps_to_path()

# Imports used for type annotations and by the job functions.
from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
import ltx_video.pipelines.crf_compressor as crf_compressor

# ==============================================================================
# --- STRUCTURE DEFINITIONS AND HELPERS ---
# ==============================================================================

@dataclass
class LatentConditioningItem:
    """
    Data structure for passing conditioned latents between services.
    The latent tensor is kept on the CPU to save VRAM between steps.
    """
    latent_tensor: torch.Tensor
    media_frame_number: int
) -> torch.Tensor:
    """
    Loads and processes an image into a 5D pixel tensor, normalized to [-1, 1]
    and ready to be sent to the VAE for encoding.
    """
    if isinstance(image_input, str):
        image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input.convert("RGB")
    else:
        raise ValueError("image_input must be a file path or a PIL Image object")

    # Crop-and-resize logic that preserves the aspect ratio
    input_width, input_height = image.size
    aspect_ratio_target = target_width / target_height
    aspect_ratio_frame = input_width / input_height
    if aspect_ratio_frame > aspect_ratio_target:
        new_width, new_height = int(input_height * aspect_ratio_target), input_height
        x_start = (input_width - new_width) // 2
        image = image.crop((x_start, 0, x_start + new_width, new_height))
    else:
        new_height = int(input_width / aspect_ratio_target)
        y_start = (input_height - new_height) // 2
        image = image.crop((0, y_start, input_width, y_start + new_height))

    image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    # Tensor conversion and normalization
    frame_tensor = TVF.to_tensor(image)
    frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
    frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
    frame_tensor = frame_tensor_hwc.permute(2, 0, 1)

    frame_tensor = (frame_tensor * 2.0) - 1.0
    return frame_tensor.unsqueeze(0).unsqueeze(2)

# ==============================================================================
# --- JOB FUNCTIONS (Jobs executed on the VAE worker pool) ---
# ==============================================================================

def _job_encode_media(vae: CausalVideoAutoencoder, pixel_tensor: torch.Tensor) -> torch.Tensor:
    """Job that encodes a pixel tensor into a latent tensor."""
    device = vae.device
    dtype = vae.dtype
    pixel_tensor_gpu = pixel_tensor.to(device, dtype=dtype)
    latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
    return latents.cpu()

def _job_decode_latent(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor) -> torch.Tensor:
    """Job that decodes a latent tensor into a pixel tensor."""
    device = vae.device
    dtype = vae.dtype
    latent_tensor_gpu = latent_tensor.to(device, dtype=dtype)
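These jobs are not called directly: the manager runs them on a pooled worker and, judging from the call sites below, injects that worker's VAE as the first argument. A hedged sketch of a direct submission:

# Hypothetical direct submission; 'pixel_tensor' would come from
# load_image_to_tensor_with_resize_and_crop above. The result is returned on the CPU.
latents_cpu = ltx_aduc_manager.submit_job(
    job_type='vae',
    job_func=_job_encode_media,
    pixel_tensor=pixel_tensor,
)
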
# ==============================================================================

class VaeAducPipeline:
    """
    High-level client that orchestrates every VAE-related task.
    It defines the business logic and submits the jobs to the LTXAducManager.
    """
    def __init__(self):
        logging.info("✅ VAE ADUC Pipeline (Client) initialized and ready to submit jobs.")
        pass

    def __call__(
        self,
        media: Union[torch.Tensor, List[Union[Image.Image, str]]],
        task: Literal['encode', 'decode', 'create_conditioning_items'],
        target_resolution: Optional[Tuple[int, int]] = (512, 512),
        conditioning_params: Optional[List[Tuple[int, float]]] = None
            task: The task to execute ('encode', 'decode', 'create_conditioning_items').
            target_resolution: The (height, width) resolution for preprocessing.
            conditioning_params: For 'create_conditioning_items', a list of
                (frame_number, strength) tuples, one per media item.

        Returns:
            The task result, always on the CPU.
        if task == 'encode':
            if not isinstance(media, list): media = [media]
            pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, target_resolution[0], target_resolution[1]) for m in media]
            results = [ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_encode_media, pixel_tensor=pt) for pt in pixel_tensors]
            return results

        elif task == 'decode':
            if not isinstance(media, torch.Tensor):
                raise TypeError("For the 'decode' task, 'media' must be a single latent tensor.")
            return ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_decode_latent, latent_tensor=media)

        elif task == 'create_conditioning_items':
            if not isinstance(media, list) or not isinstance(conditioning_params, list) or len(media) != len(conditioning_params):
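A hypothetical end-to-end use of the client, under the signature above; the file paths are placeholders:

# Encode two reference images to latents, then decode one latent back to pixels.
vae_pipeline = VaeAducPipeline()
latents = vae_pipeline(["/tmp/ref_a.png", "/tmp/ref_b.png"], task='encode', target_resolution=(512, 768))
pixels = vae_pipeline(latents[0], task='decode')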
|