Upload 5 files

- api/ltx/ltx_aduc_manager.py +148 -93
- api/ltx/ltx_aduc_orchestrator.py +58 -61
- api/ltx/ltx_aduc_pipeline.py +128 -101
- api/ltx/ltx_utils.py +102 -216
- api/ltx/vae_aduc_pipeline.py +40 -42
api/ltx/ltx_aduc_manager.py
CHANGED

@@ -1,7 +1,6 @@
 # FILE: api/ltx/ltx_aduc_manager.py
-# DESCRIPTION:
-#
-# while still respecting the GPU separation defined by the GPUManager.
+# DESCRIPTION: An advanced, fault-tolerant pool manager for LTX and VAE workers.
+# It handles job queuing, load balancing, and health monitoring for production-grade stability.

 import logging
 import torch
@@ -10,14 +9,11 @@ from pathlib import Path
 import threading
 import queue
 import time
-import yaml
-import os
-from huggingface_hub import hf_hub_download
-from typing import List, Optional, Callable, Any, Tuple, Dict
+from typing import List, Optional, Callable, Any, Tuple

+# Imports for the builders and the gpu_manager
+from api.ltx.ltx_utils import get_main_ltx_pipeline, get_main_vae
 from managers.gpu_manager import gpu_manager
-from api.ltx.ltx_utils import build_complete_pipeline_on_cpu, create_transformer

 # --- Adds the LTX-Video path for type imports ---
 LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
@@ -28,138 +24,168 @@ def add_deps_to_path():
 add_deps_to_path()

 from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
+from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder

 # ==============================================================================
-# --- PIPELINE BUILDER ---
+# --- WORKER CLASSES (Task Specialists) ---
 # ==============================================================================

-    config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
-    with open(config_path, "r") as file:
-        config = yaml.safe_load(file)
-
-    ckpt_path = hf_hub_download(
-        repo_id="Lightricks/LTX-Video",
-        filename=config["checkpoint_path"],
-        cache_dir=os.environ.get("HF_HOME")
-    )
-    return build_complete_pipeline_on_cpu(ckpt_path, config)
-
-# ==============================================================================
-# --- UNIFIED WORKER CLASS ---
-# ==============================================================================
-
-class LTXWorker(threading.Thread):
-    """
-    A unified worker that manages a complete LTX pipeline instance.
-    It loads the model and distributes its components (Transformer/VAE) to the correct GPUs.
-    """
-    def __init__(self, worker_id: int):
-        super().__init__()
-        self.worker_id = worker_id
-        self.is_healthy = False
-        self.is_busy = False
-        self.daemon = True
-        self.autocast_dtype: torch.dtype = torch.float32
-
-    def run(self):
-        try:
-            self._set_precision_policy()
-
-            main_device = gpu_manager.get_ltx_device()
-            vae_device = gpu_manager.get_ltx_vae_device()
-
-            logging.info(f"[LTXWorker-{self.worker_id}] Moving components -> Main: {main_device}, VAE: {vae_device}")
-            self.pipeline.to(main_device)  # Move everything to the main GPU first
-            self.pipeline.vae.to(vae_device)  # Move the VAE specifically to its dedicated GPU
-
-            self.is_healthy = True
-        except Exception:
-            self.is_healthy = False
-
+class BaseWorker(threading.Thread):
+    """Base class for our workers, with state and health management."""
+    def __init__(self, worker_id: int, device: torch.device):
+        super().__init__()
+        self.worker_id = worker_id
+        self.device = device
+        self.is_healthy = False
+        self.is_busy = False
+        self.daemon = True  # Lets the main program exit
+
+    def run(self):
+        """The worker's life loop, responsible for loading the models."""
+        try:
+            self._load_models()
+            self.is_healthy = True
+            logging.info(f"✅ Worker {self.worker_id} ({self.__class__.__name__}) on {self.device} is healthy and ready.")
+        except Exception:
+            self.is_healthy = False
+            logging.error(f"❌ Worker {self.worker_id} on {self.device} FAILED to initialize!", exc_info=True)
+
+    def _load_models(self):
+        """To be implemented by the child classes."""
+        raise NotImplementedError
+
+    def get_status(self) -> Tuple[bool, bool]:
+        """Returns (is_healthy, is_busy)."""
+        return self.is_healthy, self.is_busy
+
+class LTXMainWorker(BaseWorker):
+    """Specialist worker for the main LTX pipeline."""
+    def __init__(self, worker_id: int, device: torch.device):
+        super().__init__(worker_id, device)
+        self.pipeline: Optional[LTXVideoPipeline] = None
+
+    def _load_models(self):
+        logging.info(f"[LTXWorker-{self.worker_id}] Loading models to CPU...")
+        self.pipeline = get_main_ltx_pipeline()
+        logging.info(f"[LTXWorker-{self.worker_id}] Moving pipeline to {self.device}...")
+        self.pipeline.to(self.device)
+
+    def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
+        """Runs a job while managing the 'busy' state."""
+        self.is_busy = True
+        logging.info(f"Worker {self.worker_id} (LTX) starting job: {job_func.__name__}")
+        try:
+            result = job_func(self.pipeline, *args, **kwargs)
+            logging.info(f"Worker {self.worker_id} (LTX) finished job successfully.")
+            return result
+        except Exception as e:
+            logging.error(f"Worker {self.worker_id} (LTX) job failed!", exc_info=True)
+            self.is_healthy = False  # A failure in a job marks the worker as unhealthy
+            raise
+        finally:
+            self.is_busy = False
+
+class VAEWorker(BaseWorker):
+    """Specialist worker for the VAE model."""
+    def __init__(self, worker_id: int, device: torch.device):
+        super().__init__(worker_id, device)
+        self.vae: Optional[CausalVideoAutoencoder] = None
+
+    def _load_models(self):
+        logging.info(f"[VAEWorker-{self.worker_id}] Loading VAE model to CPU...")
+        self.vae = get_main_vae()
+        logging.info(f"[VAEWorker-{self.worker_id}] Moving VAE to {self.device}...")
+        self.vae.to(self.device)
+        self.vae.eval()
+
+    def execute(self, job_func: Callable, args: tuple, kwargs: dict) -> Any:
+        """Runs a job while managing the 'busy' state."""
+        self.is_busy = True
+        logging.info(f"Worker {self.worker_id} (VAE) starting job: {job_func.__name__}")
+        try:
+            result = job_func(self.vae, *args, **kwargs)
+            logging.info(f"Worker {self.worker_id} (VAE) finished job successfully.")
+            return result
+        except Exception as e:
+            logging.error(f"Worker {self.worker_id} (VAE) job failed!", exc_info=True)
+            self.is_healthy = False
+            raise
+        finally:
+            self.is_busy = False

 # ==============================================================================
-# --- THE POOL MANAGER (SINGLETON) ---
+# --- THE ADVANCED POOL MANAGER (SINGLETON) ---
 # ==============================================================================
 class LTXAducManager:
     _instance = None
     _initialized = False

     def __new__(cls, *args, **kwargs):
         if cls._instance is None:
+            cls._instance = super().__new__(cls)
         return cls._instance

     def __init__(self):
         if self._initialized: return

-        logging.info("🏭 Initializing
+        logging.info("🏭 Initializing Advanced Pool Manager for LTX...")

-        self.workers: List[LTXWorker] = []
-        self.job_queue = queue.Queue()
+        self.ltx_workers: List[LTXMainWorker] = []
+        self.vae_workers: List[VAEWorker] = []
+        self.ltx_job_queue = queue.Queue()
+        self.vae_job_queue = queue.Queue()
         self.pool_lock = threading.Lock()

         self._initialize_workers()

+        # Start consumer threads to process the queues
+        self.ltx_dispatcher = threading.Thread(target=self._dispatch_jobs, args=(self.ltx_job_queue, self.ltx_workers), daemon=True)
+        self.vae_dispatcher = threading.Thread(target=self._dispatch_jobs, args=(self.vae_job_queue, self.vae_workers), daemon=True)
         self.health_monitor = threading.Thread(target=self._health_check_loop, daemon=True)
+
+        self.ltx_dispatcher.start()
+        self.vae_dispatcher.start()
         self.health_monitor.start()

         self._initialized = True
-        logging.info("✅
+        logging.info("✅ Advanced Pool Manager is running with all threads started.")

     def _initialize_workers(self):
-        self.workers.append(worker)
-        worker.start()
+        """Creates and starts the workers based on the allocated GPUs."""
+        # Assumes gpu_manager now has get_ltx_devices()/get_seedvr_devices() that return lists
+        ltx_gpus = gpu_manager.get_ltx_device()  # Adjust if the name differs
+        vae_gpus = gpu_manager.get_ltx_vae_device()  # Adjust if the name differs

+        with self.pool_lock:
+            for i, device_id in enumerate([ltx_gpus]):  # Assuming this returns a list
+                worker = LTXMainWorker(worker_id=i, device=torch.device(f"cuda:{device_id}"))
+                self.ltx_workers.append(worker)
+                worker.start()
+
+            for i, device_id in enumerate([vae_gpus]):  # Assuming this returns a list
+                worker = VAEWorker(worker_id=i, device=torch.device(f"cuda:{device_id}"))
+                self.vae_workers.append(worker)
+                worker.start()

-    def _get_available_worker(self) -> Optional[LTXWorker]:
-        with self.pool_lock:
-            for worker in self.workers:
-                return worker
-        return None
+    def _get_available_worker(self, worker_pool: List[BaseWorker]) -> Optional[BaseWorker]:
+        """Finds a healthy, idle worker in the pool."""
+        with self.pool_lock:
+            for worker in worker_pool:
+                healthy, busy = worker.get_status()
+                if healthy and not busy:
+                    return worker
+        return None

-    def _dispatch_jobs(self):
+    def _dispatch_jobs(self, job_queue: queue.Queue, worker_pool: List[BaseWorker]):
+        """Consumer-thread loop that takes jobs from the queue and dispatches them."""
         while True:
-            job_func, args, kwargs, future = self.job_queue.get()
+            job_func, args, kwargs, future = job_queue.get()
             worker = None
             while worker is None:
-                worker = self._get_available_worker()
+                worker = self._get_available_worker(worker_pool)
                 if worker is None:
+                    time.sleep(0.1)  # Wait for a worker to become free
+
             try:
                 result = worker.execute(job_func, args, kwargs)
                 future.put(result)
@@ -167,22 +193,51 @@ class LTXAducManager:
+            except Exception as e:
                 future.put(e)

     def _health_check_loop(self):
+        """Thread that periodically checks and restarts unhealthy workers."""
         while True:
             time.sleep(30)
+            logging.debug("Running health check on all workers...")
             with self.pool_lock:
-                for i, worker in enumerate(self.workers):
+                for i, worker in enumerate(self.ltx_workers):
                     if not worker.is_alive() or not worker.is_healthy:
-                        logging.warning(f"LTX Worker {worker.worker_id} is UNHEALTHY. Restarting...")
-                        new_worker = LTXWorker(worker.worker_id)
-                        self.workers[i] = new_worker
+                        logging.warning(f"LTX Worker {worker.worker_id} on {worker.device} is UNHEALTHY. Restarting...")
+                        new_worker = LTXMainWorker(worker.worker_id, worker.device)
+                        self.ltx_workers[i] = new_worker
                         new_worker.start()
+                # Repeat the loop for the VAE workers
+                for i, worker in enumerate(self.vae_workers):
+                    if not worker.is_alive() or not worker.is_healthy:
+                        logging.warning(f"VAE Worker {worker.worker_id} on {worker.device} is UNHEALTHY. Restarting...")
+                        new_worker = VAEWorker(worker.worker_id, worker.device)
+                        self.vae_workers[i] = new_worker
+                        new_worker.start()

+    def submit_job(self, job_type: str, job_func: Callable, *args, **kwargs) -> Any:
+        """
+        Public entry point for submitting a job to the pool.
+        This function is synchronous: it waits for the result.
+        """
+        if job_type not in ['ltx', 'vae']:
+            raise ValueError("Invalid job_type. Must be 'ltx' or 'vae'.")
+
+        job_queue = self.ltx_job_queue if job_type == 'ltx' else self.vae_job_queue
+        future = queue.Queue()  # We use a queue as a 'future' to get the result back
+
+        job_queue.put((job_func, args, kwargs, future))
+
+        # Block and wait for the dispatcher to put the result into the 'future'
         result = future.get()
+
+        if isinstance(result, Exception):
+            raise result  # If the job failed, re-raise the exception in the calling thread
+
         return result

+# ==============================================================================
 # --- GLOBAL INSTANTIATION ---
+# ==============================================================================
+try:
+    ltx_aduc_manager = LTXAducManager()
+except Exception as e:
+    logging.critical("CRITICAL ERROR: Failed to initialize the LTXAducManager pool.", exc_info=True)
+    ltx_aduc_manager = None
api/ltx/ltx_aduc_orchestrator.py
CHANGED

@@ -1,26 +1,21 @@
 # FILE: api/ltx_aduc_orchestrator.py
 # DESCRIPTION: The main workflow orchestrator for the ADUC-SDR LTX suite.
-#
-# to execute
+# It acts as the primary entry point for the UI, coordinating the specialized
+# LTX and VAE clients to execute a complete video generation pipeline from prompt to MP4.

 import logging
 import time
-import yaml
-import os
-import sys
 from PIL import Image
 from typing import Optional, Dict

-# The Orchestrator imports
+# The Orchestrator imports the specialist CLIENTS it will coordinate.
+# These clients are responsible for submitting the jobs to the worker pool.
 from api.ltx.ltx_aduc_pipeline import ltx_aduc_pipeline
+from api.ltx.vae_aduc_pipeline import vae_aduc_pipeline

 # The Orchestrator imports the TOOLS it needs for the final tasks.
 from tools.video_encode_tool import video_encode_tool_singleton

-# Imports Path to load the configuration.
-from pathlib import Path
-LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
-
 # ==============================================================================
 # --- THE ORCHESTRATOR CLASS (The Workflow's Brain) ---
 # ==============================================================================
@@ -28,26 +23,16 @@ LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
 class LtxAducOrchestrator:
     """
     Orchestrates the complete video generation workflow,
-    coordinating
+    coordinating the LTX and VAE clients. It is the main entry point for the UI.
     """
     def __init__(self):
         """
-        Initializes the orchestrator
+        Initializes the orchestrator. Initialization is lightweight, since the
+        heavy models are managed by the LTXAducManager in the background.
         """
-        self.output_dir = "/app/output"
-        self.base_config = self._load_base_config()
+        self.output_dir = "/app/output"  # Default directory for saving the videos
         logging.info("✅ LTX ADUC Orchestrator initialized and ready.")

-    def _load_base_config(self) -> Dict:
-        """Loads the base configuration from the YAML file, which holds the default parameters."""
-        try:
-            config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
-            with open(config_path, "r") as file:
-                return yaml.safe_load(file)
-        except Exception as e:
-            logging.error(f"Failed to load base config file. Orchestrator may not function correctly. Error: {e}")
-            return {}
-
     def __call__(
         self,
         prompt: str,
@@ -59,80 +44,87 @@ class LtxAducOrchestrator:
         output_filename_base: str = "ltx_aduc_video"
     ) -> Optional[str]:
         """
+        Main entry point of the Orchestrator. Runs the complete pipeline.

         Args:
+            prompt (str): The full text prompt. Each new line is treated as a scene.
+            initial_image (Optional[Image.Image]): A PIL image to condition the first scene.
             height (int): Height of the final video.
             width (int): Width of the final video.
             duration_in_seconds (float): Desired total duration of the video.
+            ltx_configs (Optional[Dict]): Advanced settings for the LTX generation (steps, guidance, etc.).
+            output_filename_base (str): The base name for the final video file.

         Returns:
+            Optional[str]: The path of the generated .mp4 video file, or None on failure.
         """
         t0 = time.time()
         logging.info(f"Orchestrator starting new job for prompt: '{prompt.splitlines()[0]}...'")

         try:
             # =================================================================
+            # --- STEP 1: INPUT PREPARATION ---
             # =================================================================
+            # Convert the prompt string into a list of scenes.
             prompt_list = [line.strip() for line in prompt.splitlines() if line.strip()]
             if not prompt_list:
                 raise ValueError("The prompt is empty or contains no valid lines.")

+            # Prepare the initial conditioning item, if an image was provided.
             initial_conditioning_items = []
             if initial_image:
+                logging.info("Preparing initial conditioning item via VAE client...")
+                # Set the parameters: apply at frame 0 with full strength (1.0).
+                conditioning_params = [(0, 1.0)]
+                # Call the VAE client to do the heavy lifting of turning the image into a LatentConditioningItem.
+                initial_conditioning_items = vae_aduc_pipeline(
+                    media=[initial_image],
+                    task='create_conditioning_items',
+                    target_resolution=(height, width),
+                    conditioning_params=conditioning_params
                 )
+                logging.info(f"Successfully created {len(initial_conditioning_items)} conditioning item(s).")

-            common_ltx_args = self.base_config.get("first_pass", {}).copy()
-            common_ltx_args.update({
-                'negative_prompt': "blurry, low quality, bad anatomy, deformed",
-                'height': height,
-                'width': width
-            })
-            if ltx_configs:
-                common_ltx_args.update(ltx_configs)

             # =================================================================
+            # --- STEP 2: LATENT VIDEO GENERATION ---
             # =================================================================
+            logging.info("Submitting job to LTX client for latent video generation...")
+            # Call the LTX client to generate the complete latent tensor.
+            final_latents, used_seed = ltx_aduc_pipeline(
                 prompt_list=prompt_list,
+                initial_conditioning_items=initial_conditioning_items,
+                height=height,
+                width=width,
                 duration_in_seconds=duration_in_seconds,
-                initial_conditioning_items=initial_conditioning_items
+                ltx_configs=ltx_configs
             )
+
             if final_latents is None:
                 raise RuntimeError("LTX client failed to generate a latent tensor.")
+            logging.info(f"LTX client returned latent tensor with shape: {final_latents.shape}")

             # =================================================================
+            # --- STEP 3: LATENT-TO-PIXEL DECODING ---
             # =================================================================
+            logging.info("Submitting job to VAE client for latent-to-pixel decoding...")
+            # Call the VAE client to convert the result into a viewable video (a pixel tensor).
+            pixel_tensor = vae_aduc_pipeline(
+                media=final_latents,
+                task='decode'
+            )
+
             if pixel_tensor is None:
+                raise RuntimeError("VAE client failed to decode the latent tensor.")
+            logging.info(f"VAE client returned pixel tensor with shape: {pixel_tensor.shape}")

             # =================================================================
+            # --- STEP 4: ENCODING TO THE FINAL MP4 FILE ---
             # =================================================================
             video_filename = f"{output_filename_base}_{int(time.time())}_{used_seed}.mp4"
             output_path = f"{self.output_dir}/{video_filename}"

+            logging.info(f"Submitting job to VideoEncodeTool to save final MP4 to: {output_path}")
+            # Use the video tool to save the pixel tensor to the final file.
             video_encode_tool_singleton.save_video_from_tensor(
                 pixel_5d=pixel_tensor,
                 path=output_path,
@@ -150,5 +142,10 @@

 # ==============================================================================
 # --- ORCHESTRATOR SINGLETON INSTANCE ---
+# This is the main entry point that the UI (app.py) will call.
 # ==============================================================================
-ltx_aduc_orchestrator = LtxAducOrchestrator()
+try:
+    ltx_aduc_orchestrator = LtxAducOrchestrator()
+except Exception as e:
+    logging.critical("CRITICAL: Failed to initialize the LtxAducOrchestrator.", exc_info=True)
+    ltx_aduc_orchestrator = None
api/ltx/ltx_aduc_pipeline.py
CHANGED

@@ -1,130 +1,142 @@
 # FILE: api/ltx/ltx_aduc_pipeline.py
-# DESCRIPTION: A
-# to the
+# DESCRIPTION: A high-level client for submitting LTX video generation jobs to the pool manager.
+# Its sole responsibility is to orchestrate the generation of a final LATENT tensor from prompts
+# and initial conditions, without handling pixel decoding.

 import logging
 import time
 import torch
 import random
-from dataclasses import dataclass
+import json
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple, Union, Dict

 # The client imports the MANAGER to submit jobs
 from api.ltx.ltx_aduc_manager import ltx_aduc_manager

+# The client needs the definition of LatentConditioningItem for its inputs
+from api.ltx.vae_aduc_pipeline import LatentConditioningItem
+
-def add_deps_to_path():
-    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
-    if repo_path not in sys.path:
-        sys.path.insert(0, repo_path)
-add_deps_to_path()
-
-from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
-from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
-
-# ==============================================================================
-# --- STRUCTURE DEFINITIONS ---
-# ==============================================================================
+DEPS_DIR = Path("/data")
+LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+if repo_path not in sys.path:
+    sys.path.insert(0, repo_path)
+print(f"[DEBUG] Repo added to sys.path: {repo_path}")
+from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline

 # ==============================================================================
 # --- JOB FUNCTIONS (Jobs executed on the LTX pool) ---
 # ==============================================================================

-    generator = torch.Generator(device=pipeline.device).manual_seed(kwargs['seed'])
-    pipeline_kwargs = {"generator": generator, "output_type": "latent", **kwargs}
+def _job_generate_latent_chunk(
+    pipeline: LTXVideoPipeline,
+    prompt: str,
+    negative_prompt: str,
+    height: int,
+    width: int,
+    num_frames: int,
+    seed: int,
+    conditioning_items: Optional[List[LatentConditioningItem]],
+    ltx_configs: Dict
+) -> torch.Tensor:
+    """
+    Job function that generates a single chunk (scene) of latent video.
+    This function runs INSIDE an LTXMainWorker.
+    """
+    generator = torch.Generator(device=pipeline.device).manual_seed(seed)
+
+    # Assemble the arguments for the pipeline call
+    pipeline_kwargs = {
+        "prompt": prompt,
+        "negative_prompt": negative_prompt,
+        "height": height,
+        "width": width,
+        "num_frames": num_frames,
+        "frame_rate": 24,  # Default; can be parameterized if needed
+        "generator": generator,
+        "output_type": "latent",  # Key point: we always ask for latents
+        "conditioning_items": conditioning_items if conditioning_items else None,
+        **ltx_configs  # Apply advanced settings (guidance, steps, etc.)
+    }
+
+    logging.info(f"[LTX Job] Generating a chunk of {num_frames} frames for prompt: '{prompt[:50]}...'")
+
+    # The pipeline is already on the correct GPU inside the worker
+    with torch.autocast(device_type=pipeline.device.type, dtype=torch.bfloat16):
+        latents_raw = pipeline(**pipeline_kwargs).images
+
+    # Return the latent tensor on the CPU to free the worker's VRAM for the next job
     return latents_raw.cpu()

 # ==============================================================================
-# --- THE CLIENT CLASS
+# --- THE CLIENT CLASS (Public Interface for Latent Video Generation) ---
 # ==============================================================================

 class LtxAducPipeline:
     """
-    Client
+    High-level client that orchestrates latent video generation.
+    Submits video-chunk generation jobs to the LTXAducManager.
     """
     def __init__(self):
-        logging.info("✅
+        logging.info("✅ LTX ADUC Pipeline (Client) initialized and ready to submit jobs.")
+        # __init__ stays clean; no models are loaded here.
         self.FRAMES_ALIGNMENT = 8

     def _get_random_seed(self) -> int:
+        """Always generates and returns a new random seed."""
         return random.randint(0, 2**32 - 1)

     def _align(self, dim: int, alignment: int = 8) -> int:
+        """Aligns a dimension to the nearest multiple."""
         return ((dim + alignment - 1) // alignment) * alignment

-    def encode_to_conditioning_items(self, media_list: List, params: List, resolution: Tuple[int, int]) -> List[LatentConditioningItem]:
-        """Converts a list of images into a list of LatentConditioningItem."""
-        pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, resolution[0], resolution[1]) for m in media_list]
-        items = []
-        for i, pt in enumerate(pixel_tensors):
-            latent_tensor = ltx_aduc_manager.submit_job(_job_encode_media, pixel_tensor=pt)
-            frame_number, strength = params[i]
-            items.append(LatentConditioningItem(
-                latent_tensor=latent_tensor,
-                media_frame_number=frame_number,
-                conditioning_strength=strength
-            ))
-        return items
-
-    def decode_to_pixels(self, latent_tensor: torch.Tensor) -> torch.Tensor:
-        """Decodes a latent tensor into a pixel tensor."""
-        return ltx_aduc_manager.submit_job(_job_decode_latent, latent_tensor=latent_tensor)
-
-    def generate_latents(
+    def __call__(
         self,
         prompt_list: List[str],
+        initial_conditioning_items: Optional[List[LatentConditioningItem]] = None,
+        height: int = 432,
+        width: int = 768,
+        duration_in_seconds: float = 4.0,
+        ltx_configs: Optional[Dict] = None
     ) -> Tuple[Optional[torch.Tensor], Optional[int]]:
-        """
+        """
+        Main entry point for generating a complete latent video.
+
+        Args:
+            prompt_list: List of prompts, where each prompt is one scene.
+            initial_conditioning_items: List of `LatentConditioningItem` objects to
+                condition the first scene.
+            height: Video height.
+            width: Video width.
+            duration_in_seconds: Desired total duration of the video.
+            ltx_configs: Dictionary of advanced settings for the LTX pipeline
+                (guidance_scale, num_inference_steps, etc.).
+
+        Returns:
+            A tuple containing:
+            - The complete final latent tensor (on the CPU).
+            - The main seed used for the generation.
+        """
         t0 = time.time()
-        logging.info(f"LTX Client received a generation job
+        logging.info(f"LTX Client received a generation job with {len(prompt_list)} scenes.")
+
+        if not prompt_list:
+            raise ValueError("The prompt list cannot be empty.")
+
         used_seed = self._get_random_seed()
+        logging.info(f"Generation seed set to: {used_seed}")

+        # --- Chunk-splitting and overlap logic ---
         num_chunks = len(prompt_list)
         total_frames = self._align(int(duration_in_seconds * 24))
         frames_per_chunk_base = total_frames // num_chunks
         overlap_frames = self._align(9) if num_chunks > 1 else 0

         final_latents_list = []
-        overlap_condition_item = None
+        overlap_condition_item: Optional[LatentConditioningItem] = None

         for i, chunk_prompt in enumerate(prompt_list):
             current_conditions = []
@@ -133,43 +145,58 @@ class LtxAducPipeline:
             if overlap_condition_item:
                 current_conditions.append(overlap_condition_item)

+            # Compute the number of frames for the current chunk
             num_frames_for_chunk = frames_per_chunk_base
-            if i == num_chunks - 1:
+            if i == num_chunks - 1:  # The last chunk takes the remainder
                 processed_frames = sum(f.shape[2] for f in final_latents_list)
                 num_frames_for_chunk = total_frames - processed_frames
+
             num_frames_for_chunk = self._align(num_frames_for_chunk)
-            if num_frames_for_chunk <= 0: continue
-
-            job_specific_args = {
-                "prompt": chunk_prompt,
-                "num_frames": num_frames_for_chunk,
-                "seed": used_seed + i,
-                "conditioning_items": current_conditions
-            }
-            final_job_args = {**common_ltx_args, **job_specific_args}

+            # --- Submit the job for the current chunk ---
+            chunk_latents = ltx_aduc_manager.submit_job(
+                job_type='ltx',
+                job_func=_job_generate_latent_chunk,
+                # Pass along every argument the job function needs
+                prompt=chunk_prompt,
+                negative_prompt="blurry, low quality, bad anatomy, deformed",  # Can be parameterized
+                height=height,
+                width=width,
+                num_frames=num_frames_for_chunk,
+                seed=used_seed + i,  # A different seed per chunk, for variety
+                conditioning_items=current_conditions,
+                ltx_configs=ltx_configs or {}
+            )

             if chunk_latents is None:
-                logging.error(f"Failed to generate latents for scene {i+1}. Aborting.")
+                logging.error(f"Failed to generate latents for scene {i+1}. Aborting generation.")
                 return None, used_seed

+            # --- "Kinetic echo" (overlap) management ---
             if i < num_chunks - 1:
+                # Save the last frames of the current chunk to condition the next one
                 overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
                 overlap_condition_item = LatentConditioningItem(
                     latent_tensor=overlap_latents,
+                    media_frame_number=0,  # Always conditions the start of the next chunk
+                    conditioning_strength=1.0  # Strong conditioning
+                )
+                # Append the current chunk without the overlap
                 final_latents_list.append(chunk_latents[:, :, :-overlap_frames, :, :])
             else:
+                # Append the last chunk in full
                 final_latents_list.append(chunk_latents)

-        if not final_latents_list:
-            logging.warning("No latent chunks were generated.")
-            return None, used_seed
-
+        # Concatenate all the latent chunks into a single tensor
         final_latents = torch.cat(final_latents_list, dim=2)
+
         logging.info(f"LTX Client job finished in {time.time() - t0:.2f}s. Final latent shape: {final_latents.shape}")

         return final_latents, used_seed

 # --- CLIENT SINGLETON INSTANCE ---
-ltx_aduc_pipeline = LtxAducPipeline()
+try:
+    ltx_aduc_pipeline = LtxAducPipeline()
+except Exception as e:
+    logging.critical("CRITICAL: Failed to initialize the LtxAducPipeline client.", exc_info=True)
+    ltx_aduc_pipeline = None
api/ltx/ltx_utils.py
CHANGED
|
@@ -1,263 +1,165 @@
|
|
| 1 |
# FILE: api/ltx/ltx_utils.py
|
| 2 |
-
# DESCRIPTION:
|
| 3 |
-
#
|
| 4 |
-
# and other stateless helper functions.
|
| 5 |
|
| 6 |
import os
|
| 7 |
import random
|
| 8 |
import json
|
| 9 |
import logging
|
|
|
|
| 10 |
import sys
|
| 11 |
from pathlib import Path
|
| 12 |
-
from typing import Dict, Tuple, Union
|
| 13 |
-
|
| 14 |
-
from PIL import Image
|
| 15 |
|
|
|
|
| 16 |
import torch
|
|
|
|
|
|
|
| 17 |
from safetensors import safe_open
|
| 18 |
from transformers import T5EncoderModel, T5Tokenizer
|
| 19 |
|
| 20 |
# ==============================================================================
|
| 21 |
-
# ---
|
| 22 |
# ==============================================================================
|
| 23 |
|
|
|
|
| 24 |
LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
def add_deps_to_path():
|
| 27 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 28 |
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
|
| 29 |
if repo_path not in sys.path:
|
| 30 |
sys.path.insert(0, repo_path)
|
| 31 |
logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")
|
| 32 |
|
|
|
|
| 33 |
add_deps_to_path()
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
try:
|
| 36 |
from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
|
|
|
|
| 37 |
from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
|
| 38 |
from ltx_video.models.transformers.transformer3d import Transformer3DModel
|
| 39 |
from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
|
| 40 |
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
|
|
|
| 41 |
except ImportError as e:
|
| 42 |
-
|
| 43 |
-
|
| 44 |
|
| 45 |
# ==============================================================================
|
| 46 |
-
# ---
|
| 47 |
# ==============================================================================
|
| 48 |
|
| 49 |
-
def
|
| 50 |
-
"""
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
from q8_kernels.integration.patch_transformer import patch_diffusers_transformer as patch_transformer_for_q8_kernels
|
| 57 |
-
transformer = Transformer3DModel.from_pretrained(ckpt_path, dtype=torch.float8_e4m3fn)
|
| 58 |
-
patch_transformer_for_q8_kernels(transformer)
|
| 59 |
-
return transformer
|
| 60 |
-
except ImportError:
|
| 61 |
-
raise ValueError("Q8-Kernels not found. To use FP8 checkpoint, please install Q8 kernels from the project's wheels.")
|
| 62 |
-
elif precision == "bfloat16":
|
| 63 |
-
return Transformer3DModel.from_pretrained(ckpt_path).to(torch.bfloat16)
|
| 64 |
-
else:
|
| 65 |
-
return Transformer3DModel.from_pretrained(ckpt_path)
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
| 70 |
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
| 77 |
|
| 78 |
-
|
|
|
|
| 79 |
metadata = f.metadata() or {}
|
| 80 |
config_str = metadata.get("config", "{}")
|
| 81 |
-
|
|
|
|
| 82 |
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
scheduler = RectifiedFlowScheduler.from_pretrained(checkpoint_path)
|
| 89 |
-
text_encoder = T5EncoderModel.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="text_encoder").to("cpu")
|
| 90 |
-
tokenizer = T5Tokenizer.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="tokenizer")
|
| 91 |
patchifier = SymmetricPatchifier(patch_size=1)
|
| 92 |
-
vae = CausalVideoAutoencoder.from_pretrained(checkpoint_path).to("cpu")
|
| 93 |
|
|
|
|
| 94 |
if precision == "bfloat16":
|
| 95 |
-
text_encoder.to(torch.bfloat16)
|
| 96 |
vae.to(torch.bfloat16)
|
| 97 |
-
|
| 98 |
-
|
|
|
|
| 99 |
pipeline = LTXVideoPipeline(
|
| 100 |
-
transformer=transformer,
|
| 101 |
-
|
| 102 |
-
text_encoder=text_encoder,
|
| 103 |
-
tokenizer=tokenizer,
|
| 104 |
-
scheduler=scheduler,
|
| 105 |
-
vae=vae, # VAE é incluído para que o pipeline possa ser auto-suficiente
|
| 106 |
allowed_inference_steps=allowed_inference_steps,
|
| 107 |
-
prompt_enhancer_image_caption_model=None,
|
| 108 |
-
|
| 109 |
-
prompt_enhancer_llm_model=None,
|
| 110 |
-
prompt_enhancer_llm_tokenizer=None,
|
| 111 |
)
|
| 112 |
-
|
| 113 |
-
return pipeline
|
| 114 |
|
| 115 |
-
# ==============================================================================
|
| 116 |
-
# --- FUNÇÕES AUXILIARES GENÉRICAS ---
|
| 117 |
-
# ==============================================================================
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
# # FILE: api/ltx/ltx_utils.py
|
| 121 |
-
# DESCRIPTION: A pure utility library for the LTX ecosystem.
|
| 122 |
-
# Contains the official low-level builder function for the complete pipeline
|
| 123 |
-
# and other stateless helper functions.
|
| 124 |
-
|
| 125 |
-
import os
|
| 126 |
-
import random
|
| 127 |
-
import json
|
| 128 |
-
import logging
|
| 129 |
-
import sys
|
| 130 |
-
from pathlib import Path
|
| 131 |
-
from typing import Dict, Tuple
|
| 132 |
-
|
| 133 |
-
import torch
|
| 134 |
-
from safetensors import safe_open
|
| 135 |
-
from transformers import T5EncoderModel, T5Tokenizer
|
| 136 |
-
|
| 137 |
-
# ==============================================================================
|
| 138 |
-
# --- CONFIGURAÇÃO DE PATH E IMPORTS DA BIBLIOTECA LTX ---
|
| 139 |
-
# ==============================================================================
|
| 140 |
-
|
| 141 |
-
LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
|
| 142 |
-
|
| 143 |
-
def add_deps_to_path():
|
| 144 |
-
"""Adiciona o diretório do repositório LTX ao sys.path para importação de suas bibliotecas."""
|
| 145 |
-
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
|
| 146 |
-
if repo_path not in sys.path:
|
| 147 |
-
sys.path.insert(0, repo_path)
|
| 148 |
-
logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")
|
| 149 |
-
|
| 150 |
-
add_deps_to_path()
|
| 151 |
-
|
| 152 |
-
try:
|
| 153 |
-
from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
|
| 154 |
-
from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
|
| 155 |
-
from ltx_video.models.transformers.transformer3d import Transformer3DModel
|
| 156 |
-
from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
|
| 157 |
-
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
| 158 |
-
except ImportError as e:
|
| 159 |
-
logging.critical("Failed to import a core LTX-Video library component.", exc_info=True)
|
| 160 |
-
raise ImportError(f"Could not import from LTX-Video library. Check repo integrity at '{LTX_VIDEO_REPO_DIR}'. Error: {e}")
|
| 161 |
-
|
| 162 |
-
# ==============================================================================
|
| 163 |
-
# --- FUNÇÃO HELPER 'create_transformer' (Essencial) ---
|
| 164 |
-
# ==============================================================================
|
| 165 |
-
|
| 166 |
-
def create_transformer(ckpt_path: str, precision: str) -> Transformer3DModel:
|
| 167 |
-
"""
|
| 168 |
-
Cria e carrega o modelo Transformer3D com a lógica de precisão correta,
|
| 169 |
-
incluindo suporte para a otimização float8_e4m3fn.
|
| 170 |
-
"""
|
| 171 |
-
if precision == "float8_e4m3fn":
|
| 172 |
-
try:
|
| 173 |
-
from q8_kernels.integration.patch_transformer import patch_diffusers_transformer as patch_transformer_for_q8_kernels
|
| 174 |
-
transformer = Transformer3DModel.from_pretrained(ckpt_path, dtype=torch.float8_e4m3fn)
|
| 175 |
-
patch_transformer_for_q8_kernels(transformer)
|
| 176 |
-
return transformer
|
| 177 |
-
except ImportError:
|
| 178 |
-
raise ValueError("Q8-Kernels not found. To use FP8 checkpoint, please install Q8 kernels from the project's wheels.")
|
| 179 |
-
elif precision == "bfloat16":
|
| 180 |
-
return Transformer3DModel.from_pretrained(ckpt_path).to(torch.bfloat16)
|
| 181 |
-
else:
|
| 182 |
-
return Transformer3DModel.from_pretrained(ckpt_path)
|
| 183 |
-
|
| 184 |
-
# ==============================================================================
# --- OFFICIAL LOW-LEVEL BUILDER ---
# ==============================================================================

def build_complete_pipeline_on_cpu(checkpoint_path: str, config: Dict) -> LTXVideoPipeline:
    """
    Builds the COMPLETE LTX pipeline, including the VAE, and keeps it on the CPU.
    This is the fundamental construction function used by the LTXAducManager.
    """
    logging.info(f"Building complete LTX pipeline from checkpoint: {Path(checkpoint_path).name}")

    with safe_open(checkpoint_path, framework="pt") as f:
        metadata = f.metadata() or {}
        config_str = metadata.get("config", "{}")
        allowed_inference_steps = json.loads(config_str).get("allowed_inference_steps")

    precision = config.get("precision", "bfloat16")

    # Use the dedicated helper to create the transformer with the right precision
    transformer = create_transformer(checkpoint_path, precision).to("cpu")

    scheduler = RectifiedFlowScheduler.from_pretrained(checkpoint_path)
    text_encoder = T5EncoderModel.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="text_encoder").to("cpu")
    tokenizer = T5Tokenizer.from_pretrained(config["text_encoder_model_name_or_path"], subfolder="tokenizer")
    patchifier = SymmetricPatchifier(patch_size=1)
    vae = CausalVideoAutoencoder.from_pretrained(checkpoint_path).to("cpu")

    if precision == "bfloat16":
        text_encoder.to(torch.bfloat16)
        vae.to(torch.bfloat16)
        # The transformer was already converted to bfloat16 inside create_transformer, where applicable

    pipeline = LTXVideoPipeline(
        transformer=transformer, patchifier=patchifier, text_encoder=text_encoder,
        tokenizer=tokenizer, scheduler=scheduler, vae=vae,
        allowed_inference_steps=allowed_inference_steps,
        prompt_enhancer_image_caption_model=None, prompt_enhancer_image_caption_processor=None,
        prompt_enhancer_llm_model=None, prompt_enhancer_llm_tokenizer=None,
    )
    return pipeline
# ==============================================================================
# --- AUXILIARY FUNCTIONS ---
# ==============================================================================

def seed_everything(seed: int):
    """
    Sets the seed for PyTorch, NumPy, and Python to guarantee reproducibility.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
def load_image_to_tensor_with_resize_and_crop(
    image_input: Union[str, Image.Image],
    target_height: int,
    target_width: int,
) -> torch.Tensor:
    """
    Loads, resizes, crops, and processes an image into a 5D pixel tensor,
    normalized to [-1, 1], ready to be sent to the VAE for encoding.
    """
    if isinstance(image_input, str):
        image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise ValueError("image_input must be a file path or a PIL Image object")

@@ -267,38 +169,22 @@ def load_image_to_tensor_with_resize_and_crop(

    if aspect_ratio_frame > aspect_ratio_target:
        new_width, new_height = int(input_height * aspect_ratio_target), input_height
        x_start = (input_width - new_width) // 2
        image = image.crop((x_start, 0, x_start + new_width, new_height))
    else:
        new_height = int(input_width / aspect_ratio_target)
        y_start = (input_height - new_height) // 2
        image = image.crop((0, y_start, input_width, y_start + new_height))

    image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    frame_tensor = TVF.to_tensor(image)
    try:
        from ltx_video.pipelines import crf_compressor
        frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
        frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
        frame_tensor = frame_tensor_hwc.permute(2, 0, 1)
    except ImportError:
        logging.warning("CRF Compressor not found. Skipping compression step.")

    frame_tensor = (frame_tensor * 2.0) - 1.0
    return frame_tensor.unsqueeze(0).unsqueeze(2)
# FILE: api/ltx/ltx_utils.py
# DESCRIPTION: Comprehensive, self-contained utility module for the LTX pipeline.
# Handles dependency path injection, model loading, pipeline creation, and tensor preparation.

import os
import random
import json
import logging
import time
import sys
from pathlib import Path
from typing import Dict, Optional, Tuple, Union
from huggingface_hub import hf_hub_download

import numpy as np
import torch
import torchvision.transforms.functional as TVF
from PIL import Image
from safetensors import safe_open
from transformers import T5EncoderModel, T5Tokenizer
# ==============================================================================
# --- CRITICAL: DEPENDENCY PATH INJECTION ---
# ==============================================================================

# Path to the cloned repository
LTX_VIDEO_REPO_DIR = Path("/data/LTX-Video")
LTX_REPO_ID = "Lightricks/LTX-Video"
CACHE_DIR = os.environ.get("HF_HOME")


def add_deps_to_path():
    """
    Adds the LTX repository directory to sys.path to guarantee that its
    libraries can be imported.
    """
    repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
    if repo_path not in sys.path:
        sys.path.insert(0, repo_path)
        logging.info(f"[ltx_utils] LTX-Video repository added to sys.path: {repo_path}")

# Run immediately so the environment is configured before any LTX import.
add_deps_to_path()
# ==============================================================================
# --- LTX-VIDEO LIBRARY IMPORTS (after path setup) ---
# ==============================================================================
try:
    from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline
    from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
    from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
    from ltx_video.models.transformers.transformer3d import Transformer3DModel
    from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
    from ltx_video.schedulers.rf import RectifiedFlowScheduler
    import ltx_video.pipelines.crf_compressor as crf_compressor
except ImportError as e:
    raise ImportError(f"Could not import from LTX-Video library even after setting sys.path. Check repo integrity at '{LTX_VIDEO_REPO_DIR}'. Error: {e}")
# ==============================================================================
# --- MODEL AND PIPELINE CONSTRUCTION FUNCTIONS ---
# ==============================================================================

def create_latent_upsampler(latent_upsampler_model_path: str, device: str) -> LatentUpsampler:
    """Loads the Latent Upsampler model from a checkpoint path."""
    logging.info(f"Loading Latent Upsampler from: {latent_upsampler_model_path} to device: {device}")
    latent_upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
    latent_upsampler.to(device)
    latent_upsampler.eval()
    return latent_upsampler
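A usage sketch for this loader (illustrative only; the local checkpoint path is hypothetical):

# eval() above disables training-time behavior; the module can be moved to a GPU later.
upsampler = create_latent_upsampler("/data/checkpoints/ltxv-spatial-upscaler.safetensors", device="cpu")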
def build_ltx_pipeline_on_cpu(config: Dict) -> Tuple[LTXVideoPipeline, Optional[torch.nn.Module], CausalVideoAutoencoder]:
    """Builds the complete LTX pipeline, the optional latent upsampler, and a standalone VAE on the CPU."""
    t0 = time.perf_counter()
    logging.info("Building LTX pipeline on CPU...")

    ckpt_path_str = hf_hub_download(repo_id=LTX_REPO_ID, filename=config["checkpoint_path"], cache_dir=CACHE_DIR)
    ckpt_path = Path(ckpt_path_str)
    if not ckpt_path.is_file():
        raise FileNotFoundError(f"Main checkpoint file not found: {ckpt_path}")

    logging.info(f"Building LTX pipeline ckpt: {ckpt_path_str}")

    with safe_open(ckpt_path, framework="pt") as f:
        metadata = f.metadata() or {}
        config_str = metadata.get("config", "{}")
        configs = json.loads(config_str)
        allowed_inference_steps = configs.get("allowed_inference_steps")

    vae = CausalVideoAutoencoder.from_pretrained(ckpt_path).to("cpu")
    transformer = Transformer3DModel.from_pretrained(ckpt_path).to("cpu")
    scheduler = RectifiedFlowScheduler.from_pretrained(ckpt_path)

    text_encoder_path = config["text_encoder_model_name_or_path"]
    text_encoder = T5EncoderModel.from_pretrained(text_encoder_path, subfolder="text_encoder").to("cpu")
    tokenizer = T5Tokenizer.from_pretrained(text_encoder_path, subfolder="tokenizer")
    patchifier = SymmetricPatchifier(patch_size=1)

    precision = config.get("precision", "bfloat16")
    if precision == "bfloat16":
        vae.to(torch.bfloat16)
        transformer.to(torch.bfloat16)
        text_encoder.to(torch.bfloat16)

    pipeline = LTXVideoPipeline(
        transformer=transformer, patchifier=patchifier, text_encoder=text_encoder,
        tokenizer=tokenizer, scheduler=scheduler, vae=vae,
        allowed_inference_steps=allowed_inference_steps,
        prompt_enhancer_image_caption_model=None, prompt_enhancer_image_caption_processor=None,
        prompt_enhancer_llm_model=None, prompt_enhancer_llm_tokenizer=None,
    )
    # Create a second, standalone VAE instance so the caller gets a VAE that is
    # independent of the one held by the pipeline.
    vae = CausalVideoAutoencoder.from_pretrained(ckpt_path).to("cpu")
    if precision == "bfloat16":
        vae.to(torch.bfloat16)

    latent_upsampler = None
    if config.get("spatial_upscaler_model_path"):
        spatial_path_str = hf_hub_download(repo_id=LTX_REPO_ID, filename=config["spatial_upscaler_model_path"], cache_dir=CACHE_DIR)
        spatial_path = Path(spatial_path_str)
        if not spatial_path.is_file():
            raise FileNotFoundError(f"Spatial upscaler checkpoint file not found: {spatial_path_str}")
        logging.info(f"Building UPSCALER pipeline ckpt: {spatial_path_str}")
        latent_upsampler = create_latent_upsampler(spatial_path, device="cpu")
        if precision == "bfloat16":
            latent_upsampler.to(torch.bfloat16)

    logging.info(f"LTX pipeline built on CPU in {time.perf_counter() - t0:.2f}s")
    return pipeline, latent_upsampler, vae
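A minimal usage sketch for the builder (illustrative; the config values below are assumptions, not taken from the repo's YAML):

# Hypothetical config: "checkpoint_path" names a file inside the Lightricks/LTX-Video
# HF repo, and the text encoder repo id is a guess.
config = {
    "checkpoint_path": "ltxv-13b-0.9.8-distilled.safetensors",              # assumption
    "text_encoder_model_name_or_path": "PixArt-alpha/PixArt-XL-2-1024-MS",  # assumption
    "precision": "bfloat16",
    "spatial_upscaler_model_path": None,  # skip the optional upsampler
}
pipeline, latent_upsampler, vae = build_ltx_pipeline_on_cpu(config)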
# ==============================================================================
# --- AUXILIARY FUNCTIONS (seed, image preparation) ---
# ==============================================================================

def seed_everything(seed: int):
    """Sets the seed for reproducibility."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
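Calling this once before sampling makes runs repeatable, at the cost of cuDNN autotuning, since benchmark mode is disabled:

seed_everything(42)  # identical seeds now yield identical noise draws across runs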
def load_image_to_tensor_with_resize_and_crop(
    image_input: Union[str, Image.Image],
    target_height: int,
    target_width: int,
) -> torch.Tensor:
    """Loads and processes an image into a 5D pixel tensor compatible with the LTX pipeline."""
    if isinstance(image_input, str):
        image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise ValueError("image_input must be a file path or a PIL Image object")

    input_width, input_height = image.size
    aspect_ratio_target = target_width / target_height
    aspect_ratio_frame = input_width / input_height

    if aspect_ratio_frame > aspect_ratio_target:
        new_width, new_height = int(input_height * aspect_ratio_target), input_height
        x_start, y_start = (input_width - new_width) // 2, 0
    else:
        new_width, new_height = input_width, int(input_width / aspect_ratio_target)
        x_start, y_start = 0, (input_height - new_height) // 2

    image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
    image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    frame_tensor = TVF.to_tensor(image)  # PIL -> tensor (C, H, W) in [0, 1] range
    frame_tensor = TVF.gaussian_blur(frame_tensor, kernel_size=(3, 3))

    frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
    frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
    frame_tensor = frame_tensor_hwc.permute(2, 0, 1)

    # Normalize to [-1, 1] range, which the VAE expects for encoding
    frame_tensor = (frame_tensor * 2.0) - 1.0

    # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
    return frame_tensor.unsqueeze(0).unsqueeze(2)
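A quick shape sketch for this helper (the image path is hypothetical):

# Produces a (1, 3, 1, H, W) tensor in [-1, 1], ready for VAE encoding.
pixels = load_image_to_tensor_with_resize_and_crop("frame.png", target_height=512, target_width=768)
print(pixels.shape)  # torch.Size([1, 3, 1, 512, 768])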
api/ltx/vae_aduc_pipeline.py
CHANGED
@@ -5,39 +5,36 @@
import logging
import time
import torch
import os
import torchvision.transforms.functional as TVF
from PIL import Image
from typing import List, Union, Tuple, Literal
from dataclasses import dataclass

import sys

# The client imports the MANAGER to submit jobs to the worker pool.
from api.ltx.ltx_aduc_manager import ltx_aduc_manager

LTX_VIDEO_REPO_DIR =

from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
import ltx_video.pipelines.crf_compressor as crf_compressor

# ==============================================================================
# --- STRUCTURE DEFINITIONS AND HELPERS ---
# ==============================================================================

@dataclass
class LatentConditioningItem:
    """
    Data structure for passing conditioned latents between services.
    The latent tensor is kept on the CPU to save VRAM.
    """
    latent_tensor: torch.Tensor
    media_frame_number: int
@@ -50,32 +47,32 @@ def load_image_to_tensor_with_resize_and_crop(
) -> torch.Tensor:
    """
    Loads and processes an image into a 5D pixel tensor, normalized to [-1, 1],
    ready to be sent to the VAE.
    """
    if isinstance(image_input, str):
        image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise ValueError("image_input must be a file path or a PIL Image object")

    # Crop-and-resize logic to preserve the aspect ratio
    input_width, input_height = image.size
    aspect_ratio_target = target_width / target_height
    aspect_ratio_frame = input_width / input_height
    if aspect_ratio_frame > aspect_ratio_target:
        new_width, new_height = int(input_height * aspect_ratio_target), input_height
        x_start = (input_width - new_width) // 2
        image = image.crop((x_start, 0, x_start + new_width, new_height))
    else:
        new_height = int(input_width / aspect_ratio_target)
        y_start = (input_height - new_height) // 2

    image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    # Conversion to tensor and normalization
    frame_tensor = TVF.to_tensor(image)
    frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
    frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
    frame_tensor = frame_tensor_hwc.permute(2, 0, 1)

@@ -83,20 +80,21 @@ def load_image_to_tensor_with_resize_and_crop(
    frame_tensor = (frame_tensor * 2.0) - 1.0
    return frame_tensor.unsqueeze(0).unsqueeze(2)
# ==============================================================================
# --- JOB FUNCTIONS (jobs executed on the pool) ---
# ==============================================================================

def _job_encode_media(vae: CausalVideoAutoencoder, pixel_tensor: torch.Tensor) -> torch.Tensor:
    """Encodes a pixel tensor on the VAE's device and returns the latents on the CPU."""
    device = vae.device
    dtype = vae.dtype
    pixel_tensor_gpu = pixel_tensor.to(device, dtype=dtype)
    latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
    return latents.cpu()

def _job_decode_latent(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor):
    """Decodes a latent tensor on the VAE's device."""
    device = vae.device
    dtype = vae.dtype
    latent_tensor_gpu = latent_tensor.to(device, dtype=dtype)
@@ -108,17 +106,14 @@ def _job_decode_latent(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor)
# ==============================================================================

class VaeAducPipeline:
    """
    High-level client for orchestrating all VAE-related tasks.
    It defines the business logic and submits the jobs to the LTXAducManager.
    """
    def __init__(self):
        logging.info("✅ VAE ADUC Pipeline (Client) initialized and ready to submit jobs.")

    def __call__(
        self,
        media: Union[torch.Tensor, List[Union[Image.Image, torch.Tensor]]],
        task: Literal['encode', 'decode', 'create_conditioning_items'],
        target_resolution: Optional[Tuple[int, int]] = (512, 512),
        conditioning_params: Optional[List[Tuple[int, float]]] = None
@@ -131,7 +126,7 @@ class VaeAducPipeline:
            task: The task to execute ('encode', 'decode', 'create_conditioning_items').
            target_resolution: The (height, width) resolution for preprocessing.
            conditioning_params: For 'create_conditioning_items', a list of
                (frame_number, strength) tuples.

        Returns:
            The result of the task, always on the CPU.
@@ -142,13 +137,16 @@ class VaeAducPipeline:
        if task == 'encode':
            if not isinstance(media, list): media = [media]
            pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, target_resolution[0], target_resolution[1]) for m in media]
            results = [ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_encode_media, pixel_tensor=pt) for pt in pixel_tensors]
            return results

        elif task == 'decode':
            if not isinstance(media, torch.Tensor):
                raise TypeError("For 'decode', 'media' must be a single latent tensor.")
            return ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_decode_latent, latent_tensor=media)

        elif task == 'create_conditioning_items':
            if not isinstance(media, list) or not isinstance(conditioning_params, list) or len(media) != len(conditioning_params):
import logging
import time
import torch
import torchvision.transforms.functional as TVF
from PIL import Image
from typing import List, Optional, Union, Tuple, Literal
from dataclasses import dataclass
import os
import subprocess
import sys
from pathlib import Path

from api.ltx.ltx_aduc_manager import ltx_aduc_manager

DEPS_DIR = Path("/data")
LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
if repo_path not in sys.path:
    sys.path.insert(0, repo_path)
print(f"[DEBUG] Repo added to sys.path: {repo_path}")
from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
from ltx_video.models.autoencoders.vae_encode import vae_encode, vae_decode
import ltx_video.pipelines.crf_compressor as crf_compressor
# ==============================================================================
# --- STRUCTURE DEFINITIONS AND HELPERS (imported or moved here) ---
# ==============================================================================

@dataclass
class LatentConditioningItem:
    """
    Data structure for passing conditioned latents between services.
    The latent tensor is kept on the CPU to save VRAM.
    """
    latent_tensor: torch.Tensor
    media_frame_number: int
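A construction sketch, assuming the two fields shown here are the whole dataclass (the latent shape is arbitrary, for illustration only):

item = LatentConditioningItem(
    latent_tensor=torch.zeros(1, 128, 1, 16, 16),  # kept on the CPU by convention
    media_frame_number=0,
)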
) -> torch.Tensor:
    """
    Loads and processes an image into a 5D pixel tensor, normalized to [-1, 1],
    ready to be sent to the VAE.
    """
    if isinstance(image_input, str):
        image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise ValueError("image_input must be a file path or a PIL Image object")

    input_width, input_height = image.size
    aspect_ratio_target = target_width / target_height
    aspect_ratio_frame = input_width / input_height

    if aspect_ratio_frame > aspect_ratio_target:
        new_width, new_height = int(input_height * aspect_ratio_target), input_height
        x_start, y_start = (input_width - new_width) // 2, 0
    else:
        new_width, new_height = input_width, int(input_width / aspect_ratio_target)
        x_start, y_start = 0, (input_height - new_height) // 2

    image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
    image = image.resize((target_width, target_height), Image.Resampling.LANCZOS)

    frame_tensor = TVF.to_tensor(image)
    frame_tensor = TVF.gaussian_blur(frame_tensor, kernel_size=(3, 3))

    frame_tensor_hwc = frame_tensor.permute(1, 2, 0)
    frame_tensor_hwc = crf_compressor.compress(frame_tensor_hwc)
    frame_tensor = frame_tensor_hwc.permute(2, 0, 1)

    frame_tensor = (frame_tensor * 2.0) - 1.0
    return frame_tensor.unsqueeze(0).unsqueeze(2)
# ==============================================================================
# --- JOB FUNCTIONS (jobs executed on the pool) ---
# ==============================================================================

def _job_encode_media(vae: CausalVideoAutoencoder, pixel_tensor: torch.Tensor) -> torch.Tensor:
    """Generic job function for encoding a pixel tensor."""
    device = vae.device
    dtype = vae.dtype
    pixel_tensor_gpu = pixel_tensor.to(device, dtype=dtype)
    latents = vae_encode(pixel_tensor_gpu, vae, vae_per_channel_normalize=True)
    return latents.cpu()

def _job_decode_latent_to_pixels(vae: CausalVideoAutoencoder, latent_tensor: torch.Tensor) -> torch.Tensor:
    """Job function for decoding a latent tensor."""
    device = vae.device
    dtype = vae.dtype
    latent_tensor_gpu = latent_tensor.to(device, dtype=dtype)
# ==============================================================================

class VaeAducPipeline:
    """High-level client for orchestrating all VAE tasks."""
    def __init__(self):
        logging.info("✅ VAE ADUC Pipeline (Client) initialized and ready to submit jobs.")

    def __call__(
        self,
        media: Union[torch.Tensor, List[Union[Image.Image, torch.Tensor]]],
        task: Literal['encode', 'decode', 'create_conditioning_items'],
        target_resolution: Optional[Tuple[int, int]] = (512, 512),
        conditioning_params: Optional[List[Tuple[int, float]]] = None
            task: The task to execute ('encode', 'decode', 'create_conditioning_items').
            target_resolution: The (height, width) resolution for preprocessing.
            conditioning_params: For 'create_conditioning_items', a list of
                (frame_number, strength) tuples, one per media item.

        Returns:
            The result of the task, always on the CPU.
        if task == 'encode':
            if not isinstance(media, list): media = [media]
            pixel_tensors = [load_image_to_tensor_with_resize_and_crop(m, target_resolution[0], target_resolution[1]) for m in media]
            results = []
            for pt in pixel_tensors:
                latent = ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_encode_media, pixel_tensor=pt)
                results.append(latent)
            return results

        elif task == 'decode':
            if not isinstance(media, torch.Tensor):
                raise TypeError("For 'decode', 'media' must be a single latent tensor.")
            return ltx_aduc_manager.submit_job(job_type='vae', job_func=_job_decode_latent_to_pixels, latent_tensor=media)

        elif task == 'create_conditioning_items':
            if not isinstance(media, list) or not isinstance(conditioning_params, list) or len(media) != len(conditioning_params):
|