eeuuia committed
Commit 3b5ff37 · verified · 1 Parent(s): 3f8bf2e

Update api/ltx_server_refactored_complete.py

Files changed (1):
  api/ltx_server_refactored_complete.py +419 -190
api/ltx_server_refactored_complete.py CHANGED
@@ -1,214 +1,443 @@
- # FILE: api/vince_pool_manager.py
- # DESCRIPTION: Singleton manager for a pool of VINCIE workers, integrated with a central GPU manager.
 
- import os
- import sys
  import gc
  import subprocess
- import threading
- import traceback
  from pathlib import Path
- from typing import List
 
  import torch
- from omegaconf import open_dict
 
- # --- Central GPU Manager Import ---
- # This is the key piece of the integration: the pool manager asks it which GPUs to use.
  try:
      from api.gpu_manager import gpu_manager
  except ImportError as e:
-     print(f"CRITICAL ERROR: Could not import gpu_manager. {e}", file=sys.stderr)
      sys.exit(1)
 
- # --- Global Settings (Read from the Environment) ---
- VINCIE_DIR = Path(os.getenv("VINCIE_DIR", "/data/VINCIE"))
- VINCIE_CKPT_DIR = Path(os.getenv("VINCIE_CKPT_DIR", "/data/ckpt/VINCIE-3B"))
- 
- # --- Worker Class (Manages a single GPU in isolation) ---
- class VinceWorker:
-     """
-     Manages a single instance of the VINCIE pipeline on a specific GPU device.
-     Runs in an "isolated" environment to guarantee it only sees its own GPU.
-     """
-     def __init__(self, device_id: str, config_path: str):
-         self.device_id_str = device_id
-         self.gpu_index_str = self.device_id_str.split(':')[-1]
-         self.config_path = config_path
-         self.gen = None
-         self.config = None
-         print(f"[VinceWorker-{self.device_id_str}] Initialized. Mapped to physical GPU index {self.gpu_index_str}.")
- 
-     def _execute_in_isolated_env(self, function_to_run, *args, **kwargs):
-         """
-         Crucial wrapper that sets CUDA_VISIBLE_DEVICES to isolate GPU visibility.
-         This guarantees that PyTorch and VINCIE can only use the GPU assigned to this worker.
-         """
-         original_cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES')
-         try:
-             os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_index_str
-             if torch.cuda.is_available():
-                 # Within this context, 'cuda:0' refers to our target GPU, since it is the only one visible.
-                 torch.cuda.set_device(0)
-             return function_to_run(*args, **kwargs)
-         finally:
-             # Restore the original environment so other threads/processes are not affected.
-             if original_cuda_visible is not None:
-                 os.environ['CUDA_VISIBLE_DEVICES'] = original_cuda_visible
-             elif 'CUDA_VISIBLE_DEVICES' in os.environ:
-                 del os.environ['CUDA_VISIBLE_DEVICES']
- 
-     def _load_model_task(self):
-         """Model-loading task, executed in the isolated environment."""
-         print(f"[VinceWorker-{self.device_id_str}] Loading model into VRAM (visible physical GPU: {self.gpu_index_str})...")
-         # The device for VINCIE will be 'cuda:0' because it is the only GPU this process can see.
-         device_for_vincie = 'cuda:0' if torch.cuda.is_available() else 'cpu'
- 
-         original_cwd = Path.cwd()
          try:
-             # The VINCIE code may need to run from its own directory.
-             os.chdir(str(VINCIE_DIR))
-             # Add the directory to the system path so VINCIE's modules can be found.
-             if str(VINCIE_DIR) not in sys.path: sys.path.insert(0, str(VINCIE_DIR))
 
-             from common.config import load_config, create_object
- 
-             cfg = load_config(self.config_path, [f"device='{device_for_vincie}'"])
-             self.gen = create_object(cfg)
-             self.config = cfg
- 
-             # Run VINCIE's internal configuration steps.
-             for name in ("configure_persistence", "configure_models", "configure_diffusion"):
-                 getattr(self.gen, name)()
- 
-             self.gen.to(torch.device(device_for_vincie))
-             print(f"[VinceWorker-{self.device_id_str}] ✅ VINCIE model 'hot' and ready on physical GPU {self.gpu_index_str}.")
-         finally:
-             os.chdir(original_cwd)  # Restore the original working directory.
- 
-     def load_model_to_gpu(self):
-         """Public method to load the model, guaranteeing GPU isolation."""
-         if self.gen is None:
-             self._execute_in_isolated_env(self._load_model_task)
- 
-     def _infer_task(self, **kwargs) -> Path:
-         """Inference task, executed in the isolated environment."""
-         original_cwd = Path.cwd()
          try:
-             os.chdir(str(VINCIE_DIR))
- 
-             # Update the generator's config with the parameters of the current call.
-             with open_dict(self.gen.config):
-                 self.gen.config.generation.output.dir = str(kwargs["output_dir"])
-                 image_paths = kwargs.get("image_path", [])
-                 self.gen.config.generation.positive_prompt.image_path = [str(p) for p in image_paths] if isinstance(image_paths, list) else [str(image_paths)]
-                 if "prompts" in kwargs:
-                     self.gen.config.generation.positive_prompt.prompts = list(kwargs["prompts"])
-                 if "cfg_scale" in kwargs and kwargs["cfg_scale"] is not None:
-                     self.gen.config.diffusion.cfg.scale = float(kwargs["cfg_scale"])
- 
-             # Start VINCIE's inference loop.
-             self.gen.inference_loop()
-             return Path(kwargs["output_dir"])
          finally:
-             os.chdir(original_cwd)
-             # Memory cleanup after inference.
-             gc.collect()
-             if torch.cuda.is_available():
-                 torch.cuda.empty_cache()
- 
-     def infer(self, **kwargs) -> Path:
-         """Public method to start inference, guaranteeing GPU isolation."""
-         if self.gen is None:
-             raise RuntimeError(f"Model on worker {self.device_id_str} has not been loaded.")
-         return self._execute_in_isolated_env(self._infer_task, **kwargs)
- 
- 
- # --- Pool Manager Class (The Singleton Orchestrator) ---
- class VincePoolManager:
-     _instance = None
-     _lock = threading.Lock()
- 
-     def __new__(cls, *args, **kwargs):
-         with cls._lock:
-             if cls._instance is None:
-                 cls._instance = super().__new__(cls)
-                 cls._instance._initialized = False
-             return cls._instance
- 
-     def __init__(self, output_root: str = "/app/outputs"):
-         if self._initialized: return
-         with self._lock:
-             if self._initialized: return
- 
-             print("⚙️ Initializing the VincePoolManager singleton...")
-             self.output_root = Path(output_root)
-             self.output_root.mkdir(parents=True, exist_ok=True)
-             self.worker_lock = threading.Lock()
-             self.next_worker_idx = 0
- 
-             # Ask the central manager which GPUs this service may use.
-             self.allocated_gpu_indices = gpu_manager.get_vincie_devices()
 
-             if not self.allocated_gpu_indices:
-                 # Without allocated GPUs we cannot continue.
-                 # setup.py should already have run, so no dependency checks are needed here.
-                 print("WARNING: No GPUs allocated to VINCIE by the GPUManager. The VINCIE service will be inactive.")
-                 self.workers = []
-                 self._initialized = True
-                 return
- 
-             devices = [f'cuda:{i}' for i in self.allocated_gpu_indices]
-             vincie_config_path = VINCIE_DIR / "configs/generate.yaml"
-             if not vincie_config_path.exists():
-                 raise FileNotFoundError(f"VINCIE configuration file not found at {vincie_config_path}")
- 
-             self.workers = [VinceWorker(dev_id, str(vincie_config_path)) for dev_id in devices]
- 
-             print(f"Starting parallel model loading on {len(self.workers)} VINCIE GPUs...")
-             threads = [threading.Thread(target=worker.load_model_to_gpu) for worker in self.workers]
-             for t in threads: t.start()
-             for t in threads: t.join()
 
-             self._initialized = True
-             print(f"✅ VincePoolManager ready with {len(self.workers)} 'hot' workers.")
- 
-     def _get_next_worker(self) -> VinceWorker:
-         """Selects the next available worker using a round-robin strategy."""
-         if not self.workers:
-             raise RuntimeError("No VINCIE workers are available to process the task.")
- 
-         with self.worker_lock:
-             worker = self.workers[self.next_worker_idx]
-             self.next_worker_idx = (self.next_worker_idx + 1) % len(self.workers)
-             print(f"Task dispatched to worker: {worker.device_id_str}")
-             return worker
- 
-     def generate_multi_turn(self, input_image: str, turns: List[str], **kwargs) -> Path:
-         """Generates a video from one image and a sequence of prompts (turns)."""
-         worker = self._get_next_worker()
-         out_dir = self.output_root / f"vince_multi_turn_{Path(input_image).stem}_{os.urandom(4).hex()}"
-         out_dir.mkdir(parents=True)
- 
-         infer_kwargs = {"output_dir": out_dir, "image_path": input_image, "prompts": turns, **kwargs}
-         return worker.infer(**infer_kwargs)
 
-     def generate_multi_concept(self, concept_images: List[str], concept_prompts: List[str], final_prompt: str, **kwargs) -> Path:
-         """Generates a video from multiple concept images and a final prompt."""
-         worker = self._get_next_worker()
-         out_dir = self.output_root / f"vince_multi_concept_{os.urandom(4).hex()}"
-         out_dir.mkdir(parents=True)
- 
-         all_prompts = concept_prompts + [final_prompt]
-         infer_kwargs = {"output_dir": out_dir, "image_path": concept_images, "prompts": all_prompts, **kwargs}
-         return worker.infer(**infer_kwargs)
- 
- # --- Global Singleton Instance ---
- # Initialization is wrapped in a try/except so the whole application does not crash
- # if VINCIE cannot be initialized for some reason.
  try:
-     output_root_path = os.getenv("OUTPUT_ROOT", "/app/outputs")
-     vince_pool_manager_singleton = VincePoolManager(output_root=output_root_path)
  except Exception as e:
-     print(f"CRITICAL ERROR initializing the VincePoolManager: {e}", file=sys.stderr)
-     traceback.print_exc()
-     vince_pool_manager_singleton = None
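Note on the removed module: each VinceWorker serializes its GPU work through a CUDA_VISIBLE_DEVICES swap, so a worker only ever sees its own physical device, and the pool dispatches requests round-robin. A minimal sketch of how the removed singleton was consumed (the import path follows the removed file; the image path, prompts, and cfg_scale value are hypothetical, not part of this commit):

# Hypothetical caller of the removed VincePoolManager (illustrative sketch only).
from api.vince_pool_manager import vince_pool_manager_singleton

if vince_pool_manager_singleton is not None:
    # Requests rotate round-robin across the worker pool; each worker pins its
    # assigned GPU by rewriting CUDA_VISIBLE_DEVICES around load and inference.
    out_dir = vince_pool_manager_singleton.generate_multi_turn(
        input_image="/data/inputs/portrait.png",  # hypothetical input
        turns=["add a red hat", "make it rain"],  # hypothetical prompts
        cfg_scale=7.5,                            # optional, forwarded to diffusion.cfg.scale
    )
    print(f"VINCIE output directory: {out_dir}")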
+ # FILE: api/ltx_server_refactored_complete.py
+ # DESCRIPTION: Final backend service for LTX-Video generation.
+ #              Features dedicated VAE device logic, robust initialization, and narrative chunking.
 
  import gc
+ import io
+ import json
+ import logging
+ import os
+ import random
+ import shutil
  import subprocess
+ import sys
+ import tempfile
+ import time
+ import traceback
+ import warnings
  from pathlib import Path
+ from typing import Dict, List, Optional, Tuple
 
  import torch
+ import yaml
+ import numpy as np
+ from einops import rearrange
+ from huggingface_hub import hf_hub_download
+ 
+ # ==============================================================================
+ # --- INITIAL SETUP & CONFIGURATION ---
+ # ==============================================================================
 
+ warnings.filterwarnings("ignore")
+ logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
+ logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
+ 
+ # --- CONSTANTS ---
+ DEPS_DIR = Path("/data")
+ LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
+ BASE_CONFIG_PATH = LTX_VIDEO_REPO_DIR / "configs"
+ DEFAULT_CONFIG_FILE = BASE_CONFIG_PATH / "ltxv-13b-0.9.8-distilled-fp8.yaml"
+ LTX_REPO_ID = "Lightricks/LTX-Video"
+ RESULTS_DIR = Path("/app/output")
+ DEFAULT_FPS = 24.0
+ FRAMES_ALIGNMENT = 8
+ 
+ # --- CRITICAL: DEPENDENCY PATH INJECTION ---
+ def add_deps_to_path():
+     """Adds the LTX repository directory to the Python system path for imports."""
+     repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+     if repo_path not in sys.path:
+         sys.path.insert(0, repo_path)
+         logging.info(f"LTX-Video repository added to sys.path: {repo_path}")
+ 
+ add_deps_to_path()
+ 
+ # --- PROJECT IMPORTS ---
  try:
      from api.gpu_manager import gpu_manager
+     from ltx_video.models.autoencoders.vae_encode import (normalize_latents, un_normalize_latents)
+     from ltx_video.pipelines.pipeline_ltx_video import (ConditioningItem, LTXMultiScalePipeline, adain_filter_latent, create_latent_upsampler, create_ltx_video_pipeline)
+     from ltx_video.utils.inference_utils import load_image_to_tensor_with_resize_and_crop
+     from managers.vae_manager import vae_manager_singleton
+     from tools.video_encode_tool import video_encode_tool_singleton
  except ImportError as e:
+     logging.critical(f"A crucial LTX import failed. Check LTX-Video repo integrity. Error: {e}")
      sys.exit(1)
 
+ # ==============================================================================
+ # --- UTILITY & HELPER FUNCTIONS ---
+ # ==============================================================================
+ 
+ def seed_everything(seed: int):
+     """Sets the seed for reproducibility."""
+     random.seed(seed)
+     os.environ['PYTHONHASHSEED'] = str(seed)
+     np.random.seed(seed)
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+ 
+ def calculate_padding(orig_h: int, orig_w: int, target_h: int, target_w: int) -> Tuple[int, int, int, int]:
+     """Calculates symmetric padding values."""
+     pad_h = target_h - orig_h
+     pad_w = target_w - orig_w
+     pad_top = pad_h // 2
+     pad_bottom = pad_h - pad_top
+     pad_left = pad_w // 2
+     pad_right = pad_w - pad_left
+     return (pad_left, pad_right, pad_top, pad_bottom)
+ 
+ def log_tensor_info(tensor: torch.Tensor, name: str = "Tensor"):
+     """Logs detailed debug information about a PyTorch tensor."""
+     if not isinstance(tensor, torch.Tensor):
+         logging.debug(f"'{name}' is not a tensor.")
+         return
+ 
+     info_str = (
+         f"--- Tensor: {name} ---\n"
+         f" - Shape: {tuple(tensor.shape)}\n"
+         f" - Dtype: {tensor.dtype}\n"
+         f" - Device: {tensor.device}\n"
+     )
+     if tensor.numel() > 0:
          try:
+             info_str += (
+                 f" - Min: {tensor.min().item():.4f} | "
+                 f"Max: {tensor.max().item():.4f} | "
+                 f"Mean: {tensor.mean().item():.4f}\n"
+             )
+         except Exception:
+             pass  # Fails on some dtypes
+     logging.debug(info_str + "----------------------")
 
+ # ==============================================================================
+ # --- VIDEO SERVICE CLASS ---
+ # ==============================================================================
+ 
+ class VideoService:
+     """Backend service for orchestrating video generation using the LTX-Video pipeline."""
+ 
+     def __init__(self):
+         """Initializes the service with dedicated GPU logic for the main pipeline and VAE."""
+         t0 = time.perf_counter()
+         logging.info("Initializing VideoService...")
+         RESULTS_DIR.mkdir(parents=True, exist_ok=True)
+ 
+         target_main_device_str = str(gpu_manager.get_ltx_device())
+         target_vae_device_str = str(gpu_manager.get_ltx_vae_device())
+ 
+         logging.info(f"LTX allocated to devices: Main='{target_main_device_str}', VAE='{target_vae_device_str}'")
+ 
+         self.config = self._load_config()
+         self.pipeline, self.latent_upsampler = self._load_models()
+ 
+         self.main_device = torch.device("cpu")
+         self.vae_device = torch.device("cpu")
+ 
+         self.move_to_device(main_device_str=target_main_device_str, vae_device_str=target_vae_device_str)
+ 
+         self._apply_precision_policy()
+         vae_manager_singleton.attach_pipeline(
+             self.pipeline,
+             device=self.vae_device,
+             autocast_dtype=self.runtime_autocast_dtype
+         )
+         self._tmp_dirs = set()
+         logging.info(f"VideoService ready. Startup time: {time.perf_counter()-t0:.2f}s")
+ 
+     # ==========================================================================
+     # --- LIFECYCLE & MODEL MANAGEMENT ---
+     # ==========================================================================
+ 
+     def _load_config(self) -> Dict:
+         """Loads the YAML configuration file."""
+         config_path = DEFAULT_CONFIG_FILE
+         logging.info(f"Loading config from: {config_path}")
+         with open(config_path, "r") as file:
+             return yaml.safe_load(file)
+ 
+     def _load_models(self) -> Tuple[LTXMultiScalePipeline, Optional[torch.nn.Module]]:
+         """Loads models from cache to CPU."""
+         t0 = time.perf_counter()
+         logging.info("Loading LTX models from cache to CPU...")
+ 
+         pipeline = create_ltx_video_pipeline(
+             ckpt_path=self.config["checkpoint_path"],
+             precision=self.config["precision"],
+             text_encoder_model_name_or_path=self.config["text_encoder_model_name_or_path"],
+             sampler=self.config["sampler"],
+             device="cpu",
+             enhance_prompt=False,
+         )
+ 
+         latent_upsampler = None
+         if self.config.get("spatial_upscaler_model_path"):
+             spatial_path = self.config["spatial_upscaler_model_path"]
+             latent_upsampler = create_latent_upsampler(spatial_path, device="cpu")
+ 
+         logging.info(f"Models loaded on CPU in {time.perf_counter()-t0:.2f}s")
+         return pipeline, latent_upsampler
+ 
+     def move_to_device(self, main_device_str: str, vae_device_str: str):
+         """Moves pipeline components to their target devices."""
+         target_main_device = torch.device(main_device_str)
+         target_vae_device = torch.device(vae_device_str)
+ 
+         logging.info(f"Moving LTX models -> Main Pipeline: {target_main_device}, VAE: {target_vae_device}")
+ 
+         self.main_device = target_main_device
+         self.pipeline.to(self.main_device)
+ 
+         self.vae_device = target_vae_device
+         self.pipeline.vae.to(self.vae_device)
+ 
+         if self.latent_upsampler:
+             self.latent_upsampler.to(self.main_device)
 
+         logging.info("LTX models successfully moved to target devices.")
+ 
+     def move_to_cpu(self):
+         """Moves all LTX components to CPU to free VRAM."""
+         self.move_to_device(main_device_str="cpu", vae_device_str="cpu")
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+ 
+     def finalize(self):
+         """Cleans up GPU memory after a generation task."""
+         gc.collect()
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+             try: torch.cuda.ipc_collect()
+             except Exception: pass
+ 
+     # ==========================================================================
+     # --- PUBLIC ORCHESTRATORS ---
+     # ==========================================================================
+ 
+     def generate_narrative_low(self, prompt: str, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
+         """[ORCHESTRATOR] Generates a video from a multi-line prompt (sequence of scenes)."""
+         logging.info("Starting narrative low-res generation...")
+         used_seed = self._resolve_seed(kwargs.get("seed"))
+         seed_everything(used_seed)
+ 
+         prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
+         if not prompt_list:
+             raise ValueError("Prompt is empty or contains no valid lines.")
+ 
+         num_chunks = len(prompt_list)
+         total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0))
+         frames_per_chunk = (total_frames // num_chunks // FRAMES_ALIGNMENT) * FRAMES_ALIGNMENT
+         overlap_frames = self.config.get("overlap_frames", 8)
+ 
+         all_latents_paths = []
+         overlap_condition_item = None
+ 
          try:
+             for i, chunk_prompt in enumerate(prompt_list):
+                 logging.info(f"Generating narrative chunk {i+1}/{num_chunks}: '{chunk_prompt[:50]}...'")
+ 
+                 current_frames = frames_per_chunk
+                 if i > 0: current_frames += overlap_frames
+ 
+                 current_conditions = kwargs.get("initial_conditions", []) if i == 0 else []
+                 if overlap_condition_item: current_conditions.append(overlap_condition_item)
+ 
+                 chunk_latents = self._generate_single_chunk_low(
+                     prompt=chunk_prompt,
+                     num_frames=current_frames,
+                     seed=used_seed + i,
+                     conditioning_items=current_conditions,
+                     **kwargs
+                 )
+ 
+                 if chunk_latents is None: raise RuntimeError(f"Failed to generate latents for chunk {i+1}.")
+ 
+                 if i < num_chunks - 1:
+                     overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
+                     overlap_condition_item = ConditioningItem(media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0)
+ 
+                 if i > 0: chunk_latents = chunk_latents[:, :, overlap_frames:, :, :]
+ 
+                 chunk_path = RESULTS_DIR / f"temp_chunk_{i}_{used_seed}.pt"
+                 torch.save(chunk_latents.cpu(), chunk_path)
+                 all_latents_paths.append(chunk_path)
 
+             return self._finalize_generation(all_latents_paths, "narrative_video", used_seed)
+ 
+         except Exception as e:
+             logging.error(f"Error during narrative generation: {e}", exc_info=True)
+             return None, None, None
          finally:
+             for path in all_latents_paths:
+                 if path.exists(): path.unlink()
+             self.finalize()
+ 
+ 
+     def generate_single_low(self, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
+         """[ORCHESTRATOR] Generates a video from a single prompt in one go."""
+         logging.info("Starting single-prompt low-res generation...")
+         used_seed = self._resolve_seed(kwargs.get("seed"))
+         seed_everything(used_seed)
+ 
+         try:
+             total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0), min_frames=9)
+ 
+             final_latents = self._generate_single_chunk_low(
+                 num_frames=total_frames,
+                 seed=used_seed,
+                 conditioning_items=kwargs.get("initial_conditions", []),
+                 **kwargs
+             )
+ 
+             if final_latents is None: raise RuntimeError("Failed to generate latents.")
+ 
+             latents_path = RESULTS_DIR / f"temp_single_{used_seed}.pt"
+             torch.save(final_latents.cpu(), latents_path)
+             return self._finalize_generation([latents_path], "single_video", used_seed)
+ 
+         except Exception as e:
+             logging.error(f"Error during single generation: {e}", exc_info=True)
+             return None, None, None
+         finally:
+             self.finalize()
+ 
+     # ==========================================================================
+     # --- INTERNAL WORKER & HELPER METHODS ---
+     # ==========================================================================
+ 
+     def _generate_single_chunk_low(
+         self, prompt: str, negative_prompt: str, height: int, width: int, num_frames: int, seed: int,
+         conditioning_items: List[ConditioningItem], ltx_configs_override: Optional[Dict], **kwargs
+     ) -> Optional[torch.Tensor]:
+         """[WORKER] Generates a single chunk of latents. This is the core generation unit."""
+         height_padded, width_padded = (self._align(d) for d in (height, width))
+         downscale_factor = self.config.get("downscale_factor", 0.6666666)
+         vae_scale_factor = self.pipeline.vae_scale_factor
+ 
+         downscaled_height = self._align(int(height_padded * downscale_factor), vae_scale_factor)
+         downscaled_width = self._align(int(width_padded * downscale_factor), vae_scale_factor)
+ 
+         first_pass_config = self.config.get("first_pass", {}).copy()
+         if ltx_configs_override:
+             first_pass_config.update(self._prepare_guidance_overrides(ltx_configs_override))
+ 
+         pipeline_kwargs = {
+             "prompt": prompt, "negative_prompt": negative_prompt,
+             "height": downscaled_height, "width": downscaled_width,
+             "num_frames": num_frames, "frame_rate": DEFAULT_FPS,
+             "generator": torch.Generator(device=self.main_device).manual_seed(seed),
+             "output_type": "latent", "conditioning_items": conditioning_items,
+             **first_pass_config
+         }
+ 
+         with torch.autocast(device_type=self.main_device.type, dtype=self.runtime_autocast_dtype, enabled="cuda" in self.main_device.type):
+             latents_raw = self.pipeline(**pipeline_kwargs).images
+ 
+         log_tensor_info(latents_raw, f"Raw Latents for '{prompt[:40]}...'")
+         return latents_raw
+ 
+     def _finalize_generation(self, latents_paths: List[Path], base_filename: str, seed: int) -> Tuple[str, str, int]:
+         """Loads latents, concatenates them, decodes to video, and saves both."""
+         logging.info("Finalizing generation: decoding latents to video.")
+         all_tensors_cpu = [torch.load(p) for p in latents_paths]
+         final_latents = torch.cat(all_tensors_cpu, dim=2)
+ 
+         final_latents_path = RESULTS_DIR / f"latents_{base_filename}_{seed}.pt"
+         torch.save(final_latents, final_latents_path)
+         logging.info(f"Final latents saved to: {final_latents_path}")
+ 
+         # The decode method in vae_manager now handles moving the tensor to the correct VAE device.
+         pixel_tensor = vae_manager_singleton.decode(
+             final_latents,
+             decode_timestep=float(self.config.get("decode_timestep", 0.05))
+         )
+ 
+         video_path = self._save_and_log_video(pixel_tensor, f"{base_filename}_{seed}")
+         return str(video_path), str(final_latents_path), seed
+ 
+     def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int) -> List[ConditioningItem]:
+         """Prepares a list of ConditioningItem objects from file paths or tensors."""
+         if not items_list: return []
+         height_padded, width_padded = self._align(height), self._align(width)
+         padding_values = calculate_padding(height, width, height_padded, width_padded)
+ 
+         conditioning_items = []
+         for media, frame, weight in items_list:
+             tensor = self._prepare_conditioning_tensor(media, height, width, padding_values)
+             safe_frame = max(0, min(int(frame), num_frames - 1))
+             conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
+         return conditioning_items
+ 
+     def _prepare_conditioning_tensor(self, media_path: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
+         """Loads and processes an image to be a conditioning tensor."""
+         tensor = load_image_to_tensor_with_resize_and_crop(media_path, height, width)
+         tensor = torch.nn.functional.pad(tensor, padding)
+         # Conditioning tensors are needed on the main device for the transformer pass.
+         return tensor.to(self.main_device, dtype=self.runtime_autocast_dtype)
+ 
+     def _prepare_guidance_overrides(self, ltx_configs: Dict) -> Dict:
+         """Parses UI presets for guidance into pipeline-compatible arguments."""
+         overrides = {}
+         preset = ltx_configs.get("guidance_preset", "Padrão (Recomendado)")
+ 
+         if preset == "Agressivo":
+             overrides["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
+             overrides["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
+         elif preset == "Suave":
+             overrides["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
+             overrides["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
+         elif preset == "Customizado":
+             try:
+                 overrides["guidance_scale"] = json.loads(ltx_configs["guidance_scale_list"])
+                 overrides["stg_scale"] = json.loads(ltx_configs["stg_scale_list"])
+             except (json.JSONDecodeError, KeyError) as e:
+                 logging.warning(f"Failed to parse custom guidance values: {e}. Falling back to defaults.")
+ 
+         if overrides: logging.info(f"Applying '{preset}' guidance preset overrides.")
+         return overrides
+ 
+     def _save_and_log_video(self, pixel_tensor: torch.Tensor, base_filename: str) -> Path:
+         """Saves a pixel tensor (on CPU) to an MP4 file."""
+         with tempfile.TemporaryDirectory() as temp_dir:
+             temp_path = os.path.join(temp_dir, f"{base_filename}.mp4")
+             video_encode_tool_singleton.save_video_from_tensor(
+                 pixel_tensor, temp_path, fps=DEFAULT_FPS
+             )
+             final_path = RESULTS_DIR / f"{base_filename}.mp4"
+             shutil.move(temp_path, final_path)
+             logging.info(f"Video saved successfully to: {final_path}")
+             return final_path
 
+     def _apply_precision_policy(self):
+         """Sets the autocast dtype based on the configuration file."""
+         precision = str(self.config.get("precision", "bfloat16")).lower()
+         if precision in ["float8_e4m3fn", "bfloat16"]: self.runtime_autocast_dtype = torch.bfloat16
+         elif precision == "mixed_precision": self.runtime_autocast_dtype = torch.float16
+         else: self.runtime_autocast_dtype = torch.float32
+         logging.info(f"Runtime precision policy set for autocast: {self.runtime_autocast_dtype}")
+ 
+     def _align(self, dim: int, alignment: int = FRAMES_ALIGNMENT) -> int:
+         """Aligns a dimension to the nearest multiple of `alignment`."""
+         return ((dim - 1) // alignment + 1) * alignment
+ 
+     def _calculate_aligned_frames(self, duration_s: float, min_frames: int = 1) -> int:
+         """Calculates total frames based on duration, ensuring alignment."""
+         num_frames = int(round(duration_s * DEFAULT_FPS))
+         aligned_frames = self._align(num_frames)
+         return max(aligned_frames + 1, min_frames)
+ 
+     def _resolve_seed(self, seed: Optional[int]) -> int:
+         """Returns the given seed or generates a new random one."""
+         return random.randint(0, 2**32 - 1) if seed is None else int(seed)
+ 
+ # ==============================================================================
+ # --- SINGLETON INSTANTIATION ---
+ # ==============================================================================
  try:
+     video_generation_service = VideoService()
+     logging.info("Global VideoService instance created successfully.")
  except Exception as e:
+     logging.critical(f"Failed to initialize VideoService: {e}", exc_info=True)
+     sys.exit(1)
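For reviewers, a minimal usage sketch of the new service singleton (assuming this file is importable as api.ltx_server_refactored_complete; the prompt, resolution, and duration values are illustrative, not part of the commit):

# Hypothetical caller of the new VideoService singleton (illustrative sketch only).
from api.ltx_server_refactored_complete import video_generation_service

# Each non-empty prompt line becomes one chunk; chunks are stitched by reusing
# the last `overlap_frames` latent frames of chunk N as a ConditioningItem for chunk N+1.
video_path, latents_path, seed = video_generation_service.generate_narrative_low(
    prompt="A ship sails into a storm.\nLightning splits the mast.",
    negative_prompt="blurry, distorted",       # hypothetical value
    height=512,
    width=768,
    duration=8.0,                              # seconds; converted to aligned frames
    ltx_configs_override=None,                 # or e.g. {"guidance_preset": "Agressivo"}
)
print(video_path, latents_path, seed)

Note that on failure the orchestrators log the exception and return (None, None, None), so callers should check the returned paths before using them.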