Spaces:
Paused
Paused
Update api/ltx_server_refactored_complete.py
Browse files
api/ltx_server_refactored_complete.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
# FILE: api/ltx_server_refactored_complete.py
|
| 2 |
# DESCRIPTION: Final high-level orchestrator for LTX-Video generation.
|
| 3 |
-
#
|
| 4 |
-
# focusing solely on the business logic of video generation workflows.
|
| 5 |
|
| 6 |
import gc
|
| 7 |
import json
|
|
@@ -34,6 +33,7 @@ def add_deps_to_path():
|
|
| 34 |
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
|
| 35 |
if repo_path not in sys.path:
|
| 36 |
sys.path.insert(0, repo_path)
|
|
|
|
| 37 |
logging.info(f"[ltx_server] LTX-Video repository added to sys.path: {repo_path}")
|
| 38 |
|
| 39 |
add_deps_to_path()
|
|
@@ -52,6 +52,9 @@ try:
|
|
| 52 |
ConditioningItem,
|
| 53 |
)
|
| 54 |
|
|
|
|
|
|
|
|
|
|
| 55 |
except ImportError as e:
|
| 56 |
logging.critical(f"A crucial import from the local API/architecture failed. Error: {e}", exc_info=True)
|
| 57 |
sys.exit(1)
|
|
@@ -60,9 +63,10 @@ except ImportError as e:
|
|
| 60 |
# --- FUNÇÕES AUXILIARES DO ORQUESTRADOR ---
|
| 61 |
# ==============================================================================
|
| 62 |
|
|
|
|
| 63 |
def calculate_padding(orig_h: int, orig_w: int, target_h: int, target_w: int) -> Tuple[int, int, int, int]:
|
| 64 |
"""Calculates symmetric padding required to meet target dimensions."""
|
| 65 |
-
pad_h = target_h - orig_h
|
| 66 |
pad_w = target_w - orig_w
|
| 67 |
pad_top = pad_h // 2
|
| 68 |
pad_bottom = pad_h - pad_top
|
|
@@ -80,8 +84,10 @@ class VideoService:
|
|
| 80 |
tasks to specialized managers and utility modules.
|
| 81 |
"""
|
| 82 |
|
|
|
|
| 83 |
def __init__(self):
|
| 84 |
t0 = time.perf_counter()
|
|
|
|
| 85 |
logging.info("Initializing VideoService Orchestrator...")
|
| 86 |
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
| 87 |
|
|
@@ -100,6 +106,7 @@ class VideoService:
|
|
| 100 |
vae_manager_singleton.attach_pipeline(self.pipeline, device=self.vae_device, autocast_dtype=self.runtime_autocast_dtype)
|
| 101 |
logging.info(f"VideoService ready. Startup time: {time.perf_counter()-t0:.2f}s")
|
| 102 |
|
|
|
|
| 103 |
def _load_config(self) -> Dict:
|
| 104 |
"""Loads the YAML configuration file."""
|
| 105 |
config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
|
|
@@ -107,6 +114,7 @@ class VideoService:
|
|
| 107 |
with open(config_path, "r") as file:
|
| 108 |
return yaml.safe_load(file)
|
| 109 |
|
|
|
|
| 110 |
def move_to_device(self, main_device_str: str, vae_device_str: str):
|
| 111 |
"""Moves pipeline components to their designated target devices."""
|
| 112 |
target_main_device = torch.device(main_device_str)
|
|
@@ -137,6 +145,7 @@ class VideoService:
|
|
| 137 |
# --- LÓGICA DE NEGÓCIO: ORQUESTRADORES PÚBLICOS ---
|
| 138 |
# ==========================================================================
|
| 139 |
|
|
|
|
| 140 |
def generate_narrative_low(self, prompt: str, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
|
| 141 |
"""Orchestrates the generation of a video from a multi-line prompt (sequence of scenes)."""
|
| 142 |
logging.info("Starting narrative low-res generation...")
|
|
@@ -186,6 +195,7 @@ class VideoService:
|
|
| 186 |
if path.exists(): path.unlink()
|
| 187 |
self.finalize()
|
| 188 |
|
|
|
|
| 189 |
def generate_single_low(self, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
|
| 190 |
"""Orchestrates the generation of a video from a single prompt in one go."""
|
| 191 |
logging.info("Starting single-prompt low-res generation...")
|
|
@@ -213,6 +223,7 @@ class VideoService:
|
|
| 213 |
# --- UNIDADES DE TRABALHO E HELPERS INTERNOS ---
|
| 214 |
# ==========================================================================
|
| 215 |
|
|
|
|
| 216 |
def _generate_single_chunk_low(self, **kwargs) -> Optional[torch.Tensor]:
|
| 217 |
"""Calls the pipeline to generate a single chunk of latents."""
|
| 218 |
height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
|
|
@@ -237,6 +248,7 @@ class VideoService:
|
|
| 237 |
|
| 238 |
return latents_raw.to(self.main_device)
|
| 239 |
|
|
|
|
| 240 |
def _finalize_generation(self, temp_latent_paths: List[Path], base_filename: str, seed: int) -> Tuple[str, str, int]:
|
| 241 |
"""Consolidates latents, decodes them to video, and saves final artifacts."""
|
| 242 |
logging.info("Finalizing generation: decoding latents to video.")
|
|
@@ -253,6 +265,7 @@ class VideoService:
|
|
| 253 |
video_path = self._save_and_log_video(pixel_tensor, f"{base_filename}_{seed}")
|
| 254 |
return str(video_path), str(final_latents_path), seed
|
| 255 |
|
|
|
|
| 256 |
def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int) -> List[ConditioningItem]:
|
| 257 |
if not items_list: return []
|
| 258 |
height_padded, width_padded = self._align(height), self._align(width)
|
|
@@ -265,6 +278,7 @@ class VideoService:
|
|
| 265 |
conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
|
| 266 |
return conditioning_items
|
| 267 |
|
|
|
|
| 268 |
def _prepare_conditioning_tensor(self, media_path: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
|
| 269 |
tensor = load_image_to_tensor_with_resize_and_crop(media_path, height, width)
|
| 270 |
tensor = torch.nn.functional.pad(tensor, padding)
|
|
@@ -273,7 +287,19 @@ class VideoService:
|
|
| 273 |
def _prepare_guidance_overrides(self, ltx_configs: Dict) -> Dict:
|
| 274 |
overrides = {}
|
| 275 |
preset = ltx_configs.get("guidance_preset", "Padrão (Recomendado)")
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
return overrides
|
| 278 |
|
| 279 |
def _save_and_log_video(self, pixel_tensor: torch.Tensor, base_filename: str) -> Path:
|
|
|
|
| 1 |
# FILE: api/ltx_server_refactored_complete.py
|
| 2 |
# DESCRIPTION: Final high-level orchestrator for LTX-Video generation.
|
| 3 |
+
# Incorporates a debug logging decorator for deep inspection of function I/O.
|
|
|
|
| 4 |
|
| 5 |
import gc
|
| 6 |
import json
|
|
|
|
| 33 |
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
|
| 34 |
if repo_path not in sys.path:
|
| 35 |
sys.path.insert(0, repo_path)
|
| 36 |
+
# Usamos logging.info aqui, pois é uma informação importante de inicialização
|
| 37 |
logging.info(f"[ltx_server] LTX-Video repository added to sys.path: {repo_path}")
|
| 38 |
|
| 39 |
add_deps_to_path()
|
|
|
|
| 52 |
ConditioningItem,
|
| 53 |
)
|
| 54 |
|
| 55 |
+
# Nosso novo decorador de logging para depuração
|
| 56 |
+
from api.utils.debug_utils import log_function_io
|
| 57 |
+
|
| 58 |
except ImportError as e:
|
| 59 |
logging.critical(f"A crucial import from the local API/architecture failed. Error: {e}", exc_info=True)
|
| 60 |
sys.exit(1)
|
|
|
|
| 63 |
# --- FUNÇÕES AUXILIARES DO ORQUESTRADOR ---
|
| 64 |
# ==============================================================================
|
| 65 |
|
| 66 |
+
@log_function_io
|
| 67 |
def calculate_padding(orig_h: int, orig_w: int, target_h: int, target_w: int) -> Tuple[int, int, int, int]:
|
| 68 |
"""Calculates symmetric padding required to meet target dimensions."""
|
| 69 |
+
pad_h = target_h - orig_h
|
| 70 |
pad_w = target_w - orig_w
|
| 71 |
pad_top = pad_h // 2
|
| 72 |
pad_bottom = pad_h - pad_top
|
|
|
|
| 84 |
tasks to specialized managers and utility modules.
|
| 85 |
"""
|
| 86 |
|
| 87 |
+
@log_function_io
|
| 88 |
def __init__(self):
|
| 89 |
t0 = time.perf_counter()
|
| 90 |
+
# Logging de alto nível para o usuário
|
| 91 |
logging.info("Initializing VideoService Orchestrator...")
|
| 92 |
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
|
| 93 |
|
|
|
|
| 106 |
vae_manager_singleton.attach_pipeline(self.pipeline, device=self.vae_device, autocast_dtype=self.runtime_autocast_dtype)
|
| 107 |
logging.info(f"VideoService ready. Startup time: {time.perf_counter()-t0:.2f}s")
|
| 108 |
|
| 109 |
+
@log_function_io
|
| 110 |
def _load_config(self) -> Dict:
|
| 111 |
"""Loads the YAML configuration file."""
|
| 112 |
config_path = LTX_VIDEO_REPO_DIR / "configs" / "ltxv-13b-0.9.8-distilled-fp8.yaml"
|
|
|
|
| 114 |
with open(config_path, "r") as file:
|
| 115 |
return yaml.safe_load(file)
|
| 116 |
|
| 117 |
+
@log_function_io
|
| 118 |
def move_to_device(self, main_device_str: str, vae_device_str: str):
|
| 119 |
"""Moves pipeline components to their designated target devices."""
|
| 120 |
target_main_device = torch.device(main_device_str)
|
|
|
|
| 145 |
# --- LÓGICA DE NEGÓCIO: ORQUESTRADORES PÚBLICOS ---
|
| 146 |
# ==========================================================================
|
| 147 |
|
| 148 |
+
@log_function_io
|
| 149 |
def generate_narrative_low(self, prompt: str, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
|
| 150 |
"""Orchestrates the generation of a video from a multi-line prompt (sequence of scenes)."""
|
| 151 |
logging.info("Starting narrative low-res generation...")
|
|
|
|
| 195 |
if path.exists(): path.unlink()
|
| 196 |
self.finalize()
|
| 197 |
|
| 198 |
+
@log_function_io
|
| 199 |
def generate_single_low(self, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
|
| 200 |
"""Orchestrates the generation of a video from a single prompt in one go."""
|
| 201 |
logging.info("Starting single-prompt low-res generation...")
|
|
|
|
| 223 |
# --- UNIDADES DE TRABALHO E HELPERS INTERNOS ---
|
| 224 |
# ==========================================================================
|
| 225 |
|
| 226 |
+
@log_function_io
|
| 227 |
def _generate_single_chunk_low(self, **kwargs) -> Optional[torch.Tensor]:
|
| 228 |
"""Calls the pipeline to generate a single chunk of latents."""
|
| 229 |
height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
|
|
|
|
| 248 |
|
| 249 |
return latents_raw.to(self.main_device)
|
| 250 |
|
| 251 |
+
@log_function_io
|
| 252 |
def _finalize_generation(self, temp_latent_paths: List[Path], base_filename: str, seed: int) -> Tuple[str, str, int]:
|
| 253 |
"""Consolidates latents, decodes them to video, and saves final artifacts."""
|
| 254 |
logging.info("Finalizing generation: decoding latents to video.")
|
|
|
|
| 265 |
video_path = self._save_and_log_video(pixel_tensor, f"{base_filename}_{seed}")
|
| 266 |
return str(video_path), str(final_latents_path), seed
|
| 267 |
|
| 268 |
+
@log_function_io
|
| 269 |
def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int) -> List[ConditioningItem]:
|
| 270 |
if not items_list: return []
|
| 271 |
height_padded, width_padded = self._align(height), self._align(width)
|
|
|
|
| 278 |
conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
|
| 279 |
return conditioning_items
|
| 280 |
|
| 281 |
+
@log_function_io
|
| 282 |
def _prepare_conditioning_tensor(self, media_path: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
|
| 283 |
tensor = load_image_to_tensor_with_resize_and_crop(media_path, height, width)
|
| 284 |
tensor = torch.nn.functional.pad(tensor, padding)
|
|
|
|
| 287 |
def _prepare_guidance_overrides(self, ltx_configs: Dict) -> Dict:
|
| 288 |
overrides = {}
|
| 289 |
preset = ltx_configs.get("guidance_preset", "Padrão (Recomendado)")
|
| 290 |
+
if preset == "Agressivo":
|
| 291 |
+
overrides["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
|
| 292 |
+
overrides["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
|
| 293 |
+
elif preset == "Suave":
|
| 294 |
+
overrides["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
|
| 295 |
+
overrides["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
|
| 296 |
+
elif preset == "Customizado":
|
| 297 |
+
try:
|
| 298 |
+
overrides["guidance_scale"] = json.loads(ltx_configs["guidance_scale_list"])
|
| 299 |
+
overrides["stg_scale"] = json.loads(ltx_configs["stg_scale_list"])
|
| 300 |
+
except (json.JSONDecodeError, KeyError) as e:
|
| 301 |
+
logging.warning(f"Failed to parse custom guidance values: {e}. Falling back to defaults.")
|
| 302 |
+
if overrides: logging.info(f"Applying '{preset}' guidance preset overrides.")
|
| 303 |
return overrides
|
| 304 |
|
| 305 |
def _save_and_log_video(self, pixel_tensor: torch.Tensor, base_filename: str) -> Path:
|