Test

Paused

App Files Community

eeuuia committed on Oct 11

Commit

7c1bfd4

verified ·

1 Parent(s): 79815c9

Update api/ltx_server_refactored_complete.py

Browse files

Files changed (1) hide show

api/ltx_server_refactored_complete.py +74 -69

api/ltx_server_refactored_complete.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # FILE: api/ltx_server_refactored_complete.py
-# DESCRIPTION: Final orchestrator for LTX-Video generation.
-# Features path resolution for cached models, dedicated VAE device logic,
-# delegation to utility modules, and advanced debug logging.
 import gc
 import json
@@ -13,7 +13,7 @@ import tempfile
 import time
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
-import random
 import torch
 import yaml
 import numpy as np
@@ -24,7 +24,6 @@ from huggingface_hub import hf_hub_download
 # ==============================================================================
 # Configuração de logging e supressão de warnings
-# (Pode ser removido se o logging for configurado globalmente)
 import warnings
 warnings.filterwarnings("ignore")
 logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
@@ -179,31 +178,46 @@ class VideoService:
             except Exception: pass
     # ==========================================================================
-    # --- LÓGICA DE NEGÓCIO: ORQUESTRADORES PÚBLICOS ---
     # ==========================================================================
     @log_function_io
-    def generate_narrative_low(self, prompt: str, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
-        """Orchestrates the generation of a video from a multi-line prompt (sequence of scenes)."""
-        logging.info("Starting narrative low-res generation...")
-        used_seed = self._resolve_seed(kwargs.get("seed"))
         seed_everything(used_seed)
         prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
         if not prompt_list: raise ValueError("Prompt is empty or contains no valid lines.")
         num_chunks = len(prompt_list)
         total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0))
-        frames_per_chunk = (total_frames // num_chunks // FRAMES_ALIGNMENT) * FRAMES_ALIGNMENT
-        overlap_frames = self.config.get("overlap_frames", 8)
         temp_latent_paths = []
         overlap_condition_item = None
         try:
             for i, chunk_prompt in enumerate(prompt_list):
-                logging.info(f"Generating narrative chunk {i+1}/{num_chunks}: '{chunk_prompt[:50]}...'")
-                current_frames = frames_per_chunk + (overlap_frames if i > 0 else 0)
                 current_conditions = kwargs.get("initial_conditions", []) if i == 0 else []
                 if overlap_condition_item: current_conditions.append(overlap_condition_item)
@@ -211,9 +225,9 @@ class VideoService:
                     prompt=chunk_prompt, num_frames=current_frames, seed=used_seed + i,
                     conditioning_items=current_conditions, **kwargs
                 )
-                if chunk_latents is None: raise RuntimeError(f"Failed to generate latents for chunk {i+1}.")
-                if i < num_chunks - 1:
                     overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
                     overlap_condition_item = ConditioningItem(media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0)
@@ -223,46 +237,23 @@ class VideoService:
                 torch.save(chunk_latents.cpu(), chunk_path)
                 temp_latent_paths.append(chunk_path)
-            return self._finalize_generation(temp_latent_paths, "narrative_video", used_seed)
         except Exception as e:
-            logging.error(f"Error during narrative generation: {e}", exc_info=True)
             return None, None, None
         finally:
             for path in temp_latent_paths:
                 if path.exists(): path.unlink()
             self.finalize()
-    @log_function_io
-    def generate_single_low(self, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
-        """Orchestrates the generation of a video from a single prompt in one go."""
-        logging.info("Starting single-prompt low-res generation...")
-        used_seed = self._resolve_seed(kwargs.get("seed"))
-        seed_everything(used_seed)
-        try:
-            total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0), min_frames=9)
-            final_latents = self._generate_single_chunk_low(
-                num_frames=total_frames, seed=used_seed,
-                conditioning_items=kwargs.get("initial_conditions", []), **kwargs
-            )
-            if final_latents is None: raise RuntimeError("Failed to generate latents.")
-            temp_latent_path = RESULTS_DIR / f"temp_single_{used_seed}.pt"
-            torch.save(final_latents.cpu(), temp_latent_path)
-            return self._finalize_generation([temp_latent_path], "single_video", used_seed)
-        except Exception as e:
-            logging.error(f"Error during single generation: {e}", exc_info=True)
-            return None, None, None
-        finally:
-            self.finalize()
     # ==========================================================================
     # --- UNIDADES DE TRABALHO E HELPERS INTERNOS ---
     # ==========================================================================
     @log_function_io
     def _generate_single_chunk_low(self, **kwargs) -> Optional[torch.Tensor]:
-        """Calls the pipeline to generate a single chunk of latents."""
         height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
         downscale_factor = self.config.get("downscale_factor", 0.6666666)
         vae_scale_factor = self.pipeline.vae_scale_factor
@@ -271,7 +262,7 @@ class VideoService:
         first_pass_config = self.config.get("first_pass", {}).copy()
         if kwargs.get("ltx_configs_override"):
-            first_pass_config.update(self._prepare_guidance_overrides(kwargs["ltx_configs_override"]))
         pipeline_kwargs = {
             "prompt": kwargs['prompt'], "negative_prompt": kwargs['negative_prompt'],
@@ -304,40 +295,53 @@ class VideoService:
     @log_function_io
     def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int) -> List[ConditioningItem]:
         if not items_list: return []
         height_padded, width_padded = self._align(height), self._align(width)
         padding_values = calculate_padding(height, width, height_padded, width_padded)
         conditioning_items = []
-        for media, frame, weight in items_list:
-            tensor = self._prepare_conditioning_tensor(media, height, width, padding_values)
             safe_frame = max(0, min(int(frame), num_frames - 1))
             conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
         return conditioning_items
-    @log_function_io
-    def _prepare_conditioning_tensor(self, media_path: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
-        tensor = load_image_to_tensor_with_resize_and_crop(media_path, height, width)
-        tensor = torch.nn.functional.pad(tensor, padding)
-        return tensor.to(self.main_device, dtype=self.runtime_autocast_dtype)
-    def _prepare_guidance_overrides(self, ltx_configs: Dict) -> Dict:
-        overrides = {}
-        preset = ltx_configs.get("guidance_preset", "Padrão (Recomendado)")
         if preset == "Agressivo":
-            overrides["guidance_scale"] = [1, 2, 8, 12, 8, 2, 1]
-            overrides["stg_scale"] = [0, 0, 5, 6, 5, 3, 2]
         elif preset == "Suave":
-            overrides["guidance_scale"] = [1, 1, 4, 5, 4, 1, 1]
-            overrides["stg_scale"] = [0, 0, 2, 2, 2, 1, 0]
         elif preset == "Customizado":
             try:
-                overrides["guidance_scale"] = json.loads(ltx_configs["guidance_scale_list"])
-                overrides["stg_scale"] = json.loads(ltx_configs["stg_scale_list"])
-            except (json.JSONDecodeError, KeyError) as e:
-                logging.warning(f"Failed to parse custom guidance values: {e}. Falling back to defaults.")
-        if overrides: logging.info(f"Applying '{preset}' guidance preset overrides.")
-        return overrides
     def _save_and_log_video(self, pixel_tensor: torch.Tensor, base_filename: str) -> Path:
         with tempfile.TemporaryDirectory() as temp_dir:
@@ -361,10 +365,11 @@ class VideoService:
     def _calculate_aligned_frames(self, duration_s: float, min_frames: int = 1) -> int:
         num_frames = int(round(duration_s * DEFAULT_FPS))
         aligned_frames = self._align(num_frames)
-        return max(aligned_frames + 1, min_frames)
-    def _resolve_seed(self, seed: Optional[int]) -> int:
-        return random.randint(0, 2**32 - 1) if seed is None else int(seed)
 # ==============================================================================
 # --- INSTANCIAÇÃO SINGLETON ---
@@ -374,4 +379,4 @@ try:
     logging.info("Global VideoService orchestrator instance created successfully.")
 except Exception as e:
     logging.critical(f"Failed to initialize VideoService: {e}", exc_info=True)
-    sys.exit(1)

 # FILE: api/ltx_server_refactored_complete.py
+# DESCRIPTION: Final high-level orchestrator for LTX-Video generation.
+# This version features a unified generation workflow, random seed generation,
+# delegation to specialized modules, and advanced debugging capabilities.
 import gc
 import json
 import time
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
 import torch
 import yaml
 import numpy as np
 # ==============================================================================
 # Configuração de logging e supressão de warnings
 import warnings
 warnings.filterwarnings("ignore")
 logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
             except Exception: pass
     # ==========================================================================
+    # --- LÓGICA DE NEGÓCIO: ORQUESTRADOR PÚBLICO UNIFICADO ---
     # ==========================================================================
     @log_function_io
+    def generate_low_resolution(self, prompt: str, **kwargs) -> Tuple[Optional[str], Optional[str], Optional[int]]:
+        """
+        [UNIFIED ORCHESTRATOR] Generates a low-resolution video from a prompt.
+        Handles both single-line and multi-line prompts transparently.
+        """
+        logging.info("Starting unified low-resolution generation (random seed)...")
+        used_seed = self._get_random_seed()
         seed_everything(used_seed)
+        logging.info(f"Using randomly generated seed: {used_seed}")
         prompt_list = [p.strip() for p in prompt.splitlines() if p.strip()]
         if not prompt_list: raise ValueError("Prompt is empty or contains no valid lines.")
+        is_narrative = len(prompt_list) > 1
+        logging.info(f"Generation mode detected: {'Narrative' if is_narrative else 'Simple'} ({len(prompt_list)} scene(s)).")
         num_chunks = len(prompt_list)
         total_frames = self._calculate_aligned_frames(kwargs.get("duration", 4.0))
+        frames_per_chunk = max(FRAMES_ALIGNMENT, (total_frames // num_chunks // FRAMES_ALIGNMENT) * FRAMES_ALIGNMENT)
+        overlap_frames = self.config.get("overlap_frames", 8) if is_narrative else 0
         temp_latent_paths = []
         overlap_condition_item = None
         try:
             for i, chunk_prompt in enumerate(prompt_list):
+                logging.info(f"Processing scene {i+1}/{num_chunks}: '{chunk_prompt[:50]}...'")
+                if i == num_chunks - 1:
+                    processed_frames = (num_chunks - 1) * frames_per_chunk
+                    current_frames = total_frames - processed_frames
+                else:
+                    current_frames = frames_per_chunk
+                if i > 0: current_frames += overlap_frames
                 current_conditions = kwargs.get("initial_conditions", []) if i == 0 else []
                 if overlap_condition_item: current_conditions.append(overlap_condition_item)
                     prompt=chunk_prompt, num_frames=current_frames, seed=used_seed + i,
                     conditioning_items=current_conditions, **kwargs
                 )
+                if chunk_latents is None: raise RuntimeError(f"Failed to generate latents for scene {i+1}.")
+                if is_narrative and i < num_chunks - 1:
                     overlap_latents = chunk_latents[:, :, -overlap_frames:, :, :].clone()
                     overlap_condition_item = ConditioningItem(media_item=overlap_latents, media_frame_number=0, conditioning_strength=1.0)
                 torch.save(chunk_latents.cpu(), chunk_path)
                 temp_latent_paths.append(chunk_path)
+            base_filename = "narrative_video" if is_narrative else "single_video"
+            return self._finalize_generation(temp_latent_paths, base_filename, used_seed)
         except Exception as e:
+            logging.error(f"Error during unified generation: {e}", exc_info=True)
             return None, None, None
         finally:
             for path in temp_latent_paths:
                 if path.exists(): path.unlink()
             self.finalize()
     # ==========================================================================
     # --- UNIDADES DE TRABALHO E HELPERS INTERNOS ---
     # ==========================================================================
     @log_function_io
     def _generate_single_chunk_low(self, **kwargs) -> Optional[torch.Tensor]:
+        """[WORKER] Calls the pipeline to generate a single chunk of latents."""
         height_padded, width_padded = (self._align(d) for d in (kwargs['height'], kwargs['width']))
         downscale_factor = self.config.get("downscale_factor", 0.6666666)
         vae_scale_factor = self.pipeline.vae_scale_factor
         first_pass_config = self.config.get("first_pass", {}).copy()
         if kwargs.get("ltx_configs_override"):
+            self._apply_ui_overrides(first_pass_config, kwargs["ltx_configs_override"])
         pipeline_kwargs = {
             "prompt": kwargs['prompt'], "negative_prompt": kwargs['negative_prompt'],
     @log_function_io
     def prepare_condition_items(self, items_list: List, height: int, width: int, num_frames: int) -> List[ConditioningItem]:
+        """[UNIFIED] Prepares ConditioningItems from a mixed list of file paths and tensors."""
         if not items_list: return []
         height_padded, width_padded = self._align(height), self._align(width)
         padding_values = calculate_padding(height, width, height_padded, width_padded)
         conditioning_items = []
+        for media_item, frame, weight in items_list:
+            if isinstance(media_item, str):
+                tensor = load_image_to_tensor_with_resize_and_crop(media_item, height, width)
+                tensor = torch.nn.functional.pad(tensor, padding_values)
+                tensor = tensor.to(self.main_device, dtype=self.runtime_autocast_dtype)
+            elif isinstance(media_item, torch.Tensor):
+                tensor = media_item.to(self.main_device, dtype=self.runtime_autocast_dtype)
+            else:
+                logging.warning(f"Unknown conditioning media type: {type(media_item)}. Skipping.")
+                continue
             safe_frame = max(0, min(int(frame), num_frames - 1))
             conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
         return conditioning_items
+    def _apply_ui_overrides(self, config_dict: Dict, overrides: Dict):
+        """Applies advanced settings from the UI to a config dictionary."""
+        # Override step counts
+        for key in ["num_inference_steps", "skip_initial_inference_steps", "skip_final_inference_steps"]:
+            ui_value = overrides.get(key)
+            if ui_value and ui_value > 0:
+                config_dict[key] = ui_value
+                logging.info(f"Override: '{key}' set to {ui_value} by UI.")
+        # Override guidance settings
+        preset = overrides.get("guidance_preset", "Padrão (Recomendado)")
+        guidance_overrides = {}
         if preset == "Agressivo":
+            guidance_overrides = {"guidance_scale": [1, 2, 8, 12, 8, 2, 1], "stg_scale": [0, 0, 5, 6, 5, 3, 2]}
         elif preset == "Suave":
+            guidance_overrides = {"guidance_scale": [1, 1, 4, 5, 4, 1, 1], "stg_scale": [0, 0, 2, 2, 2, 1, 0]}
         elif preset == "Customizado":
             try:
+                guidance_overrides["guidance_scale"] = json.loads(overrides["guidance_scale_list"])
+                guidance_overrides["stg_scale"] = json.loads(overrides["stg_scale_list"])
+            except Exception as e:
+                logging.warning(f"Failed to parse custom guidance values: {e}. Using defaults.")
+        if guidance_overrides:
+            config_dict.update(guidance_overrides)
+            logging.info(f"Applying '{preset}' guidance preset overrides.")
     def _save_and_log_video(self, pixel_tensor: torch.Tensor, base_filename: str) -> Path:
         with tempfile.TemporaryDirectory() as temp_dir:
     def _calculate_aligned_frames(self, duration_s: float, min_frames: int = 1) -> int:
         num_frames = int(round(duration_s * DEFAULT_FPS))
         aligned_frames = self._align(num_frames)
+        return max(aligned_frames, min_frames)
+    def _get_random_seed(self) -> int:
+        """Always generates and returns a new random seed."""
+        return random.randint(0, 2**32 - 1)
 # ==============================================================================
 # --- INSTANCIAÇÃO SINGLETON ---
     logging.info("Global VideoService orchestrator instance created successfully.")
 except Exception as e:
     logging.critical(f"Failed to initialize VideoService: {e}", exc_info=True)
+    sys.exit(1)