Update api/ltx/ltx_aduc_pipeline.py
api/ltx/ltx_aduc_pipeline.py
CHANGED (+37 -30)
|
@@ -17,12 +17,19 @@ import traceback
|
|
| 17 |
import warnings
|
| 18 |
from pathlib import Path
|
| 19 |
from typing import Dict, List, Optional, Tuple
|
| 20 |
-
|
| 21 |
import torch
|
| 22 |
import yaml
|
| 23 |
import numpy as np
|
| 24 |
from einops import rearrange
|
| 25 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# ==============================================================================
|
| 28 |
# --- INITIAL SETUP & CONFIGURATION ---
|
|
@@ -36,40 +43,27 @@ logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
 DEPS_DIR = Path("/data")
 LTX_VIDEO_REPO_DIR = DEPS_DIR / "LTX-Video"
 BASE_CONFIG_PATH = LTX_VIDEO_REPO_DIR / "configs"
-DEFAULT_CONFIG_FILE = BASE_CONFIG_PATH / "ltxv-13b-0.9.8-
+DEFAULT_CONFIG_FILE = BASE_CONFIG_PATH / "ltxv-13b-0.9.8-dev-fp8.yaml"
 LTX_REPO_ID = "Lightricks/LTX-Video"
 RESULTS_DIR = Path("/app/output")
 DEFAULT_FPS = 24.0
 FRAMES_ALIGNMENT = 8
 
 # --- CRITICAL: DEPENDENCY PATH INJECTION ---
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
-from ltx_video.schedulers.rf import RectifiedFlowScheduler
-from transformers import T5EncoderModel, T5Tokenizer
-from safetensors import safe_open
-from managers.gpu_manager import gpu_manager
-from ltx_video.models.autoencoders.vae_encode import (normalize_latents, un_normalize_latents)
-from ltx_video.pipelines.pipeline_ltx_video import (ConditioningItem, LTXMultiScalePipeline, adain_filter_latent)
-from ltx_video.utils.inference_utils import load_image_to_tensor_with_resize_and_crop
-from managers.vae_manager import vae_manager_singleton
-from tools.video_encode_tool import video_encode_tool_singleton
-except ImportError as e:
-    logging.critical(f"A crucial LTX import failed. Check LTX-Video repo integrity. Error: {e}")
-    sys.exit(1)
+
+repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
+if repo_path not in sys.path:
+    sys.path.insert(0, repo_path)
+logging.info(f"LTX-Video repository added to sys.path: {repo_path}")
+
+from ltx_video.pipelines.pipeline_ltx_video import LTXVideoPipeline  # and others...
+from ltx_video.models.autoencoders.causal_video_autoencoder import CausalVideoAutoencoder
+from ltx_video.models.transformers.transformer3d import Transformer3DModel
+from ltx_video.models.transformers.symmetric_patchifier import SymmetricPatchifier
+from ltx_video.schedulers.rf import RectifiedFlowScheduler
+from ltx_video.models.autoencoders.vae_encode import (normalize_latents, un_normalize_latents)
+from ltx_video.pipelines.pipeline_ltx_video import (ConditioningItem, LTXMultiScalePipeline, adain_filter_latent)
+
 
 # ==============================================================================
 # --- UTILITY & HELPER FUNCTIONS ---
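The new path-injection block above replaces a try/except that previously guarded the LTX imports and exited on failure. A minimal standalone sketch of the same pattern with that fail-fast guard kept (only the paths and module names come from the diff; the rest is illustrative):

import logging
import sys
from pathlib import Path

LTX_VIDEO_REPO_DIR = Path("/data") / "LTX-Video"

# Make the vendored LTX-Video checkout importable before any ltx_video.* import.
repo_path = str(LTX_VIDEO_REPO_DIR.resolve())
if repo_path not in sys.path:
    sys.path.insert(0, repo_path)

try:
    from ltx_video.schedulers.rf import RectifiedFlowScheduler  # noqa: F401
except ImportError as e:
    # Fail fast if the checkout is missing or incomplete.
    logging.critical(f"A crucial LTX import failed. Check LTX-Video repo integrity. Error: {e}")
    sys.exit(1)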
@@ -637,9 +631,22 @@ class LtxAducPipeline:
             conditioning_items.append(ConditioningItem(tensor, safe_frame, float(weight)))
         return conditioning_items
 
+    def _resize_tensor(self, media_items, height, width):
+        n_frames = media_items.shape[2]
+        if media_items.shape[-2:] != (height, width):
+            media_items = rearrange(media_items, "b c n h w -> (b n) c h w")
+            media_items = F.interpolate(
+                media_items,
+                size=(height, width),
+                mode="bilinear",
+                align_corners=False,
+            )
+            media_items = rearrange(media_items, "(b n) c h w -> b c n h w", n=n_frames)
+        return media_items
+
     def _prepare_conditioning_tensor(self, media_path: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
         """Loads and processes an image to be a conditioning tensor."""
-        tensor =
+        tensor = self._resize_tensor(media_path, height, width)
         tensor = torch.nn.functional.pad(tensor, padding)
         # Conditioning tensors are needed on the main device for the transformer pass
         return tensor.to(self.main_device, dtype=self.runtime_autocast_dtype)
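The added _resize_tensor helper folds the frame axis into the batch axis so 2-D bilinear interpolation runs once per frame. Note that it references F, which assumes torch.nn.functional is imported as F elsewhere in the module (the diff does not show that import), and that _prepare_conditioning_tensor now passes it media_path (a string) where a decoded 5-D tensor appears to be expected. A self-contained sketch of the resize pattern itself, under a hypothetical name:

import torch
import torch.nn.functional as F
from einops import rearrange

def resize_video_tensor(media_items: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """Resize a (batch, channels, frames, h, w) tensor to (height, width) per frame."""
    n_frames = media_items.shape[2]
    if media_items.shape[-2:] != (height, width):
        # Fold frames into the batch so F.interpolate sees 4-D (N, C, H, W) input.
        frames = rearrange(media_items, "b c n h w -> (b n) c h w")
        frames = F.interpolate(frames, size=(height, width), mode="bilinear", align_corners=False)
        media_items = rearrange(frames, "(b n) c h w -> b c n h w", n=n_frames)
    return media_items

clip = torch.randn(1, 3, 2, 64, 64)  # a 2-frame RGB clip
assert resize_video_tensor(clip, 128, 128).shape == (1, 3, 2, 128, 128)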
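After resizing, the tensor is padded before being moved to the main device. torch.nn.functional.pad with a 4-tuple pads the last two dimensions in (left, right, top, bottom) order, so a padding tuple computed for width/height alignment applies directly to the trailing (h, w) axes. For example:

import torch
import torch.nn.functional as F

t = torch.zeros(1, 3, 1, 120, 120)
# 4-tuple = (w_left, w_right, h_top, h_bottom), applied to the last two dims
padded = F.pad(t, (4, 4, 4, 4))
assert padded.shape == (1, 3, 1, 128, 128)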
|