Spaces:
Running
on
Zero
Running
on
Zero
update-inference (#4)
Browse files
- Update inference.py (7b9f053b980d37b4af43b808e6cfada81caee869)
- Update app.py (9742f923e6f1247a0c029af3dce3613733f5530c)
- app.py +1 -1
- inference.py +11 -15
app.py
CHANGED
|
@@ -346,7 +346,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 346 |
with gr.Column():
|
| 347 |
with gr.Tab("image-to-video") as image_tab:
|
| 348 |
video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
|
| 349 |
-
image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam"])
|
| 350 |
i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
|
| 351 |
i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
|
| 352 |
with gr.Tab("text-to-video") as text_tab:
|
|
|
|
| 346 |
with gr.Column():
|
| 347 |
with gr.Tab("image-to-video") as image_tab:
|
| 348 |
video_i_hidden = gr.Textbox(label="video_i", visible=False, value=None)
|
| 349 |
+
image_i2v = gr.Image(label="Input Image", type="filepath", sources=["upload", "webcam", "clipboard"])
|
| 350 |
i2v_prompt = gr.Textbox(label="Prompt", value="The creature from the image starts to move", lines=3)
|
| 351 |
i2v_button = gr.Button("Generate Image-to-Video", variant="primary")
|
| 352 |
with gr.Tab("text-to-video") as text_tab:
|
inference.py
CHANGED
|
@@ -11,6 +11,7 @@ import imageio
|
|
| 11 |
import json
|
| 12 |
import numpy as np
|
| 13 |
import torch
|
|
|
|
| 14 |
from safetensors import safe_open
|
| 15 |
from PIL import Image
|
| 16 |
from transformers import (
|
|
@@ -35,6 +36,7 @@ from ltx_video.pipelines.pipeline_ltx_video import (
|
|
| 35 |
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
| 36 |
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
|
| 37 |
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
|
|
|
|
| 38 |
|
| 39 |
MAX_HEIGHT = 720
|
| 40 |
MAX_WIDTH = 1280
|
|
@@ -96,7 +98,12 @@ def load_image_to_tensor_with_resize_and_crop(
|
|
| 96 |
image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
|
| 97 |
if not just_crop:
|
| 98 |
image = image.resize((target_width, target_height))
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
frame_tensor = (frame_tensor / 127.5) - 1.0
|
| 101 |
# Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
|
| 102 |
return frame_tensor.unsqueeze(0).unsqueeze(2)
|
|
@@ -266,13 +273,6 @@ def main():
|
|
| 266 |
help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
|
| 267 |
)
|
| 268 |
|
| 269 |
-
parser.add_argument(
|
| 270 |
-
"--strength",
|
| 271 |
-
type=float,
|
| 272 |
-
default=1.0,
|
| 273 |
-
help="Editing strength (noising level) for video-to-video pipeline.",
|
| 274 |
-
)
|
| 275 |
-
|
| 276 |
# Conditioning arguments
|
| 277 |
parser.add_argument(
|
| 278 |
"--conditioning_media_paths",
|
|
@@ -407,7 +407,6 @@ def infer(
|
|
| 407 |
negative_prompt: str,
|
| 408 |
offload_to_cpu: bool,
|
| 409 |
input_media_path: Optional[str] = None,
|
| 410 |
-
strength: Optional[float] = 1.0,
|
| 411 |
conditioning_media_paths: Optional[List[str]] = None,
|
| 412 |
conditioning_strengths: Optional[List[float]] = None,
|
| 413 |
conditioning_start_frames: Optional[List[int]] = None,
|
|
@@ -422,12 +421,10 @@ def infer(
|
|
| 422 |
|
| 423 |
models_dir = "MODEL_DIR"
|
| 424 |
|
| 425 |
-
|
| 426 |
-
ltxv_model_name_or_path = "ltxv-13b-0.9.7-distilled-rc3.safetensors"
|
| 427 |
if not os.path.isfile(ltxv_model_name_or_path):
|
| 428 |
ltxv_model_path = hf_hub_download(
|
| 429 |
-
repo_id="
|
| 430 |
-
#repo_id="Lightricks/LTX-Video",
|
| 431 |
filename=ltxv_model_name_or_path,
|
| 432 |
local_dir=models_dir,
|
| 433 |
repo_type="model",
|
|
@@ -616,7 +613,6 @@ def infer(
|
|
| 616 |
frame_rate=frame_rate,
|
| 617 |
**sample,
|
| 618 |
media_items=media_item,
|
| 619 |
-
strength=strength,
|
| 620 |
conditioning_items=conditioning_items,
|
| 621 |
is_video=True,
|
| 622 |
vae_per_channel_normalize=True,
|
|
@@ -775,4 +771,4 @@ def load_media_file(
|
|
| 775 |
|
| 776 |
|
| 777 |
if __name__ == "__main__":
|
| 778 |
-
main()
|
|
|
|
| 11 |
import json
|
| 12 |
import numpy as np
|
| 13 |
import torch
|
| 14 |
+
import cv2
|
| 15 |
from safetensors import safe_open
|
| 16 |
from PIL import Image
|
| 17 |
from transformers import (
|
|
|
|
| 36 |
from ltx_video.schedulers.rf import RectifiedFlowScheduler
|
| 37 |
from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
|
| 38 |
from ltx_video.models.autoencoders.latent_upsampler import LatentUpsampler
|
| 39 |
+
import ltx_video.pipelines.crf_compressor as crf_compressor
|
| 40 |
|
| 41 |
MAX_HEIGHT = 720
|
| 42 |
MAX_WIDTH = 1280
|
|
|
|
| 98 |
image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
|
| 99 |
if not just_crop:
|
| 100 |
image = image.resize((target_width, target_height))
|
| 101 |
+
|
| 102 |
+
image = np.array(image)
|
| 103 |
+
image = cv2.GaussianBlur(image, (3, 3), 0)
|
| 104 |
+
frame_tensor = torch.from_numpy(image).float()
|
| 105 |
+
frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
|
| 106 |
+
frame_tensor = frame_tensor.permute(2, 0, 1)
|
| 107 |
frame_tensor = (frame_tensor / 127.5) - 1.0
|
| 108 |
# Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
|
| 109 |
return frame_tensor.unsqueeze(0).unsqueeze(2)
|
|
|
|
| 273 |
help="Path to the input video (or imaage) to be modified using the video-to-video pipeline",
|
| 274 |
)
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
# Conditioning arguments
|
| 277 |
parser.add_argument(
|
| 278 |
"--conditioning_media_paths",
|
|
|
|
| 407 |
negative_prompt: str,
|
| 408 |
offload_to_cpu: bool,
|
| 409 |
input_media_path: Optional[str] = None,
|
|
|
|
| 410 |
conditioning_media_paths: Optional[List[str]] = None,
|
| 411 |
conditioning_strengths: Optional[List[float]] = None,
|
| 412 |
conditioning_start_frames: Optional[List[int]] = None,
|
|
|
|
| 421 |
|
| 422 |
models_dir = "MODEL_DIR"
|
| 423 |
|
| 424 |
+
ltxv_model_name_or_path = pipeline_config["checkpoint_path"]
|
|
|
|
| 425 |
if not os.path.isfile(ltxv_model_name_or_path):
|
| 426 |
ltxv_model_path = hf_hub_download(
|
| 427 |
+
repo_id="Lightricks/LTX-Video",
|
|
|
|
| 428 |
filename=ltxv_model_name_or_path,
|
| 429 |
local_dir=models_dir,
|
| 430 |
repo_type="model",
|
|
|
|
| 613 |
frame_rate=frame_rate,
|
| 614 |
**sample,
|
| 615 |
media_items=media_item,
|
|
|
|
| 616 |
conditioning_items=conditioning_items,
|
| 617 |
is_video=True,
|
| 618 |
vae_per_channel_normalize=True,
|
|
|
|
| 771 |
|
| 772 |
|
| 773 |
if __name__ == "__main__":
|
| 774 |
+
main()
|