Spaces:

decart-ai
/

lucy-edit-dev

Running on Zero

File size: 8,581 Bytes

c232be3
 
 
 
 
3848f65
c232be3
 
 
 
 
3848f65
5ec1d9b
c232be3
 
abb9ea5
b54a3fb
abb9ea5
 
 
94439e4
c232be3
 
94439e4
b54a3fb
 
4e257f3
94439e4
c232be3
 
b54a3fb
4e257f3
b54a3fb
94439e4
b54a3fb
4e257f3
 
b54a3fb
 
4e257f3
 
94439e4
abb9ea5
94439e4
b54a3fb
c232be3
b54a3fb
4e257f3
b54a3fb
94439e4
b54a3fb
4e257f3
 
b54a3fb
 
4e257f3
 
94439e4
abb9ea5
b54a3fb
c232be3
170e83c
c232be3
 
 
685442e
 
 
 
 
 
64c0c7c
c232be3
64c0c7c
 
94439e4
64c0c7c
 
 
 
 
94439e4
64c0c7c
 
 
c232be3
64c0c7c
 
 
94439e4
64c0c7c
 
 
 
 
 
 
 
94439e4
64c0c7c
 
94439e4
64c0c7c
 
 
94439e4
64c0c7c
 
 
 
 
 
 
 
 
 
 
94439e4
64c0c7c
 
 
 
94439e4
64c0c7c
94439e4
64c0c7c
 
c232be3
8887124
816a397
8887124
 
c41ebfd
7a0db94
c41ebfd
 
 
a5aca83
 
c41ebfd
 
 
 
94439e4
c232be3
 
 
 
94439e4
c232be3
 
 
ffa8928
c232be3
94439e4
c232be3
 
 
 
 
 
 
 
 
 
 
94439e4
c232be3
 
 
 
 
 
 
 
94439e4
c232be3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94439e4
c232be3
 
 
 
 
 
 
 
94439e4
2c20354
94439e4
c232be3
526013e
1b3a9ec
f92472c
1b3a9ec
94439e4
 
1b3a9ec
18076f6
1b3a9ec
 
 
 
94439e4
c232be3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94439e4

import gradio as gr
import torch
import spaces
from typing import List
from PIL import Image
from diffusers import LucyEditPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video, load_video
import tempfile
import os

# Hugging Face Hub repository that provides both the VAE and the editing pipeline.
model_id = "decart-ai/Lucy-Edit-Dev"
# The VAE is loaded on its own from the repo's "vae" subfolder in float32, while the
# rest of the pipeline below is loaded in bfloat16.
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = LucyEditPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
# Moved to CUDA once at import time; process_video() reuses this module-level pipeline.
pipe.to("cuda")

def calculate_resolution(input_width, input_height, min_dimension=480, max_dimension=832, compatible_round=32):
    """Pick an output (width, height) for a video of the given input size.

    The longer side is set to ``max_dimension`` and the shorter side follows the
    input aspect ratio. If the shorter side would drop below ``min_dimension`` it
    is raised to ``min_dimension`` and the longer side is capped at
    ``max_dimension``. Both values are then rounded to the nearest multiple of
    ``compatible_round`` and clamped into ``[min_dimension, max_dimension]``.

    Inputs whose aspect ratio lies within 0.98..1.02 are treated as square and
    always map to ``(640, 640)``.

    Returns:
        A ``(width, height)`` tuple.
    """
    def snap(value):
        # Round to the nearest multiple of the grid, then clamp into the allowed range.
        on_grid = int(round(value / compatible_round) * compatible_round)
        return max(min_dimension, min(max_dimension, on_grid))

    ratio = input_width / input_height

    # Near-square input: fixed square output.
    if 0.98 <= ratio <= 1.02:
        return 640, 640

    if ratio > 1:
        # Landscape: start from the widest allowed frame.
        out_w = max_dimension
        out_h = out_w / ratio
        if out_h < min_dimension:
            # Too flat: pin the height to the minimum and cap the width.
            out_h = min_dimension
            out_w = out_h * ratio
            if out_w > max_dimension:
                out_w = max_dimension
    else:
        # Portrait: start from the tallest allowed frame.
        out_h = max_dimension
        out_w = out_h * ratio
        if out_w < min_dimension:
            # Too narrow: pin the width to the minimum and cap the height.
            out_w = min_dimension
            out_h = out_w / ratio
            if out_h > max_dimension:
                out_h = max_dimension

    return snap(out_w), snap(out_h)


@spaces.GPU(duration=120)
def process_video(
    video_path,
    prompt,
    negative_prompt="",
    num_frames=81,
    auto_resize=True,
    manual_height=480,
    manual_width=832,
    guidance_scale=5,
    progress=gr.Progress(track_tqdm=True)
):
    """Edit a video according to a text prompt and return the path of the result.

    Args:
        video_path: Path of the input video file (as delivered by the video input).
        prompt: Text describing the desired edit.
        negative_prompt: Text describing what should not appear in the result.
        num_frames: Maximum number of leading frames to edit. Reduced to the
            clip length when the clip has fewer frames.
        auto_resize: When true, width/height are derived from the first frame via
            ``calculate_resolution``; otherwise the manual values are used.
        manual_height: Output height used when ``auto_resize`` is false.
        manual_width: Output width used when ``auto_resize`` is false.
        guidance_scale: Passed through to the pipeline unchanged.
        progress: Gradio progress tracker used to report coarse stages.

    Returns:
        Path to a temporary ``.mp4`` file containing the edited frames at 24 fps.

    Raises:
        gr.Error: If no video was supplied or no frames could be read from it.
    """
    # Fail early with a readable message instead of an opaque loader error
    # when the user clicks the button without uploading a video.
    if not video_path:
        raise gr.Error("Please provide an input video")

    progress(0.2, desc="Loading video...")

    # Decode the file once; the same frames serve both for measuring the
    # original size and as the pipeline input.
    frames = load_video(video_path)
    if not frames:
        raise gr.Error("Could not load video or video is empty")

    if auto_resize:
        original_width, original_height = frames[0].size
        width, height = calculate_resolution(original_width, original_height)
    else:
        width, height = manual_width, manual_height

    # Never request more frames than the clip actually has.
    # NOTE(review): Wan-family pipelines generally expect frame counts of the
    # form 4k+1 - confirm how LucyEditPipeline treats other values.
    num_frames = min(len(frames), num_frames)
    video = [frame.resize((width, height)) for frame in frames[:num_frames]]

    # Generate edited video
    progress(0.5, desc="Generating edited video...")
    output = pipe(
        prompt=prompt,
        video=video,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
    ).frames[0]

    # Reserve a unique path; the handle is closed before the exporter writes
    # to it, and delete=False keeps the file around for Gradio to serve.
    progress(0.9, desc="Exporting video...")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
        output_path = tmp_file.name

    export_to_video(output, output_path, fps=24)

    progress(1.0, desc="Complete!")
    return output_path

# Widen Gradio's default content container.
css = '''
.fillable{max-width: 1100px !important}
'''
# UI definition: header, input controls on the left, edited video on the right,
# cached examples, and the click handler that runs process_video.
with gr.Blocks(title="Lucy Edit - Video Editing with Text", css=css) as demo:
    # Header with logo and project links. The "Model" link targets the Hugging Face
    # model repository (the same repo the logo is served from).
    gr.HTML("""<p align="center">
  <img src="https://huggingface.co/decart-ai/Lucy-Edit-Dev/resolve/main/assets/logo.png" width="480" style="margin-top: -25px" alt="Lucy Edit Dev Logo"/>
</p>

<p align="center">
  🤗 <a href="https://huggingface.co/decart-ai/Lucy-Edit-Dev"><b>Model</b></a>
  &nbsp;|&nbsp; 🧪 <a href="https://github.com/DecartAI/lucy-edit-comfyui"><b>ComfyUI</b></a>
  &nbsp;|&nbsp; 📖 <a href="https://platform.decart.ai">Playground</a>
  &nbsp;|&nbsp; 📑 <a href="#">arXiv (Coming soon)</a>
  &nbsp;|&nbsp; 💬 <a href="https://discord.gg/decart">Discord</a>
</p>""")

    with gr.Row():
        with gr.Column(scale=1):
            # Input controls
            video_input = gr.Video(label="Input Video")

            prompt = gr.Textbox(
                label="Edit Prompt",
                placeholder="Describe what you want to change in the video...",
                lines=3
            )

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (optional)",
                    placeholder="Describe what you DON'T want in the video...",
                    lines=2
                )
                auto_resize = gr.Checkbox(
                    label="Auto-resize (preserve aspect ratio)",
                    value=True,
                    info="Automatically calculate dimensions based on input video"
                )

                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=1,
                    maximum=120,
                    value=81,
                    step=1,
                    info="More frames = longer processing time"
                )

                # Only used by process_video when auto-resize is switched off.
                with gr.Row():
                    manual_height = gr.Slider(
                        label="Height (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=480,
                        step=32
                    )
                    manual_width = gr.Slider(
                        label="Width (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=832,
                        step=32
                    )

                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=20.0,
                    value=5.0,
                    step=0.5,
                    info="Higher values follow the prompt more strictly"
                )

            generate_btn = gr.Button("Edit Video", variant="primary")

        with gr.Column(scale=1):
            video_output = gr.Video(label="Edited Video", autoplay=True)

    # Examples supply only the video and the prompt; every other process_video
    # argument falls back to its default value.
    gr.Examples(
        examples=[
            ["examples/neon.mp4", "Add a colorful scarlet macaw parrot perched on the man's left shoulder, bright red and blue wing feathers with yellow accents, curved black beak, intelligent dark eyes, talons gripping fabric naturally, long tail feathers extending downward, glossy plumage catching light, slight wing adjustment for balance, natural weight distribution, soft shadow beneath bird."],
            ["examples/painter.mp4", "Change the hair color to platinum blonde with natural highlights, subtle root shadowing, silky texture, gentle waves, soft shine, dimensional tones, strand definition, natural movement, professional color treatment, salon-quality finish, light-catching shimmer, varied blonde shades from honey to ash, realistic color gradation, healthy glossy appearance, volumetric lighting interaction."],
        ],
        inputs=[video_input, prompt],
        outputs=video_output,
        fn=process_video,
        cache_examples="lazy",
    )

    # Event handlers: the input order must match process_video's positional parameters.
    generate_btn.click(
        fn=process_video,
        inputs=[
            video_input,
            prompt,
            negative_prompt,
            num_frames,
            auto_resize,
            manual_height,
            manual_width,
            guidance_scale
        ],
        outputs=video_output
    )

# Start the Gradio app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link for the app.
    demo.launch(share=True)