# Hugging Face Space: decart-ai/lucy-edit-dev (Running on Zero)
# File size: 8,619 bytes

import gradio as gr
import torch
import spaces
from typing import List
from PIL import Image
from diffusers import LucyEditPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video, load_video
import tempfile
import os

# Repository id handed to both from_pretrained() calls below.
model_id = "decart-ai/Lucy-Edit-Dev"
# The VAE is loaded on its own from the "vae" subfolder in float32, while the
# rest of the pipeline (next line) is loaded in bfloat16.
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
# Build the editing pipeline around the separately loaded VAE.
pipe = LucyEditPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
# Runs at import time; process_video() relies on this module-level `pipe`.
pipe.to("cuda")

def calculate_resolution(input_width, input_height, min_dimension=480, max_dimension=832):
    """Calculate an output resolution that approximates the input aspect ratio.

    Near-square inputs (aspect ratio between 0.95 and 1.05) always map to
    640x640. For every other input the longer side starts at ``max_dimension``
    and the shorter side is derived from the aspect ratio. Each side is then
    clamped to ``[min_dimension, max_dimension]`` and rounded to the nearest
    multiple of 16, so extreme aspect ratios are not preserved exactly.

    Args:
        input_width: Width of the source video in pixels.
        input_height: Height of the source video in pixels (must be non-zero).
        min_dimension: Lower bound applied to each side before rounding.
        max_dimension: Upper bound applied to each side before rounding.

    Returns:
        A ``(width, height)`` tuple of ints.

    Raises:
        ZeroDivisionError: If ``input_height`` is 0.
    """
    # Ensure dimensions are multiples of 16
    def round_to_16(x):
        return int(round(x / 16.0) * 16)

    # Aspect ratio is always width / height, for both orientations.
    aspect_ratio = input_width / input_height

    # Square videos
    if 0.95 <= aspect_ratio <= 1.05:
        return 640, 640

    # Landscape videos (width > height)
    if aspect_ratio > 1:
        # Start with max width
        new_width = max_dimension
        new_height = int(new_width / aspect_ratio)

        # If height is too small, use min height instead
        if new_height < min_dimension:
            new_height = min_dimension
            new_width = int(new_height * aspect_ratio)

            # Clamp width if needed
            if new_width > max_dimension:
                new_width = max_dimension
                new_height = int(new_width / aspect_ratio)

    # Portrait videos (height > width)
    else:
        # Start with max height
        new_height = max_dimension
        new_width = int(new_height * aspect_ratio)

        # If width is too small, use min width instead
        if new_width < min_dimension:
            new_width = min_dimension
            new_height = int(new_width / aspect_ratio)

            # Clamp height if needed
            if new_height > max_dimension:
                new_height = max_dimension
                # width = height * (width / height). Dividing here would
                # inflate the width and turn tall videos into squares after
                # the final clamp.
                new_width = int(new_height * aspect_ratio)

    # Round to multiples of 16 and ensure within bounds
    final_width = round_to_16(max(min_dimension, min(max_dimension, new_width)))
    final_height = round_to_16(max(min_dimension, min(max_dimension, new_height)))

    return final_width, final_height

@spaces.GPU(duration=90)
def process_video(
    video_path,
    prompt,
    negative_prompt,
    num_frames,
    auto_resize,
    manual_height,
    manual_width,
    guidance_scale,
    progress=gr.Progress(track_tqdm=True)
):
    """Edit a video with the module-level pipeline and return the result path.

    The input is decoded once, truncated to at most ``num_frames`` frames,
    resized to the target resolution, run through ``pipe`` and exported as an
    MP4 at 24 fps.

    Args:
        video_path: Path of the input video file (falsy if nothing was uploaded).
        prompt: Text describing the desired edit.
        negative_prompt: Text describing what should not appear.
        num_frames: Maximum number of leading frames to process.
        auto_resize: If true, derive the output size from the input via
            ``calculate_resolution``; otherwise use the manual values.
        manual_height: Output height used when ``auto_resize`` is false.
        manual_width: Output width used when ``auto_resize`` is false.
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        Path of a temporary ``.mp4`` file containing the edited video. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If no video was provided or no frames could be loaded.
    """
    # Fail with a readable message instead of an opaque loader error when the
    # user presses the button without uploading anything.
    if not video_path:
        raise gr.Error("Please provide an input video")

    # UI sliders may deliver numbers as floats; slicing needs a real int.
    num_frames = int(num_frames)

    # Load and preprocess video
    progress(0.2, desc="Loading video...")

    # Decode the file a single time; the same frames provide both the
    # original dimensions and the pipeline input.
    frames = load_video(video_path)
    if not frames:
        raise gr.Error("Could not load video or video is empty")
    print(len(frames))

    original_width, original_height = frames[0].size

    # Calculate dimensions
    if auto_resize:
        width, height = calculate_resolution(original_width, original_height)
        gr.Info(f"Auto-resized from {original_width}x{original_height} to {width}x{height} (preserving aspect ratio)")
    else:
        width, height = int(manual_width), int(manual_height)
        if abs((original_width/original_height) - (width/height)) > 0.1:
            gr.Warning(f"Output aspect ratio ({width}x{height}) differs significantly from input ({original_width}x{original_height}). Video may appear stretched.")

    # Keep only the requested leading frames and resize them to the target size.
    video = [frame.resize((width, height)) for frame in frames[:num_frames]]

    # Ensure we have the right number of frames
    if len(video) < num_frames:
        gr.Warning(f"Video has only {len(video)} frames, using all available frames.")
        num_frames = len(video)

    # Generate edited video
    progress(0.5, desc="Generating edited video...")
    output = pipe(
        prompt=prompt,
        video=video,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
    ).frames[0]

    # Export to temporary file
    progress(0.9, desc="Exporting video...")
    # Only reserve a unique path here; the handle is closed before the
    # exporter writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
        output_path = tmp_file.name

    export_to_video(output, output_path, fps=24)

    progress(1.0, desc="Complete!")
    return output_path

# Custom CSS passed to gr.Blocks: caps the width of elements with the
# `fillable` class.
css = '''
.fillable{max-width: 1100px !important}
'''
with gr.Blocks(title="Lucy Edit - Video Editing with Text", css=css) as demo:
    # Header with logo and project links (plain string: no interpolation needed).
    gr.HTML("""<p align="center">
  <img src="https://huggingface.co/decart-ai/Lucy-Edit-Dev/resolve/main/assets/logo.png" width="480" style="margin-top: -25px" alt="Lucy Edit Dev Logo"/>
</p>

<p align="center">
  🤗 <a href="https://huggingface.co/decart-ai/Lucy-Edit-Dev"><b>Model</b></a>
  &nbsp;|&nbsp; 🧪 <a href="https://github.com/DecartAI/lucy-edit-comfyui"><b>ComfyUI</b></a>
  &nbsp;|&nbsp; 📖 <a href="https://platform.decart.ai">Playground</a>
  &nbsp;|&nbsp; 📑 <a href="#">arXiv (Coming soon)</a>
  &nbsp;|&nbsp; 💬 <a href="https://discord.gg/decart">Discord</a>
</p>""")

    with gr.Row():
        with gr.Column(scale=1):
            # Input controls
            video_input = gr.Video(label="Input Video")

            prompt = gr.Textbox(
                label="Edit Prompt",
                placeholder="Describe what you want to change in the video...",
                lines=3
            )

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (optional)",
                    placeholder="Describe what you DON'T want in the video...",
                    lines=2
                )
                auto_resize = gr.Checkbox(
                    label="Auto-resize (preserve aspect ratio)",
                    value=True,
                    info="Automatically calculate dimensions based on input video"
                )

                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=1,
                    maximum=120,
                    value=81,
                    step=1,
                    info="More frames = longer processing time"
                )

                with gr.Row():
                    manual_height = gr.Slider(
                        label="Height (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=480,
                        step=32
                    )
                    manual_width = gr.Slider(
                        label="Width (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=832,
                        step=32
                    )

                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=20.0,
                    value=5.0,
                    step=0.5,
                    info="Higher values follow the prompt more strictly"
                )

            generate_btn = gr.Button("Edit Video", variant="primary")

        with gr.Column(scale=1):
            video_output = gr.Video(label="Edited Video")

    # Components in the positional order process_video() expects; shared by
    # the examples and the button so the two can never drift apart.
    process_inputs = [
        video_input,
        prompt,
        negative_prompt,
        num_frames,
        auto_resize,
        manual_height,
        manual_width,
        guidance_scale,
    ]

    # process_video() has no defaults for its settings, so every example row
    # must carry a value for each input. These mirror the component defaults
    # above: negative prompt, frames, auto-resize, height, width, guidance.
    example_settings = ["", 81, True, 480, 832, 5.0]

    # Placed below the two-column row rather than inside it.
    gr.Examples(
        examples=[
            [video, text, *example_settings]
            for video, text in [
                ("examples/man_walking.mp4", "make the man into an alien"),
                ("examples/leopard.mp4", "make the leopard into a lion"),
                ("examples/woman.mp4", "make the woman's coat blue"),
            ]
        ],
        inputs=process_inputs,
        outputs=video_output,
        fn=process_video,
        cache_examples="lazy",
    )

    # Event handlers
    generate_btn.click(
        fn=process_video,
        inputs=process_inputs,
        outputs=video_output
    )

# Start the Gradio app only when this file is executed as a script, not when
# it is imported; share=True is forwarded to Gradio's launch().
if __name__ == "__main__":
    demo.launch(share=True)