"""Gradio demo: text-guided video editing with the Lucy Edit diffusion pipeline.

The module loads the pipeline once at import time, exposes ``process_video``
as the GPU-backed handler, and builds the Gradio Blocks UI around it.
"""

# NOTE: import order is kept as in the original file on purpose; on ZeroGPU
# Spaces the relative order of `spaces` and CUDA-touching libraries can matter.
import gradio as gr
import torch
import spaces
from typing import List
from PIL import Image
from diffusers import LucyEditPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video, load_video
import tempfile
import os

model_id = "decart-ai/Lucy-Edit-Dev"
# The VAE is loaded in float32 while the rest of the pipeline runs in bfloat16.
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
pipe = LucyEditPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
pipe.to("cuda")


def calculate_resolution(input_width, input_height, min_dimension=480, max_dimension=832, compatible_round=32):
    """Pick an output resolution for a video of the given size.

    The longer side is set to ``max_dimension`` and the shorter side follows
    the aspect ratio; if the shorter side would drop below ``min_dimension``
    it is raised to that minimum (so very wide/tall inputs are not aspect
    preserving). Both sides are then rounded to a multiple of
    ``compatible_round`` and clamped into ``[min_dimension, max_dimension]``.

    Args:
        input_width: Source frame width in pixels (must be > 0).
        input_height: Source frame height in pixels (must be > 0).
        min_dimension: Lower bound applied to each output side.
        max_dimension: Upper bound applied to each output side.
        compatible_round: Output sides are rounded to a multiple of this.

    Returns:
        A ``(width, height)`` tuple of ints.

    Raises:
        ValueError: If either input dimension is not positive.
    """
    if input_width <= 0 or input_height <= 0:
        # Previously this surfaced as a bare ZeroDivisionError.
        raise ValueError(
            f"Video dimensions must be positive, got {input_width}x{input_height}"
        )

    def snap(value):
        """Round to the nearest compatible multiple, then clamp into bounds."""
        snapped = int(round(value / compatible_round) * compatible_round)
        return max(min_dimension, min(max_dimension, snapped))

    aspect_ratio = input_width / input_height

    # Near-square input: use a fixed 640 side, kept inside the requested
    # bounds (identical to the previous hard-coded 640 for default bounds).
    if 0.98 <= aspect_ratio <= 1.02:
        side = snap(640)
        return side, side

    if aspect_ratio > 1:
        # Landscape: start from the maximum width.
        new_width = max_dimension
        new_height = new_width / aspect_ratio
        if new_height < min_dimension:
            new_height = min_dimension
            new_width = min(new_height * aspect_ratio, max_dimension)
    else:
        # Portrait: start from the maximum height.
        new_height = max_dimension
        new_width = new_height * aspect_ratio
        if new_width < min_dimension:
            new_width = min_dimension
            new_height = min(new_width / aspect_ratio, max_dimension)

    return snap(new_width), snap(new_height)


def _prepare_frames(frames: List[Image.Image], num_frames: int, width: int, height: int) -> List[Image.Image]:
    """Keep at most ``num_frames`` leading frames, each resized to ``width`` x ``height``."""
    return [frame.resize((width, height)) for frame in frames[:num_frames]]


@spaces.GPU(duration=120)
def process_video(
    video_path,
    prompt,
    negative_prompt="",
    enhance_prompt=True,
    num_frames=81,
    auto_resize=True,
    manual_height=480,
    manual_width=832,
    guidance_scale=5,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit a video according to a text prompt and return the result's path.

    Args:
        video_path: Path of the uploaded input video.
        prompt: Text describing the desired edit.
        negative_prompt: Text describing what should be avoided.
        enhance_prompt: Accepted for UI compatibility; prompt enhancement is
            not implemented yet, so the flag currently has no effect.
        num_frames: Maximum number of leading frames to edit; reduced to the
            video length when the video is shorter.
        auto_resize: When true, derive the output size from the input video
            via ``calculate_resolution``; otherwise use the manual values.
        manual_height: Output height used when ``auto_resize`` is false.
        manual_width: Output width used when ``auto_resize`` is false.
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Path to a temporary ``.mp4`` file containing the edited video.

    Raises:
        gr.Error: If no video or prompt was provided, or the video is empty.
    """
    # Fail fast with a readable message instead of an opaque loader error.
    if not video_path:
        raise gr.Error("Please upload a video first")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter an edit prompt")

    progress(0.2, desc="Loading video...")

    # TODO: add the prompt enhancement API call here; `enhance_prompt` is
    # currently ignored.

    # Decode the video once and reuse the frames for both the size probe and
    # the pipeline input (the file used to be decoded twice).
    frames = load_video(video_path)
    if not frames:
        raise gr.Error("Could not load video or video is empty")

    original_width, original_height = frames[0].size
    if auto_resize:
        width, height = calculate_resolution(original_width, original_height)
    else:
        # Slider values may arrive as floats; PIL needs integer sizes.
        width, height = int(manual_width), int(manual_height)

    # Never request more frames than the video has, and always at least one.
    # NOTE(review): the pipeline may additionally constrain frame counts
    # (e.g. to a fixed temporal stride) - confirm against the diffusers docs.
    num_frames = max(1, min(int(num_frames), len(frames)))
    video = _prepare_frames(frames, num_frames, width, height)

    progress(0.5, desc="Generating edited video...")
    output = pipe(
        prompt=prompt,
        video=video,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
    ).frames[0]

    progress(0.9, desc="Exporting video...")
    # Reserve a unique path, then close the handle before the exporter opens
    # the file itself; delete=False because Gradio serves it after we return.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
        output_path = tmp_file.name
    export_to_video(output, output_path, fps=24)

    progress(1.0, desc="Complete!")
    return output_path


css = '''
.fillable{max-width: 1100px !important}
'''

with gr.Blocks(title="Lucy Edit - Video Editing with Text", css=css) as demo:
    # NOTE(review): the header text below looks mis-encoded (emoji mojibake)
    # and its link markup appears to be missing; it is kept verbatim here and
    # should be restored from the original source.
    gr.HTML("""
๐ค Model | ๐งช ComfyUI | ๐ Playground | ๐ arXiv (Coming soon) | ๐ฌ Discord
""")

    with gr.Row():
        with gr.Column(scale=1):
            # Input controls
            video_input = gr.Video(label="Input Video")

            prompt = gr.Textbox(
                label="Edit Prompt",
                placeholder="Describe what you want to change in the video...",
                lines=3,
            )

            with gr.Accordion("Advanced Settings", open=False):
                enhance_prompt = gr.Checkbox(label="Enhance Prompt", value=True)
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (optional)",
                    placeholder="Describe what you DON'T want in the video...",
                    lines=2,
                )
                auto_resize = gr.Checkbox(
                    label="Auto-resize (preserve aspect ratio)",
                    value=True,
                    info="Automatically calculate dimensions based on input video",
                )
                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=1,
                    maximum=120,
                    value=81,
                    step=1,
                    info="More frames = longer processing time",
                )
                with gr.Row():
                    manual_height = gr.Slider(
                        label="Height (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=480,
                        step=32,
                    )
                    manual_width = gr.Slider(
                        label="Width (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=832,
                        step=32,
                    )
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=20.0,
                    value=5.0,
                    step=0.5,
                    info="Higher values follow the prompt more strictly",
                )

            generate_btn = gr.Button("Edit Video", variant="primary")

        with gr.Column(scale=1):
            video_output = gr.Video(label="Edited Video")

    # Examples only supply the video and prompt; every other argument of
    # process_video falls back to its default.
    gr.Examples(
        examples=[
            ["examples/man_walking.mp4", "make the man into an alien"],
            ["examples/leopard.mp4", "make the leopard into a lion"],
            ["examples/woman.mp4", "make the woman's coat blue"],
        ],
        inputs=[video_input, prompt],
        outputs=video_output,
        fn=process_video,
        cache_examples="lazy",
    )

    # Event handlers: input order must match process_video's parameter order.
    generate_btn.click(
        fn=process_video,
        inputs=[
            video_input,
            prompt,
            negative_prompt,
            enhance_prompt,
            num_frames,
            auto_resize,
            manual_height,
            manual_width,
            guidance_scale,
        ],
        outputs=video_output,
    )

if __name__ == "__main__":
    demo.launch(share=True)