File size: 8,619 Bytes
182aa1d
 
 
 
 
44edc82
182aa1d
 
 
 
 
44edc82
2ce5689
182aa1d
 
5ed282c
182aa1d
 
 
 
 
 
 
 
 
 
5ed282c
182aa1d
 
 
5ed282c
 
 
 
 
 
 
 
182aa1d
5ed282c
 
 
 
182aa1d
5ed282c
182aa1d
5ed282c
 
 
 
 
 
 
 
182aa1d
5ed282c
 
 
 
 
 
 
 
182aa1d
5ed282c
182aa1d
 
 
 
 
 
 
 
 
 
 
4917153
182aa1d
4917153
 
 
 
 
 
 
 
182aa1d
4917153
 
 
 
182aa1d
4917153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182aa1d
84979a6
0c2f9b0
84979a6
 
40be853
8f454af
40be853
 
 
98102a6
 
40be853
 
 
 
182aa1d
 
 
 
 
 
 
 
 
9c505a4
182aa1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc70754
182aa1d
 
 
bd17066
 
 
 
 
 
 
 
 
 
 
 
182aa1d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import gradio as gr
import torch
import spaces
from typing import List
from PIL import Image
from diffusers import LucyEditPipeline, AutoencoderKLWan
from diffusers.utils import export_to_video, load_video
import tempfile
import os

# Hugging Face Hub repository that provides both the VAE and the edit pipeline.
model_id = "decart-ai/Lucy-Edit-Dev"

# The VAE is loaded on its own in float32 while the rest of the pipeline is
# loaded in bfloat16.
# NOTE(review): presumably the higher-precision VAE is for encode/decode
# quality -- confirm against the model card.
vae = AutoencoderKLWan.from_pretrained(
    model_id,
    subfolder="vae",
    torch_dtype=torch.float32,
)
pipe = LucyEditPipeline.from_pretrained(
    model_id,
    vae=vae,
    torch_dtype=torch.bfloat16,
)

# Move the whole pipeline to the GPU once, at import time.
pipe.to("cuda")

def calculate_resolution(input_width, input_height, min_dimension=480, max_dimension=832):
    """Pick an output resolution that approximates the input aspect ratio.

    The longer side is first set to ``max_dimension`` and the shorter side is
    derived from the aspect ratio. If the shorter side would fall below
    ``min_dimension`` it is raised to the minimum and the longer side is
    recomputed (and re-clamped to ``max_dimension`` if it overshoots). Both
    sides are finally clamped to ``[min_dimension, max_dimension]`` and rounded
    to the nearest multiple of 16.

    Near-square inputs (aspect ratio within 0.95-1.05) always map to 640x640.

    Args:
        input_width: Width of the source video in pixels (must be > 0).
        input_height: Height of the source video in pixels (must be > 0).
        min_dimension: Smallest allowed side length.
        max_dimension: Largest allowed side length.

    Returns:
        A ``(width, height)`` tuple of ints.

    Raises:
        ValueError: If either input dimension is not positive.
    """
    if input_width <= 0 or input_height <= 0:
        raise ValueError(
            f"Video dimensions must be positive, got {input_width}x{input_height}"
        )

    def round_to_16(x):
        # Output sides are kept on a 16-pixel grid.
        return int(round(x / 16.0) * 16)

    aspect_ratio = input_width / input_height

    # Square videos
    if 0.95 <= aspect_ratio <= 1.05:
        return 640, 640

    if aspect_ratio > 1:
        # Landscape: start from the maximum width.
        new_width = max_dimension
        new_height = int(new_width / aspect_ratio)

        # If height is too small, pin it to the minimum and rescale the width.
        if new_height < min_dimension:
            new_height = min_dimension
            new_width = int(new_height * aspect_ratio)

            # Very wide inputs overshoot the maximum width again; clamp it.
            if new_width > max_dimension:
                new_width = max_dimension
                new_height = int(new_width / aspect_ratio)
    else:
        # Portrait: start from the maximum height.
        new_height = max_dimension
        new_width = int(new_height * aspect_ratio)

        # If width is too small, pin it to the minimum and rescale the height.
        if new_width < min_dimension:
            new_width = min_dimension
            new_height = int(new_width / aspect_ratio)

            # Very tall inputs overshoot the maximum height again; clamp it.
            if new_height > max_dimension:
                new_height = max_dimension
                # width = height * aspect_ratio (dividing here produced an
                # oversized width that then got clamped to a square output).
                new_width = int(new_height * aspect_ratio)

    # Clamp into bounds, then round to multiples of 16.
    final_width = round_to_16(max(min_dimension, min(max_dimension, new_width)))
    final_height = round_to_16(max(min_dimension, min(max_dimension, new_height)))

    return final_width, final_height

@spaces.GPU(duration=90)
def process_video(
    video_path,
    prompt,
    negative_prompt="",
    num_frames=81,
    auto_resize=True,
    manual_height=480,
    manual_width=832,
    guidance_scale=5.0,
    progress=gr.Progress(track_tqdm=True)
):
    """Edit a video with the Lucy Edit pipeline and return the result's path.

    The input video is decoded once, truncated to at most ``num_frames``
    frames, resized to the chosen resolution, passed through ``pipe`` and
    written to a temporary ``.mp4`` file at 24 fps.

    Args:
        video_path: Path of the uploaded input video.
        prompt: Text describing the desired edit.
        negative_prompt: Text describing what should be avoided.
        num_frames: Maximum number of frames to process; reduced to the
            video length when the video is shorter.
        auto_resize: When true, derive the output size from the input size
            via ``calculate_resolution``; otherwise use the manual values.
        manual_height: Output height used when ``auto_resize`` is false.
        manual_width: Output width used when ``auto_resize`` is false.
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker.

    Returns:
        Filesystem path of the exported ``.mp4``.

    Raises:
        gr.Error: If no video was provided or no frames could be loaded.
    """
    # Fail early with a readable message instead of an opaque loader error.
    if not video_path:
        raise gr.Error("Please upload a video to edit")

    # Load and preprocess video
    progress(0.2, desc="Loading video...")

    # Decode once; the same frames provide the dimensions and the model input.
    frames = load_video(video_path)
    if not frames:
        raise gr.Error("Could not load video or video is empty")

    original_width, original_height = frames[0].size

    # Calculate dimensions
    if auto_resize:
        width, height = calculate_resolution(original_width, original_height)
        gr.Info(f"Auto-resized from {original_width}x{original_height} to {width}x{height} (preserving aspect ratio)")
    else:
        # Slider values may arrive as floats; sizes must be integers.
        width, height = int(manual_width), int(manual_height)
        if abs((original_width/original_height) - (width/height)) > 0.1:
            gr.Warning(f"Output aspect ratio ({width}x{height}) differs significantly from input ({original_width}x{original_height}). Video may appear stretched.")

    # Ensure we don't request more frames than the video contains
    num_frames = int(num_frames)
    if len(frames) < num_frames:
        gr.Warning(f"Video has only {len(frames)} frames, using all available frames.")
        num_frames = len(frames)

    # Keep only the frames that will be processed and resize them
    video = [frame.resize((width, height)) for frame in frames[:num_frames]]

    # Generate edited video
    progress(0.5, desc="Generating edited video...")
    output = pipe(
        prompt=prompt,
        video=video,
        negative_prompt=negative_prompt,
        height=height,
        width=width,
        num_frames=num_frames,
        guidance_scale=guidance_scale,
    ).frames[0]

    # Export to temporary file. The handle is closed before exporting so the
    # exporter can write to the reserved path; delete=False keeps the file for
    # Gradio to serve.
    progress(0.9, desc="Exporting video...")
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
        output_path = tmp_file.name

    export_to_video(output, output_path, fps=24)

    progress(1.0, desc="Complete!")
    return output_path

css = '''
.fillable{max-width: 1100px !important}
'''

# Initial values of the advanced controls. They are shared with the example
# runner so gallery examples use the same settings as an untouched UI.
DEFAULT_NUM_FRAMES = 81
DEFAULT_HEIGHT = 480
DEFAULT_WIDTH = 832
DEFAULT_GUIDANCE_SCALE = 5.0


def _run_example(example_video, example_prompt, progress=gr.Progress(track_tqdm=True)):
    """Run ``process_video`` for a gallery example with default advanced settings.

    The examples only provide a video and a prompt, while ``process_video``
    also takes the advanced settings; this adapter fills those in.
    """
    return process_video(
        example_video,
        example_prompt,
        "",  # negative prompt
        DEFAULT_NUM_FRAMES,
        True,  # auto-resize
        DEFAULT_HEIGHT,
        DEFAULT_WIDTH,
        DEFAULT_GUIDANCE_SCALE,
        progress,
    )


with gr.Blocks(title="Lucy Edit - Video Editing with Text", css=css) as demo:
    # Header: logo plus project links.
    gr.HTML("""<p align="center">
  <img src="https://huggingface.co/decart-ai/Lucy-Edit-Dev/resolve/main/assets/logo.png" width="480" style="margin-top: -25px" alt="Lucy Edit Dev Logo"/>
</p>

<p align="center">
  🤗 <a href="https://huggingface.co/decart-ai/Lucy-Edit-Dev"><b>Model</b></a>
  &nbsp;|&nbsp; 🧪 <a href="https://github.com/DecartAI/lucy-edit-comfyui"><b>ComfyUI</b></a>
  &nbsp;|&nbsp; 📖 <a href="https://platform.decart.ai">Playground</a>
  &nbsp;|&nbsp; 📑 <a href="#">arXiv (Coming soon)</a>
  &nbsp;|&nbsp; 💬 <a href="https://discord.gg/decart">Discord</a>
</p>""")

    with gr.Row():
        with gr.Column(scale=1):
            # Input controls
            video_input = gr.Video(label="Input Video")

            prompt = gr.Textbox(
                label="Edit Prompt",
                placeholder="Describe what you want to change in the video...",
                lines=3
            )

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt (optional)",
                    placeholder="Describe what you DON'T want in the video...",
                    lines=2
                )
                auto_resize = gr.Checkbox(
                    label="Auto-resize (preserve aspect ratio)",
                    value=True,
                    info="Automatically calculate dimensions based on input video"
                )

                num_frames = gr.Slider(
                    label="Number of Frames",
                    minimum=1,
                    maximum=120,
                    value=DEFAULT_NUM_FRAMES,
                    step=1,
                    info="More frames = longer processing time"
                )

                with gr.Row():
                    manual_height = gr.Slider(
                        label="Height (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=DEFAULT_HEIGHT,
                        step=32
                    )
                    manual_width = gr.Slider(
                        label="Width (when auto-resize is off)",
                        minimum=256,
                        maximum=1024,
                        value=DEFAULT_WIDTH,
                        step=32
                    )

                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=20.0,
                    value=DEFAULT_GUIDANCE_SCALE,
                    step=0.5,
                    info="Higher values follow the prompt more strictly"
                )

            generate_btn = gr.Button("Edit Video", variant="primary")

        with gr.Column(scale=1):
            video_output = gr.Video(label="Edited Video")

    # Example gallery, placed below the input/output row. The inputs must be
    # the components defined above (video_input, prompt).
    gr.Examples(
        examples=[
            ["examples/man_walking.mp4", "make the man into an alien"],
            ["examples/leopard.mp4",  "make the leopard into a lion"],
            ["examples/woman.mp4", "make the woman's coat blue"],
        ],
        inputs=[video_input, prompt],
        outputs=video_output,
        fn=_run_example,
        cache_examples="lazy",
    )

    # Event handlers
    generate_btn.click(
        fn=process_video,
        inputs=[
            video_input,
            prompt,
            negative_prompt,
            num_frames,
            auto_resize,
            manual_height,
            manual_width,
            guidance_scale
        ],
        outputs=video_output
    )

if __name__ == "__main__":
    demo.launch(share=True)