File size: 17,763 Bytes
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8bf0e8e
d576102
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
 
2ca8c63
8d5a128
2ca8c63
8d5a128
 
 
2ca8c63
8d5a128
2ca8c63
8d5a128
2ca8c63
 
 
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343dd8b
 
 
 
 
 
0660e53
 
8ab3e1a
 
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
8bf0e8e
 
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a96a4e2
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db2546f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5a128
 
 
 
 
 
 
8ab3e1a
 
 
8d5a128
 
 
 
 
 
cc6f003
 
8d5a128
 
 
 
db2546f
8d5a128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ca8c63
8d5a128
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
import os
import random
from typing import Tuple

import numpy as np
import spaces
import torch
from diffusers import FluxPipeline, StableDiffusion3Pipeline
from PIL import Image

import gradio as gr
from utils.flux import flux_editing
from utils.sd3 import sd3_editing

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Both pipelines are instantiated once, at import time, in float16. The
# Hugging Face access token is read from the HF_ACCESS_TOK environment
# variable (None when the variable is unset).
pipe_sd3 = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.float16,
    token=os.environ.get('HF_ACCESS_TOK'),
)
pipe_flux = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.float16,
    token=os.environ.get('HF_ACCESS_TOK'),
)


def seed_everything(seed: int) -> None:
    """
    Seed every random number generator this app relies on.

    The same value is handed, in order, to Python's ``random`` module,
    NumPy's global generator, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.

    Args:
        seed (int): The seed value to set.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def on_T_steps_change(T_steps: int, n_max: int) -> gr.update:
    """
    Keep the n_max slider in range whenever the T_steps slider changes.

    Args:
        T_steps (int): The current value of the T_steps slider.
        n_max (int): The current value of the n_max slider.
    Returns:
        gr.update: An update that sets the n_max slider's maximum to T_steps
            and its value to n_max, lowered to T_steps if it was larger.
    """
    # n_max is only replaced when it is strictly greater than T_steps
    clamped_n_max = T_steps if T_steps < n_max else n_max
    return gr.update(value=clamped_n_max, maximum=T_steps)

def on_model_change(model_type: str) -> Tuple[int, int, float]:
    """
    Return the default hyperparameters for the selected model.

    Args:
        model_type (str): Either 'SD3' or 'FLUX'.
    Returns:
        Tuple[int, int, float]: The (T_steps, n_max, eta) defaults, in that order.
    Raises:
        NotImplementedError: If model_type is neither 'SD3' nor 'FLUX'.
    """
    if model_type == 'SD3':
        return 15, 12, 0.01
    if model_type == 'FLUX':
        return 15, 13, 0.0025
    raise NotImplementedError(f"Model type {model_type} not implemented")

def get_examples():
    """
    Build the rows shown in the "Examples" table of the demo.

    Each row lists, in order: source image path, model type, T_steps, n_max,
    eta, flowopt_iterations, source prompt, target prompt, source guidance
    scale, target guidance scale, and a gallery of precomputed outputs. The
    gallery is a list of (image path, caption) pairs with one entry per
    iteration index from 0 to flowopt_iterations inclusive.

    Returns:
        list: One list per example, in the column order described above.
    """
    # NOTE: apostrophes in the prompts are plain "'" characters. A doubled ''
    # inside a double-quoted Python string is two literal quote characters,
    # not an escape.
    examples = [
        ["inputs/corgi_walking.png", "FLUX", 15, 13, 0.0025, 7, "A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene.", "A cute brown and white guinea pig walking on a sidewalk near a body of water. The guinea pig is wearing a pink vest, adding a touch of color to the scene.", 1.0, 3.5, [(f"example_outputs/corgi_walking/guinea_pig/flux_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/corgi_walking.png", "SD3", 15, 12, 0.01, 7, "A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene.", "A cute brown and white rabbit walking on a sidewalk near a body of water. The rabbit is wearing a pink vest, adding a touch of color to the scene.", 1.0, 3.5, [(f"example_outputs/corgi_walking/rabbit/sd3_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/puppies.png", "FLUX", 15, 13, 0.0025, 7, "Two adorable golden retriever puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting.", "Two adorable crochet golden retriever puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting or enjoying the outdoor environment.", 1.0, 3.5, [(f"example_outputs/puppies/crochet/flux_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/puppies.png", "SD3", 15, 12, 0.01, 5, "Two adorable golden retriever puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting.", "Two adorable husky puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting or enjoying the outdoor environment.", 1.0, 3.5, [(f"example_outputs/puppies/husky/sd3_iterations={i}.png", f"Iteration {i}") for i in range(6)]],
        ["inputs/iguana.png", "FLUX", 15, 13, 0.0025, 7, "A large orange lizard sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard's resting spot.", "A large crochet lizard sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard's resting spot.", 1.0, 3.5, [(f"example_outputs/iguana/crochet/flux_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/iguana.png", "FLUX", 15, 13, 0.0025, 7, "A large orange lizard sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard's resting spot.", "A large lizard made out of lego bricks sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard's resting spot.", 1.0, 3.5, [(f"example_outputs/iguana/lego_bricks/flux_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/cow_grass2.png", "FLUX", 15, 12, 0.0025, 6, "A large brown and white cow standing in a grassy field. The cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze.", "A large cow made out of colorful toy bricks standing in a grassy field. The colorful toy brick cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze.", 1.0, 3.5, [(f"example_outputs/cow_grass2/colorful_toy_bricks/flux_iterations={i}.png", f"Iteration {i}") for i in range(7)]],
        ["inputs/cow_grass2.png", "FLUX", 15, 13, 0.0025, 5, "A large brown and white cow standing in a grassy field. The cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze.", "A large cow made out of flowers standing in a grassy field. The flower cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze.", 1.0, 3.5, [(f"example_outputs/cow_grass2/flowers/flux_iterations={i}.png", f"Iteration {i}") for i in range(6)]],
        ["inputs/cow_grass2.png", "SD3", 15, 12, 0.01, 8, "A large brown and white cow standing in a grassy field. The cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze.", "A large cow made out of wooden blocks standing in a grassy field. The wooden block cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze.", 1.0, 3.5, [(f"example_outputs/cow_grass2/wooden_blocks/sd3_iterations={i}.png", f"Iteration {i}") for i in range(9)]],
        ["inputs/cat_fridge.png", "SD3", 15, 12, 0.01, 8, "A cat sitting on top of a counter in a store. The cat is positioned towards the right side of the counter, and it appears to be looking at the camera. The store has a variety of items displayed, including several bottles scattered around the counter.", "A cat sitting on top of a counter in a store, with the cat and counter crafted using origami folded paper art techniques. The cat has a delicate and intricate appearance, with paper folds used to create its fur and features. The store has a variety of items displayed, including several bottles scattered around the counter.", 1.0, 3.5, [(f"example_outputs/cat_fridge/origami/sd3_iterations={i}.png", f"Iteration {i}") for i in range(9)]],
        ["inputs/cat.png", "FLUX", 15, 13, 0.0025, 7, "A small, fluffy kitten sitting in a grassy field. The kitten is positioned in the center of the scene, surrounded by a field. The kitten appears to be looking at something in the field.", "A small bear cub sitting in a grassy field. The bear cub is positioned in the center of the scene, surrounded by a field. The bear cub appears to be looking at something in the field.", 1.0, 3.5, [(f"example_outputs/cat/bear/flux_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/cat.png", "SD3", 15, 12, 0.01, 6, "A small, fluffy kitten sitting in a grassy field. The kitten is positioned in the center of the scene, surrounded by a field. The kitten appears to be looking at something in the field.", "A small puppy sitting in a grassy field. The puppy is positioned in the center of the scene, surrounded by a field. The puppy appears to be looking at something in the field.", 1.0, 3.5, [(f"example_outputs/cat/puppy/sd3_iterations={i}.png", f"Iteration {i}") for i in range(7)]],
        ["inputs/wolf_grass.png", "FLUX", 15, 13, 0.0025, 7, "A wolf standing in a grassy field with yellow flowers. The wolf is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area.", "A fox standing in a grassy field with yellow flowers. The fox is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area.", 1.0, 3.5, [(f"example_outputs/wolf_grass/fox/flux_iterations={i}.png", f"Iteration {i}") for i in range(8)]],
        ["inputs/wolf_grass.png", "SD3", 15, 12, 0.01, 4, "A wolf standing in a grassy field with yellow flowers. The wolf is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area.", "A baby deer standing in a grassy field with yellow flowers. The baby deer is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area.", 1.0, 3.5, [(f"example_outputs/wolf_grass/deer/sd3_iterations={i}.png", f"Iteration {i}") for i in range(5)]],
    ]
    return examples

def get_duration(
    image_src_val: str, model_type_val: str, T_steps_val: int,
    n_max_val: int, eta_val: float, flowopt_iterations_val: int,
    src_prompt_val: str, tar_prompt_val: str,
    src_guidance_scale_val: float, tar_guidance_scale_val: float,
) -> float:
    """
    Estimate the duration to request for one FlowOpt_run call.

    This function is passed as ``duration=`` to the ``spaces.GPU`` decorator on
    FlowOpt_run and takes the same arguments. Only model_type_val, n_max_val
    and flowopt_iterations_val influence the result.

    Returns:
        float: n_max_val * (flowopt_iterations_val + 2) times a per-step cost
            (0.6 for 'FLUX', 0.2 for anything else), plus a constant 45.
            Presumably in seconds; confirm against the ``spaces`` package.
    """
    if model_type_val == 'FLUX':
        per_step_cost = 0.6
    else:
        per_step_cost = 0.2

    step_count = n_max_val * (flowopt_iterations_val + 2)
    return step_count * per_step_cost + 45

@spaces.GPU(duration=get_duration)
def FlowOpt_run(
    image_src_val: str, model_type_val: str, T_steps_val: int,
    n_max_val: int, eta_val: float, flowopt_iterations_val: int,
    src_prompt_val: str, tar_prompt_val: str,
    src_guidance_scale_val: float, tar_guidance_scale_val: float,
):
    """
    Edit the source image with FlowOpt and stream the results.

    This is a generator. It validates the inputs, encodes the source image
    with the selected pipeline's VAE, and then yields whatever ``sd3_editing``
    or ``flux_editing`` yields.

    Args:
        image_src_val (str): Path of the source image file.
        model_type_val (str): Either 'FLUX' or 'SD3'.
        T_steps_val (int): Total number of discretization steps.
        n_max_val (int): Controls the strength of the edit.
        eta_val (float): Optimization step-size.
        flowopt_iterations_val (int): Max number of FlowOpt iterations.
        src_prompt_val (str): Description of the source image.
        tar_prompt_val (str): Description of the desired edited image.
        src_guidance_scale_val (float): Source prompt CFG scale.
        tar_guidance_scale_val (float): Target prompt CFG scale.
    Yields:
        The values produced by the model-specific editing generator.
    Raises:
        gr.Error: If the image or a prompt is missing, or the image is
            smaller than 16 pixels in either dimension.
        NotImplementedError: If model_type_val is neither 'FLUX' nor 'SD3'.
    """
    # Truthiness checks also cover None (e.g. the image was cleared in the UI).
    if not image_src_val:
        raise gr.Error("Source image cannot be empty")
    if not src_prompt_val:
        raise gr.Error("Source prompt cannot be empty")
    if not tar_prompt_val:
        raise gr.Error("Target prompt cannot be empty")

    if model_type_val == 'FLUX':
        pipe = pipe_flux.to(device)
    elif model_type_val == 'SD3':
        pipe = pipe_sd3.to(device)
    else:
        raise NotImplementedError(f"Model type {model_type_val} not implemented")

    scheduler = pipe.scheduler

    # set seed (fixed, so repeated runs with the same inputs use the same RNG state)
    seed = 1024
    seed_everything(seed)

    # Load the image and release the file handle right away. Converting to RGB
    # guards against RGBA / palette / grayscale files reaching the VAE with an
    # unexpected number of channels.
    with Image.open(image_src_val) as opened_image:
        image = opened_image.convert("RGB")

    # crop image to have both dimensions divisible by 16 - avoids issues with resizing
    cropped_width = image.width - image.width % 16
    cropped_height = image.height - image.height % 16
    if cropped_width == 0 or cropped_height == 0:
        raise gr.Error("Source image must be at least 16x16 pixels")
    image = image.crop((0, 0, cropped_width, cropped_height))
    image_tensor = pipe.image_processor.preprocess(image)

    # cast image to half precision
    image_tensor = image_tensor.to(device).half()
    with torch.autocast("cuda"), torch.inference_mode():
        x0_src_denorm = pipe.vae.encode(image_tensor).latent_dist.mode()
    x0_src = (x0_src_denorm - pipe.vae.config.shift_factor) * pipe.vae.config.scaling_factor
    # send to the selected device
    x0_src = x0_src.to(device)
    negative_prompt = ""  # (SD3)

    if model_type_val == 'SD3':
        yield from sd3_editing(
            pipe, scheduler, T_steps_val, n_max_val, x0_src,
            src_prompt_val, tar_prompt_val, negative_prompt,
            src_guidance_scale_val, tar_guidance_scale_val,
            flowopt_iterations_val, eta_val,
        )
    else:
        # Only 'FLUX' can reach this branch: any other model type was rejected
        # when the pipeline was selected above.
        yield from flux_editing(
            pipe, scheduler, T_steps_val, n_max_val, x0_src,
            src_prompt_val, tar_prompt_val,
            src_guidance_scale_val, tar_guidance_scale_val,
            flowopt_iterations_val, eta_val,
        )


# HTML header rendered at the top of the page via gr.HTML(intro): title,
# paper / project page / code links, a one-line description and usage notes.
intro = """
<h1 style="font-weight: 1000; text-align: center; margin: 0px;">FlowOpt: Fast Optimization Through Whole Flow Processes for Training-Free Editing</h1>
<h3 style="margin-bottom: 10px; text-align: center;">
    <a href="https://arxiv.org/abs/2510.22010">[Paper]</a>&nbsp;|&nbsp;
    <a href="https://orronai.github.io/FlowOpt/">[Project Page]</a>&nbsp;|&nbsp;
    <a href="https://github.com/orronai/FlowOpt">[Code]</a>
</h3>
<br> 🎨 Edit your image using FlowOpt for Flow models! Upload an image, add a description of it, and specify the edits you want to make.
<h3>Notes:</h3>
<ol>
  <li>We use FLUX.1 dev and SD3 for the demo. The models are large and may take a while to load.</li>
  <li>We recommend 1024x1024 images for the best results. If the input images are too large, there may be out-of-memory errors. For other resolutions, we encourage you to find a suitable set of hyperparameters.</li>
  <li>Default hyperparameters for each model used in the paper are provided as examples.</li>
</ol>  
"""

# Custom stylesheet passed to gr.Blocks(css=css). The selectors target the
# elem_id values used in the layout: "col-container" (page column) and
# "gallery-image" (output gallery).
css="""
#col-container {
    margin: 0 auto;
    max-width: 960px;
}

#gallery-image img {
    width: 100%;        /* match column width */
    height: auto;       /* preserve aspect ratio */
    object-fit: contain;
}

/* Hide thumbnails by default */
#gallery-image .gallery-container .preview .thumbnails {
    opacity: 0;
    transition: opacity 0.3s ease-in-out;
}

/* Show thumbnails only when hovering over the gallery */
#gallery-image:hover .gallery-container .preview .thumbnails {
    opacity: 1;
}

/* Hide caption by default */
.gallery-container .preview .caption {
    opacity: 0;
    transition: opacity 0.3s ease-in-out;
}

/* Show caption when hovering over #gallery-image */
#gallery-image:hover .gallery-container .preview .caption {
    opacity: 1;
}
"""
# Build the page: header, inputs on the left, output gallery on the right,
# advanced settings below, then the event wiring and the examples table.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(intro)

        with gr.Row():
            # Left column: source image, prompts, run button and hyperparameters.
            with gr.Column():
                image_src = gr.Image(type="filepath", label="Source Image", value="inputs/corgi_walking.png")
                src_prompt = gr.Textbox(lines=2, label="Source Prompt", value="A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene.")
                tar_prompt = gr.Textbox(lines=2, label="Target Prompt", value="A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene. The dog and sidewalk are constructed entirely out of Lego bricks, showcasing a blocky and geometric appearance.")
                submit_button = gr.Button("Run FlowOpt", variant="primary")

                with gr.Row():
                    model_type = gr.Dropdown(["FLUX", "SD3"], label="Model Type", value="FLUX")
                    T_steps = gr.Slider(value=15, minimum=10, maximum=50, step=1, label="Total Steps", info="Total number of discretization steps.")
                    n_max = gr.Slider(value=13, minimum=1, maximum=15, step=1, label="n_max", info="Control the strength of the edit.")
                    eta = gr.Slider(value=0.0025, minimum=0.0001, maximum=0.05, label="eta", info="Control the optimization step-size (η).")
                    # precision=0 makes the component deliver an int, so a typed
                    # fractional value cannot reach the iteration count.
                    flowopt_iterations = gr.Number(value=8, minimum=1, maximum=15, precision=0, label="flowopt_iterations", info="Max number of FlowOpt iterations (N).")

            # Right column: gallery that receives the streamed outputs.
            with gr.Column():
                image_tar = gr.Gallery(
                    label="Outputs", show_label=True, format="png",
                    columns=[3], rows=[3], height="auto", elem_id="gallery-image",
                )
        with gr.Accordion(label="Advanced Settings", open=False):
            src_guidance_scale = gr.Slider(value=1.0, minimum=0.0, maximum=15.0, label="src_guidance_scale", info="Source prompt CFG scale.")
            tar_guidance_scale = gr.Slider(value=3.5, minimum=1.0, maximum=15.0, label="tar_guidance_scale", info="Target prompt CFG scale.")

    # The input order must match the parameter order of FlowOpt_run.
    submit_button.click(
        fn=FlowOpt_run,
        inputs=[
            image_src, model_type, T_steps, n_max, eta, flowopt_iterations,
            src_prompt, tar_prompt, src_guidance_scale, tar_guidance_scale,
        ],
        outputs=[image_tar],
    )

    # image_tar is listed among the inputs so that selecting an example also
    # fills the gallery with that example's precomputed outputs.
    gr.Examples(
        label="Examples",
        examples=get_examples(),
        inputs=[
            image_src, model_type, T_steps, n_max, eta,
            flowopt_iterations, src_prompt, tar_prompt,
            src_guidance_scale, tar_guidance_scale, image_tar,
        ],
        outputs=[image_tar],
    )

    # Switching model loads that model's default hyperparameters; changing
    # T_steps re-clamps the n_max slider.
    model_type.input(fn=on_model_change, inputs=[model_type], outputs=[T_steps, n_max, eta])
    T_steps.change(fn=on_T_steps_change, inputs=[T_steps, n_max], outputs=[n_max])

demo.queue()
demo.launch()