Spaces:
Running
on
Zero
Running
on
Zero
File size: 17,763 Bytes
8d5a128 8bf0e8e d576102 8d5a128 2ca8c63 8d5a128 2ca8c63 8d5a128 2ca8c63 8d5a128 2ca8c63 8d5a128 2ca8c63 8d5a128 343dd8b 0660e53 8ab3e1a 8d5a128 8bf0e8e 8d5a128 a96a4e2 8d5a128 db2546f 8d5a128 8ab3e1a 8d5a128 cc6f003 8d5a128 db2546f 8d5a128 2ca8c63 8d5a128 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
import os
import random
from typing import Tuple
import numpy as np
import spaces
import torch
from diffusers import FluxPipeline, StableDiffusion3Pipeline
from PIL import Image
import gradio as gr
from utils.flux import flux_editing
from utils.sd3 import sd3_editing
# Prefer the GPU whenever torch can see one; otherwise everything runs on CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Both pipelines are created once, at import time, in float16. The Hugging Face
# access token is read from the HF_ACCESS_TOK environment variable.
pipe_sd3 = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.float16,
    token=os.getenv('HF_ACCESS_TOK'),
)
pipe_flux = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.float16,
    token=os.getenv('HF_ACCESS_TOK'),
)
def seed_everything(seed: int) -> None:
    """
    Seed every random number generator used by the demo.

    Args:
        seed (int): Value handed to Python's ``random`` module, NumPy, and
            torch's CPU and CUDA generators.
    """
    # Order matches the original: stdlib, NumPy, torch CPU, then all CUDA devices.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def on_T_steps_change(T_steps: int, n_max: int) -> gr.update:
    """
    Keep the n_max slider consistent with the T_steps slider.

    Args:
        T_steps (int): The current value of the T_steps slider.
        n_max (int): The current value of the n_max slider.

    Returns:
        gr.update: An update that sets the n_max slider's maximum to T_steps
        and its value to the smaller of n_max and T_steps.
    """
    # n_max may never exceed the total number of steps, so cap it when needed.
    clamped = T_steps if T_steps < n_max else n_max
    return gr.update(maximum=T_steps, value=clamped)
def on_model_change(model_type: str) -> Tuple[int, int, float]:
    """
    Return the default hyperparameters for the selected model.

    Args:
        model_type (str): Either 'SD3' or 'FLUX'.

    Returns:
        Tuple[int, int, float]: The (T_steps, n_max, eta) defaults.

    Raises:
        NotImplementedError: If model_type is not one of the known models.
    """
    # (model name, (T_steps, n_max, eta)) pairs, checked in order.
    presets = (
        ('SD3', (15, 12, 0.01)),
        ('FLUX', (15, 13, 0.0025)),
    )
    for known_model, defaults in presets:
        if model_type == known_model:
            return defaults
    raise NotImplementedError(f"Model type {model_type} not implemented")
def get_examples():
    """
    Build the rows shown in the demo's examples table.

    Every row is a list ordered as: source image path, model type, T_steps,
    n_max, eta, flowopt_iterations, source prompt, target prompt, source
    guidance scale, target guidance scale, and a list of
    (image path, caption) pairs for the output gallery.

    Returns:
        list: One list per example, in display order.
    """
    # Source prompts shared by several examples of the same input image.
    # NOTE(review): the doubled apostrophe in "lizard''s" is kept verbatim.
    corgi_src = "A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene."
    puppies_src = "Two adorable golden retriever puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting."
    iguana_src = "A large orange lizard sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard''s resting spot."
    cow_src = "A large brown and white cow standing in a grassy field. The cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze."
    cat_fridge_src = "A cat sitting on top of a counter in a store. The cat is positioned towards the right side of the counter, and it appears to be looking at the camera. The store has a variety of items displayed, including several bottles scattered around the counter."
    cat_src = "A small, fluffy kitten sitting in a grassy field. The kitten is positioned in the center of the scene, surrounded by a field. The kitten appears to be looking at something in the field."
    wolf_src = "A wolf standing in a grassy field with yellow flowers. The wolf is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area."

    # (image stem, model, n_max, eta, flowopt_iterations, edit folder,
    #  source prompt, target prompt)
    specs = [
        ("corgi_walking", "FLUX", 13, 0.0025, 7, "guinea_pig", corgi_src,
         "A cute brown and white guinea pig walking on a sidewalk near a body of water. The guinea pig is wearing a pink vest, adding a touch of color to the scene."),
        ("corgi_walking", "SD3", 12, 0.01, 7, "rabbit", corgi_src,
         "A cute brown and white rabbit walking on a sidewalk near a body of water. The rabbit is wearing a pink vest, adding a touch of color to the scene."),
        ("puppies", "FLUX", 13, 0.0025, 7, "crochet", puppies_src,
         "Two adorable crochet golden retriever puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting or enjoying the outdoor environment."),
        ("puppies", "SD3", 12, 0.01, 5, "husky", puppies_src,
         "Two adorable husky puppies sitting in a grassy field. They are positioned close to each other, with one dog on the left and the other on the right. Both dogs have their mouths open, possibly panting or enjoying the outdoor environment."),
        ("iguana", "FLUX", 13, 0.0025, 7, "crochet", iguana_src,
         "A large crochet lizard sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard''s resting spot."),
        ("iguana", "FLUX", 13, 0.0025, 7, "lego_bricks", iguana_src,
         "A large lizard made out of lego bricks sitting on a rock near the ocean. The lizard is positioned in the center of the scene, with the ocean waves visible in the background. The rock is located close to the water, providing a picturesque setting for the lizard''s resting spot."),
        ("cow_grass2", "FLUX", 12, 0.0025, 6, "colorful_toy_bricks", cow_src,
         "A large cow made out of colorful toy bricks standing in a grassy field. The colorful toy brick cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze."),
        ("cow_grass2", "FLUX", 13, 0.0025, 5, "flowers", cow_src,
         "A large cow made out of flowers standing in a grassy field. The flower cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze."),
        ("cow_grass2", "SD3", 12, 0.01, 8, "wooden_blocks", cow_src,
         "A large cow made out of wooden blocks standing in a grassy field. The wooden block cow is positioned towards the center of the scene. The field is lush and green, providing a perfect environment for the cow to graze."),
        ("cat_fridge", "SD3", 12, 0.01, 8, "origami", cat_fridge_src,
         "A cat sitting on top of a counter in a store, with the cat and counter crafted using origami folded paper art techniques. The cat has a delicate and intricate appearance, with paper folds used to create its fur and features. The store has a variety of items displayed, including several bottles scattered around the counter."),
        ("cat", "FLUX", 13, 0.0025, 7, "bear", cat_src,
         "A small bear cub sitting in a grassy field. The bear cub is positioned in the center of the scene, surrounded by a field. The bear cub appears to be looking at something in the field."),
        ("cat", "SD3", 12, 0.01, 6, "puppy", cat_src,
         "A small puppy sitting in a grassy field. The puppy is positioned in the center of the scene, surrounded by a field. The puppy appears to be looking at something in the field."),
        ("wolf_grass", "FLUX", 13, 0.0025, 7, "fox", wolf_src,
         "A fox standing in a grassy field with yellow flowers. The fox is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area."),
        ("wolf_grass", "SD3", 12, 0.01, 4, "deer", wolf_src,
         "A baby deer standing in a grassy field with yellow flowers. The baby deer is positioned towards the center of the scene, and its body is facing the camera. The field is filled with grass, and the yellow flowers are scattered throughout the area."),
    ]

    rows = []
    for stem, model, n_max, eta, iterations, edit, src_prompt, tar_prompt in specs:
        # Pre-rendered outputs are numbered 0..iterations and tagged with the
        # lowercase model name.
        prefix = f"example_outputs/{stem}/{edit}/{model.lower()}_iterations="
        gallery = [
            (f"{prefix}{step}.png", f"Iteration {step}")
            for step in range(iterations + 1)
        ]
        rows.append([
            f"inputs/{stem}.png", model, 15, n_max, eta, iterations,
            src_prompt, tar_prompt, 1.0, 3.5, gallery,
        ])
    return rows
def get_duration(
    image_src_val: str, model_type_val: str, T_steps_val: int,
    n_max_val: int, eta_val: float, flowopt_iterations_val: int,
    src_prompt_val: str, tar_prompt_val: str,
    src_guidance_scale_val: float, tar_guidance_scale_val: float,
) -> float:
    """
    Compute the ``duration`` value handed to ``spaces.GPU`` for a FlowOpt run.

    The signature mirrors ``FlowOpt_run``; only the model type, n_max and the
    number of FlowOpt iterations influence the result.

    Args:
        model_type_val (str): 'FLUX' uses a per-step cost of 0.6; any other
            value uses 0.2.
        n_max_val (int): Current n_max setting.
        flowopt_iterations_val (int): Requested number of FlowOpt iterations.

    Returns:
        float: ``n_max * (iterations + 2) * per-step cost + 45``
        (presumably seconds - confirm against the ``spaces`` package).
    """
    if model_type_val == 'FLUX':
        per_step_cost = 0.6
    else:
        per_step_cost = 0.2
    # Keep the multiplication order (integer product first) so the float
    # result is bit-for-bit stable.
    weighted_steps = n_max_val * (flowopt_iterations_val + 2)
    return weighted_steps * per_step_cost + 45
@spaces.GPU(duration=get_duration)
def FlowOpt_run(
    image_src_val: str, model_type_val: str, T_steps_val: int,
    n_max_val: int, eta_val: float, flowopt_iterations_val: int,
    src_prompt_val: str, tar_prompt_val: str,
    src_guidance_scale_val: float, tar_guidance_scale_val: float,
):
    """
    Edit the source image with FlowOpt and stream the results.

    This is a generator: it re-yields everything produced by ``sd3_editing``
    or ``flux_editing`` (presumably the gallery contents after each FlowOpt
    iteration - confirm against ``utils``).

    Args:
        image_src_val (str): Path of the source image file.
        model_type_val (str): 'FLUX' or 'SD3'.
        T_steps_val (int): Total number of discretization steps.
        n_max_val (int): Controls the strength of the edit.
        eta_val (float): Optimization step-size.
        flowopt_iterations_val (int): Max number of FlowOpt iterations.
        src_prompt_val (str): Description of the source image.
        tar_prompt_val (str): Description of the desired edited image.
        src_guidance_scale_val (float): Source prompt CFG scale.
        tar_guidance_scale_val (float): Target prompt CFG scale.

    Raises:
        gr.Error: If the source image or either prompt is missing.
        NotImplementedError: If model_type_val is neither 'FLUX' nor 'SD3'.
    """
    # Validate user input up front so the UI shows a readable message
    # (a cleared image component passes None here).
    if not image_src_val:
        raise gr.Error("Source image cannot be empty")
    if not src_prompt_val:
        raise gr.Error("Source prompt cannot be empty")
    if not tar_prompt_val:
        raise gr.Error("Target prompt cannot be empty")

    if model_type_val == 'FLUX':
        pipe = pipe_flux.to(device)
    elif model_type_val == 'SD3':
        pipe = pipe_sd3.to(device)
    else:
        raise NotImplementedError(f"Model type {model_type_val} not implemented")
    scheduler = pipe.scheduler

    # set seed
    seed = 1024
    seed_everything(seed)

    # load image; force 3-channel RGB so RGBA / grayscale / palette uploads do
    # not reach the VAE with an unexpected channel count, and close the file
    # handle as soon as the pixels are read
    with Image.open(image_src_val) as opened:
        image = opened.convert("RGB")
    # crop image to have both dimensions divisible by 16 - avoids issues with resizing
    image = image.crop((0, 0, image.width - image.width % 16, image.height - image.height % 16))
    image_tensor = pipe.image_processor.preprocess(image)
    # cast image to half precision
    image_tensor = image_tensor.to(device).half()

    with torch.autocast("cuda"), torch.inference_mode():
        x0_src_denorm = pipe.vae.encode(image_tensor).latent_dist.mode()
    x0_src = (x0_src_denorm - pipe.vae.config.shift_factor) * pipe.vae.config.scaling_factor
    # send to cuda
    x0_src = x0_src.to(device)

    if model_type_val == 'SD3':
        negative_prompt = ""  # (SD3)
        yield from sd3_editing(
            pipe, scheduler, T_steps_val, n_max_val, x0_src,
            src_prompt_val, tar_prompt_val, negative_prompt,
            src_guidance_scale_val, tar_guidance_scale_val,
            flowopt_iterations_val, eta_val,
        )
    else:
        # Only 'FLUX' can reach this branch: any other model type was rejected
        # when the pipeline was selected above.
        yield from flux_editing(
            pipe, scheduler, T_steps_val, n_max_val, x0_src,
            src_prompt_val, tar_prompt_val,
            src_guidance_scale_val, tar_guidance_scale_val,
            flowopt_iterations_val, eta_val,
        )
# HTML header for the demo page (title, paper/project/code links, and usage
# notes); it is rendered with gr.HTML(intro) inside the Blocks layout.
intro = """
<h1 style="font-weight: 1000; text-align: center; margin: 0px;">FlowOpt: Fast Optimization Through Whole Flow Processes for Training-Free Editing</h1>
<h3 style="margin-bottom: 10px; text-align: center;">
<a href="https://arxiv.org/abs/2510.22010">[Paper]</a> |
<a href="https://orronai.github.io/FlowOpt/">[Project Page]</a> |
<a href="https://github.com/orronai/FlowOpt">[Code]</a>
</h3>
<br> 🎨 Edit your image using FlowOpt for Flow models! Upload an image, add a description of it, and specify the edits you want to make.
<h3>Notes:</h3>
<ol>
<li>We use FLUX.1 dev and SD3 for the demo. The models are large and may take a while to load.</li>
<li>We recommend 1024x1024 images for the best results. If the input images are too large, there may be out-of-memory errors. For other resolutions, we encourage you to find a suitable set of hyperparameters.</li>
<li>Default hyperparameters for each model used in the paper are provided as examples.</li>
</ol>
"""
# Stylesheet passed to gr.Blocks(css=css). It centres the main column and caps
# its width, makes gallery images fill their column while keeping their aspect
# ratio, and keeps gallery thumbnails and captions hidden until the gallery
# (elem_id "gallery-image") is hovered.
css="""
#col-container {
margin: 0 auto;
max-width: 960px;
}
#gallery-image img {
width: 100%; /* match column width */
height: auto; /* preserve aspect ratio */
object-fit: contain;
}
/* Hide thumbnails by default */
#gallery-image .gallery-container .preview .thumbnails {
opacity: 0;
transition: opacity 0.3s ease-in-out;
}
/* Show thumbnails only when hovering over the gallery */
#gallery-image:hover .gallery-container .preview .thumbnails {
opacity: 1;
}
/* Hide caption by default */
.gallery-container .preview .caption {
opacity: 0;
transition: opacity 0.3s ease-in-out;
}
/* Show caption when hovering over #gallery-image */
#gallery-image:hover .gallery-container .preview .caption {
opacity: 1;
}
"""
# Build the Gradio interface: inputs and hyperparameters, the output gallery,
# the examples table, and the event wiring between them.
# NOTE(review): the nesting of the layout contexts below follows the order in
# which components are declared; confirm it against the deployed layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(intro)
        with gr.Row():
            # Source image, prompts, run button and hyperparameters.
            with gr.Column():
                image_src = gr.Image(type="filepath", label="Source Image", value="inputs/corgi_walking.png",)
                src_prompt = gr.Textbox(lines=2, label="Source Prompt", value="A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene.")
                tar_prompt = gr.Textbox(lines=2, label="Target Prompt", value="A cute brown and white dog walking on a sidewalk near a body of water. The dog is wearing a pink vest, adding a touch of color to the scene. The dog and sidewalk are constructed entirely out of Lego bricks, showcasing a blocky and geometric appearance.")
                submit_button = gr.Button("Run FlowOpt", variant="primary")
                with gr.Row():
                    # Initial values match the 'FLUX' defaults returned by on_model_change.
                    model_type = gr.Dropdown(["FLUX", "SD3"], label="Model Type", value="FLUX")
                    T_steps = gr.Slider(value=15, minimum=10, maximum=50, step=1, label="Total Steps", info="Total number of discretization steps.")
                    n_max = gr.Slider(value=13, minimum=1, maximum=15, step=1, label="n_max", info="Control the strength of the edit.")
                    eta = gr.Slider(value=0.0025, minimum=0.0001, maximum=0.05, label="eta", info="Control the optimization step-size (η).")
                    flowopt_iterations = gr.Number(value=8, minimum=1, maximum=15, label="flowopt_iterations", info="Max number of FlowOpt iterations (N).")
            # Output gallery and the advanced guidance settings.
            with gr.Column():
                image_tar = gr.Gallery(
                    label="Outputs", show_label=True, format="png",
                    columns=[3], rows=[3], height="auto", elem_id="gallery-image",
                )
                with gr.Accordion(label="Advanced Settings", open=False):
                    src_guidance_scale = gr.Slider(value=1.0, minimum=0.0, maximum=15.0, label="src_guidance_scale", info="Source prompt CFG scale.")
                    tar_guidance_scale = gr.Slider(value=3.5, minimum=1.0, maximum=15.0, label="tar_guidance_scale", info="Target prompt CFG scale.")
        # Run FlowOpt on click; the input order must match FlowOpt_run's parameters.
        submit_button.click(
            fn=FlowOpt_run,
            inputs=[
                image_src, model_type, T_steps, n_max, eta, flowopt_iterations,
                src_prompt, tar_prompt, src_guidance_scale, tar_guidance_scale,
            ],
            outputs=[image_tar],
        )
        # image_tar is listed among the inputs because every example row ends
        # with a list of (image path, caption) pairs for the gallery.
        gr.Examples(
            label="Examples",
            examples=get_examples(),
            inputs=[
                image_src, model_type, T_steps, n_max, eta,
                flowopt_iterations, src_prompt, tar_prompt,
                src_guidance_scale, tar_guidance_scale, image_tar,
            ],
            outputs=[image_tar],
        )
        # Changing the model loads that model's default T_steps, n_max and eta.
        model_type.input(fn=on_model_change, inputs=[model_type], outputs=[T_steps, n_max, eta])
        # Changing T_steps updates the n_max slider's maximum and clamps its value.
        T_steps.change(fn=on_T_steps_change, inputs=[T_steps, n_max], outputs=[n_max])
demo.queue()
demo.launch()
|