Spaces:
Running
on
Zero
Running
on
Zero
remove blip captioning and more straight forward
Browse files
app.py
CHANGED
|
@@ -48,51 +48,30 @@ pipe_edit = StableDiffusionXLInstructPix2PixPipeline.from_single_file( edit_file
|
|
| 48 |
pipe_edit.scheduler = EDMEulerScheduler(sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction")
|
| 49 |
pipe_edit.to("cuda")
|
| 50 |
|
| 51 |
-
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 52 |
-
|
| 53 |
-
processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
|
| 54 |
-
model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap", torch_dtype=torch.float16).to("cuda")
|
| 55 |
-
|
| 56 |
# Generator
|
| 57 |
@spaces.GPU(duration=30, queue=False)
|
| 58 |
def king(type ,
|
| 59 |
input_image ,
|
| 60 |
instruction: str ,
|
| 61 |
-
steps: int =
|
| 62 |
randomize_seed: bool = False,
|
| 63 |
seed: int = 25,
|
| 64 |
-
text_cfg_scale: float = 7.3,
|
| 65 |
-
image_cfg_scale: float = 1.7,
|
| 66 |
width: int = 1024,
|
| 67 |
height: int = 1024,
|
| 68 |
-
guidance_scale: float =
|
| 69 |
use_resolution_binning: bool = True,
|
| 70 |
progress=gr.Progress(track_tqdm=True),
|
| 71 |
):
|
| 72 |
if type=="Image Editing" :
|
| 73 |
raw_image = Image.open(input_image).convert('RGB')
|
| 74 |
-
inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
|
| 75 |
-
out = model.generate(**inputs, min_length=10, max_length=20)
|
| 76 |
-
caption = processor.decode(out[0], skip_special_tokens=True)
|
| 77 |
-
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
| 78 |
-
system_instructions1 = "<s>[SYSTEM] Your task is to modify prompt by USER with edit text, and create new prompt for image generation, reply with prompt only, Your task is to reply with final prompt only. [USER]"
|
| 79 |
-
formatted_prompt = f"{system_instructions1} {caption} [EDIT] {instruction} [FINAL_PROMPT]"
|
| 80 |
-
stream = client1.text_generation(formatted_prompt, max_new_tokens=50, stream=True, details=True, return_full_text=False)
|
| 81 |
-
instructions = "".join([response.token.text for response in stream if response.token.text != "</s>"])
|
| 82 |
-
print(instructions)
|
| 83 |
if randomize_seed:
|
| 84 |
seed = random.randint(0, 99999)
|
| 85 |
-
text_cfg_scale = text_cfg_scale
|
| 86 |
-
image_cfg_scale = image_cfg_scale
|
| 87 |
-
input_image = input_image
|
| 88 |
-
steps=steps
|
| 89 |
generator = torch.manual_seed(seed)
|
| 90 |
output_image = pipe_edit(
|
| 91 |
instructions, image=raw_image,
|
| 92 |
-
guidance_scale=
|
| 93 |
num_inference_steps=steps, generator=generator, output_type="latent",
|
| 94 |
).images
|
| 95 |
-
|
| 96 |
refine = refiner(
|
| 97 |
prompt=instructions,
|
| 98 |
guidance_scale=guidance_scale,
|
|
@@ -193,6 +172,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 193 |
with gr.Row():
|
| 194 |
with gr.Column(scale=1):
|
| 195 |
type = gr.Dropdown(["Image Generation","Image Editing"], label="Task", value="Image Generation",interactive=True)
|
|
|
|
| 196 |
with gr.Column(scale=1):
|
| 197 |
generate_button = gr.Button("Generate")
|
| 198 |
|
|
@@ -200,10 +180,14 @@ with gr.Blocks(css=css) as demo:
|
|
| 200 |
input_image = gr.Image(label="Image", type='filepath', interactive=True)
|
| 201 |
|
| 202 |
with gr.Row():
|
| 203 |
-
text_cfg_scale = gr.Number(value=7.3, step=0.1, label="Text CFG", interactive=True)
|
| 204 |
-
image_cfg_scale = gr.Number(value=1.7, step=0.1,label="Image CFG", interactive=True)
|
| 205 |
guidance_scale = gr.Number(value=6.0, step=0.1, label="Image Generation Guidance Scale", interactive=True)
|
| 206 |
steps = gr.Number(value=25, step=1, label="Steps", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
randomize_seed = gr.Radio(
|
| 208 |
["Fix Seed", "Randomize Seed"],
|
| 209 |
value="Randomize Seed",
|
|
@@ -213,9 +197,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 213 |
)
|
| 214 |
seed = gr.Number(value=1371, step=1, label="Seed", interactive=True)
|
| 215 |
|
| 216 |
-
|
| 217 |
-
width = gr.Slider( label="Width", minimum=256, maximum=2048, step=64, value=1024)
|
| 218 |
-
height = gr.Slider( label="Height", minimum=256, maximum=2048, step=64, value=1024)
|
| 219 |
|
| 220 |
gr.Examples(
|
| 221 |
examples=examples,
|
|
@@ -225,7 +207,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 225 |
cache_examples=False,
|
| 226 |
)
|
| 227 |
|
| 228 |
-
gr.Markdown(help_text)
|
| 229 |
|
| 230 |
instruction.change(fn=response, inputs=[instruction,input_image], outputs=type, queue=False)
|
| 231 |
|
|
@@ -242,8 +224,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 242 |
steps,
|
| 243 |
randomize_seed,
|
| 244 |
seed,
|
| 245 |
-
text_cfg_scale,
|
| 246 |
-
image_cfg_scale,
|
| 247 |
width,
|
| 248 |
height,
|
| 249 |
guidance_scale,
|
|
|
|
| 48 |
pipe_edit.scheduler = EDMEulerScheduler(sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction")
|
| 49 |
pipe_edit.to("cuda")
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# Generator
|
| 52 |
@spaces.GPU(duration=30, queue=False)
|
| 53 |
def king(type ,
|
| 54 |
input_image ,
|
| 55 |
instruction: str ,
|
| 56 |
+
steps: int = 25,
|
| 57 |
randomize_seed: bool = False,
|
| 58 |
seed: int = 25,
|
|
|
|
|
|
|
| 59 |
width: int = 1024,
|
| 60 |
height: int = 1024,
|
| 61 |
+
guidance_scale: float = 7,
|
| 62 |
use_resolution_binning: bool = True,
|
| 63 |
progress=gr.Progress(track_tqdm=True),
|
| 64 |
):
|
| 65 |
if type=="Image Editing" :
|
| 66 |
raw_image = Image.open(input_image).convert('RGB')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
if randomize_seed:
|
| 68 |
seed = random.randint(0, 99999)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
generator = torch.manual_seed(seed)
|
| 70 |
output_image = pipe_edit(
|
| 71 |
instructions, image=raw_image,
|
| 72 |
+
guidance_scale=guidance_scale,
|
| 73 |
num_inference_steps=steps, generator=generator, output_type="latent",
|
| 74 |
).images
|
|
|
|
| 75 |
refine = refiner(
|
| 76 |
prompt=instructions,
|
| 77 |
guidance_scale=guidance_scale,
|
|
|
|
| 172 |
with gr.Row():
|
| 173 |
with gr.Column(scale=1):
|
| 174 |
type = gr.Dropdown(["Image Generation","Image Editing"], label="Task", value="Image Generation",interactive=True)
|
| 175 |
+
enhance_prompt = gr.Checkbox()
|
| 176 |
with gr.Column(scale=1):
|
| 177 |
generate_button = gr.Button("Generate")
|
| 178 |
|
|
|
|
| 180 |
input_image = gr.Image(label="Image", type='filepath', interactive=True)
|
| 181 |
|
| 182 |
with gr.Row():
|
|
|
|
|
|
|
| 183 |
guidance_scale = gr.Number(value=6.0, step=0.1, label="Image Generation Guidance Scale", interactive=True)
|
| 184 |
steps = gr.Number(value=25, step=1, label="Steps", interactive=True)
|
| 185 |
+
|
| 186 |
+
with gr.Row():
|
| 187 |
+
width = gr.Slider( label="Width", minimum=256, maximum=2048, step=64, value=1024)
|
| 188 |
+
height = gr.Slider( label="Height", minimum=256, maximum=2048, step=64, value=1024)
|
| 189 |
+
|
| 190 |
+
with gr.Row():
|
| 191 |
randomize_seed = gr.Radio(
|
| 192 |
["Fix Seed", "Randomize Seed"],
|
| 193 |
value="Randomize Seed",
|
|
|
|
| 197 |
)
|
| 198 |
seed = gr.Number(value=1371, step=1, label="Seed", interactive=True)
|
| 199 |
|
| 200 |
+
|
|
|
|
|
|
|
| 201 |
|
| 202 |
gr.Examples(
|
| 203 |
examples=examples,
|
|
|
|
| 207 |
cache_examples=False,
|
| 208 |
)
|
| 209 |
|
| 210 |
+
# gr.Markdown(help_text)
|
| 211 |
|
| 212 |
instruction.change(fn=response, inputs=[instruction,input_image], outputs=type, queue=False)
|
| 213 |
|
|
|
|
| 224 |
steps,
|
| 225 |
randomize_seed,
|
| 226 |
seed,
|
|
|
|
|
|
|
| 227 |
width,
|
| 228 |
height,
|
| 229 |
guidance_scale,
|