Spaces:
Running
on
Zero
Running
on
Zero
remove blip captioning and more straight forward
Browse files
app.py
CHANGED
|
@@ -48,51 +48,30 @@ pipe_edit = StableDiffusionXLInstructPix2PixPipeline.from_single_file( edit_file
|
|
| 48 |
pipe_edit.scheduler = EDMEulerScheduler(sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction")
|
| 49 |
pipe_edit.to("cuda")
|
| 50 |
|
| 51 |
-
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 52 |
-
|
| 53 |
-
processor = BlipProcessor.from_pretrained("unography/blip-long-cap")
|
| 54 |
-
model = BlipForConditionalGeneration.from_pretrained("unography/blip-long-cap", torch_dtype=torch.float16).to("cuda")
|
| 55 |
-
|
| 56 |
# Generator
|
| 57 |
@spaces.GPU(duration=30, queue=False)
|
| 58 |
def king(type ,
|
| 59 |
input_image ,
|
| 60 |
instruction: str ,
|
| 61 |
-
steps: int =
|
| 62 |
randomize_seed: bool = False,
|
| 63 |
seed: int = 25,
|
| 64 |
-
text_cfg_scale: float = 7.3,
|
| 65 |
-
image_cfg_scale: float = 1.7,
|
| 66 |
width: int = 1024,
|
| 67 |
height: int = 1024,
|
| 68 |
-
guidance_scale: float =
|
| 69 |
use_resolution_binning: bool = True,
|
| 70 |
progress=gr.Progress(track_tqdm=True),
|
| 71 |
):
|
| 72 |
if type=="Image Editing" :
|
| 73 |
raw_image = Image.open(input_image).convert('RGB')
|
| 74 |
-
inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)
|
| 75 |
-
out = model.generate(**inputs, min_length=10, max_length=20)
|
| 76 |
-
caption = processor.decode(out[0], skip_special_tokens=True)
|
| 77 |
-
client1 = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
| 78 |
-
system_instructions1 = "<s>[SYSTEM] Your task is to modify prompt by USER with edit text, and create new prompt for image generation, reply with prompt only, Your task is to reply with final prompt only. [USER]"
|
| 79 |
-
formatted_prompt = f"{system_instructions1} {caption} [EDIT] {instruction} [FINAL_PROMPT]"
|
| 80 |
-
stream = client1.text_generation(formatted_prompt, max_new_tokens=50, stream=True, details=True, return_full_text=False)
|
| 81 |
-
instructions = "".join([response.token.text for response in stream if response.token.text != "</s>"])
|
| 82 |
-
print(instructions)
|
| 83 |
if randomize_seed:
|
| 84 |
seed = random.randint(0, 99999)
|
| 85 |
-
text_cfg_scale = text_cfg_scale
|
| 86 |
-
image_cfg_scale = image_cfg_scale
|
| 87 |
-
input_image = input_image
|
| 88 |
-
steps=steps
|
| 89 |
generator = torch.manual_seed(seed)
|
| 90 |
output_image = pipe_edit(
|
| 91 |
instructions, image=raw_image,
|
| 92 |
-
guidance_scale=
|
| 93 |
num_inference_steps=steps, generator=generator, output_type="latent",
|
| 94 |
).images
|
| 95 |
-
|
| 96 |
refine = refiner(
|
| 97 |
prompt=instructions,
|
| 98 |
guidance_scale=guidance_scale,
|
|
@@ -193,6 +172,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 193 |
with gr.Row():
|
| 194 |
with gr.Column(scale=1):
|
| 195 |
type = gr.Dropdown(["Image Generation","Image Editing"], label="Task", value="Image Generation",interactive=True)
|
|
|
|
| 196 |
with gr.Column(scale=1):
|
| 197 |
generate_button = gr.Button("Generate")
|
| 198 |
|
|
@@ -200,10 +180,14 @@ with gr.Blocks(css=css) as demo:
|
|
| 200 |
input_image = gr.Image(label="Image", type='filepath', interactive=True)
|
| 201 |
|
| 202 |
with gr.Row():
|
| 203 |
-
text_cfg_scale = gr.Number(value=7.3, step=0.1, label="Text CFG", interactive=True)
|
| 204 |
-
image_cfg_scale = gr.Number(value=1.7, step=0.1,label="Image CFG", interactive=True)
|
| 205 |
guidance_scale = gr.Number(value=6.0, step=0.1, label="Image Generation Guidance Scale", interactive=True)
|
| 206 |
steps = gr.Number(value=25, step=1, label="Steps", interactive=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
randomize_seed = gr.Radio(
|
| 208 |
["Fix Seed", "Randomize Seed"],
|
| 209 |
value="Randomize Seed",
|
|
@@ -213,9 +197,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 213 |
)
|
| 214 |
seed = gr.Number(value=1371, step=1, label="Seed", interactive=True)
|
| 215 |
|
| 216 |
-
|
| 217 |
-
width = gr.Slider( label="Width", minimum=256, maximum=2048, step=64, value=1024)
|
| 218 |
-
height = gr.Slider( label="Height", minimum=256, maximum=2048, step=64, value=1024)
|
| 219 |
|
| 220 |
gr.Examples(
|
| 221 |
examples=examples,
|
|
@@ -225,7 +207,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 225 |
cache_examples=False,
|
| 226 |
)
|
| 227 |
|
| 228 |
-
gr.Markdown(help_text)
|
| 229 |
|
| 230 |
instruction.change(fn=response, inputs=[instruction,input_image], outputs=type, queue=False)
|
| 231 |
|
|
@@ -242,8 +224,6 @@ with gr.Blocks(css=css) as demo:
|
|
| 242 |
steps,
|
| 243 |
randomize_seed,
|
| 244 |
seed,
|
| 245 |
-
text_cfg_scale,
|
| 246 |
-
image_cfg_scale,
|
| 247 |
width,
|
| 248 |
height,
|
| 249 |
guidance_scale,
|
|
|
|
| 48 |
pipe_edit.scheduler = EDMEulerScheduler(sigma_min=0.002, sigma_max=120.0, sigma_data=1.0, prediction_type="v_prediction")
|
| 49 |
pipe_edit.to("cuda")
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
# Generator
|
| 52 |
@spaces.GPU(duration=30, queue=False)
|
| 53 |
def king(type ,
|
| 54 |
input_image ,
|
| 55 |
instruction: str ,
|
| 56 |
+
steps: int = 25,
|
| 57 |
randomize_seed: bool = False,
|
| 58 |
seed: int = 25,
|
|
|
|
|
|
|
| 59 |
width: int = 1024,
|
| 60 |
height: int = 1024,
|
| 61 |
+
guidance_scale: float = 7,
|
| 62 |
use_resolution_binning: bool = True,
|
| 63 |
progress=gr.Progress(track_tqdm=True),
|
| 64 |
):
|
| 65 |
if type=="Image Editing" :
|
| 66 |
raw_image = Image.open(input_image).convert('RGB')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
if randomize_seed:
|
| 68 |
seed = random.randint(0, 99999)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
generator = torch.manual_seed(seed)
|
| 70 |
output_image = pipe_edit(
|
| 71 |
instructions, image=raw_image,
|
| 72 |
+
guidance_scale=guidance_scale,
|
| 73 |
num_inference_steps=steps, generator=generator, output_type="latent",
|
| 74 |
).images
|
|
|
|
| 75 |
refine = refiner(
|
| 76 |
prompt=instructions,
|
| 77 |
guidance_scale=guidance_scale,
|
|
|
|
| 172 |
with gr.Row():
|
| 173 |
with gr.Column(scale=1):
|
| 174 |
type = gr.Dropdown(["Image Generation","Image Editing"], label="Task", value="Image Generation",interactive=True)
|
| 175 |
+
enhance_prompt = gr.Checkbox()
|
| 176 |
with gr.Column(scale=1):
|
| 177 |
generate_button = gr.Button("Generate")
|
| 178 |
|
|
|
|
| 180 |
input_image = gr.Image(label="Image", type='filepath', interactive=True)
|
| 181 |
|
| 182 |
with gr.Row():
|
|
|
|
|
|
|
| 183 |
guidance_scale = gr.Number(value=6.0, step=0.1, label="Image Generation Guidance Scale", interactive=True)
|
| 184 |
steps = gr.Number(value=25, step=1, label="Steps", interactive=True)
|
| 185 |
+
|
| 186 |
+
with gr.Row():
|
| 187 |
+
width = gr.Slider( label="Width", minimum=256, maximum=2048, step=64, value=1024)
|
| 188 |
+
height = gr.Slider( label="Height", minimum=256, maximum=2048, step=64, value=1024)
|
| 189 |
+
|
| 190 |
+
with gr.Row():
|
| 191 |
randomize_seed = gr.Radio(
|
| 192 |
["Fix Seed", "Randomize Seed"],
|
| 193 |
value="Randomize Seed",
|
|
|
|
| 197 |
)
|
| 198 |
seed = gr.Number(value=1371, step=1, label="Seed", interactive=True)
|
| 199 |
|
| 200 |
+
|
|
|
|
|
|
|
| 201 |
|
| 202 |
gr.Examples(
|
| 203 |
examples=examples,
|
|
|
|
| 207 |
cache_examples=False,
|
| 208 |
)
|
| 209 |
|
| 210 |
+
# gr.Markdown(help_text)
|
| 211 |
|
| 212 |
instruction.change(fn=response, inputs=[instruction,input_image], outputs=type, queue=False)
|
| 213 |
|
|
|
|
| 224 |
steps,
|
| 225 |
randomize_seed,
|
| 226 |
seed,
|
|
|
|
|
|
|
| 227 |
width,
|
| 228 |
height,
|
| 229 |
guidance_scale,
|