Update app.py
app.py
CHANGED
@@ -7,11 +7,7 @@ from diffusers import DiffusionPipeline
 import random
 import numpy as np
 import os
-import subprocess
 from qwen_vl_utils import process_vision_info
-from threading import Thread
-import uuid
-import io
 
 # Initialize models
 device = "cuda" if torch.cuda.is_available() else "cpu"
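Context, not part of the commit: a minimal sketch of how the surviving imports and the device line above are typically used to load the pipeline from the hunk header. The FLUX.1-dev checkpoint id and the dtype choice are assumptions taken from the comment removed later in this diff.

import torch
from diffusers import DiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint id assumed from the removed "# Generate image with FLUX.1-dev"
# comment further down; the real id may differ.
pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
).to(device)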
@@ -36,10 +32,10 @@ enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchan
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024  # Reduced to prevent memory issues
 
-# Qwen2VL caption function
+# Qwen2VL caption function – updated to request plain text caption instead of JSON
 @spaces.GPU
 def qwen_caption(image):
-    # Convert image to PIL if
+    # Convert image to PIL if needed
     if not isinstance(image, Image.Image):
         image = Image.fromarray(image)
 
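As a standalone illustration of the PIL guard this function relies on (illustrative only, assuming Gradio hands the function a numpy array):

import numpy as np
from PIL import Image

def to_pil(image):
    # Gradio image inputs often arrive as numpy arrays; normalize to PIL
    # exactly as the guard above does.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    return image

# Quick check with a dummy 64x64 RGB frame:
assert isinstance(to_pil(np.zeros((64, 64, 3), dtype=np.uint8)), Image.Image)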
@@ -48,7 +44,8 @@ def qwen_caption(image):
             "role": "user",
             "content": [
                 {"type": "image", "image": image},
-                {"type": "text", "text": "Generate a detailed and optimized caption for the given image in the form of JSON data {}."},
+                # Removed "in the form of JSON data {}" to get plain text caption
+                {"type": "text", "text": "Generate a detailed and optimized caption for the given image."},
             ],
         }
     ]
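For readers unfamiliar with the Qwen2-VL message format, a hedged sketch of how a message list like the one above is typically consumed; the checkpoint id, the dummy image, and the generation settings are assumptions, since the commit does not show the model-loading code.

from PIL import Image
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
from qwen_vl_utils import process_vision_info

model_id = "Qwen/Qwen2-VL-2B-Instruct"  # assumed; the Space's checkpoint is not shown
processor = AutoProcessor.from_pretrained(model_id)
model = Qwen2VLForConditionalGeneration.from_pretrained(model_id, device_map="auto")

image = Image.new("RGB", (512, 512))  # stand-in for the user's upload
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": "Generate a detailed and optimized caption for the given image."},
        ],
    }
]

# The usual Qwen2-VL plumbing: chat template -> vision tensors -> generate.
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, video_inputs = process_vision_info(messages)
inputs = processor(text=[text], images=image_inputs, videos=video_inputs,
                   padding=True, return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=256)
trimmed = [out[len(inp):] for inp, out in zip(inputs.input_ids, output_ids)]
print(processor.batch_decode(trimmed, skip_special_tokens=True)[0])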
@@ -77,7 +74,7 @@ def qwen_caption(image):
 
     return output_text
 
-# Prompt Enhancer function
+# Prompt Enhancer function (unchanged)
 def enhance_prompt(input_prompt):
     result = enhancer_long("Enhance the description: " + input_prompt)
     enhanced_text = result[0]['summary_text']
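A usage sketch for the enhancer. The model id is truncated in the hunk header above, and the "-Long" suffix here is only a guess from the enhancer_long variable name:

from transformers import pipeline

enhancer_long = pipeline("summarization",
                         model="gokaygokay/Lamini-Prompt-Enchance-Long")  # id guessed

result = enhancer_long("Enhance the description: " + "a cat on a windowsill")
print(result[0]["summary_text"])  # the enhanced prompt fed to the image model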
@@ -86,10 +83,8 @@ def enhance_prompt(input_prompt):
 @spaces.GPU(duration=190)
 def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
     if image is not None:
-        # Convert image to PIL if it's not already
         if not isinstance(image, Image.Image):
             image = Image.fromarray(image)
-
         prompt = qwen_caption(image)
         print(prompt)
     else:
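The randomize_seed flag passed into process_workflow usually resolves to a concrete seed along these lines (a sketch; the actual seed-handling lines fall outside the hunks shown):

import random
import numpy as np

MAX_SEED = np.iinfo(np.int32).max

def resolve_seed(seed: int, randomize_seed: bool) -> int:
    # Replace the UI seed with a fresh draw when randomization is on.
    return random.randint(0, MAX_SEED) if randomize_seed else seed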
@@ -103,10 +98,9 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
 
     generator = torch.Generator(device=device).manual_seed(seed)
 
-    #
+    # Clear GPU cache before generating the image
     torch.cuda.empty_cache()
 
-    # Generate image with FLUX.1-dev
     try:
         image = pipe(
             prompt=prompt,
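Around the new cache-clearing comment, the generation call is truncated in the diff; the following is a hedged sketch of the full pattern. pipe is the DiffusionPipeline loaded at the top of app.py, the parameter values mirror the sliders' defaults, and the OOM handling is an assumption:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
generator = torch.Generator(device=device).manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.empty_cache()  # release cached blocks before a large allocation

try:
    # `pipe` is the DiffusionPipeline initialized at the top of app.py.
    image = pipe(
        prompt="a lighthouse at dawn",
        width=512,
        height=512,
        guidance_scale=3.5,
        num_inference_steps=20,
        generator=generator,
    ).images[0]
except torch.cuda.OutOfMemoryError:
    torch.cuda.empty_cache()  # free what we can, then surface the error
    raise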
@@ -164,10 +158,10 @@ with gr.Blocks(css=custom_css) as demo:
             use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
             seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
             randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-            width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
-            height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
+            width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
+            height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
             guidance_scale = gr.Slider(label="Guidance Scale", minimum=1, maximum=15, step=0.1, value=3.5)
-            num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=20)
+            num_inference_steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=20)
 
             generate_btn = gr.Button("Generate Image + Prompt Enhanced", elem_classes="submit-btn")
 
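Finally, a hypothetical wiring of these controls to process_workflow; input_image, output_image, and final_prompt are illustrative component names, as the event wiring falls outside this diff:

generate_btn.click(
    fn=process_workflow,
    inputs=[input_image, text_prompt, use_enhancer, seed, randomize_seed,
            width, height, guidance_scale, num_inference_steps],
    outputs=[output_image, final_prompt],  # hypothetical component names
)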