Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import spaces
|
| 2 |
import gradio as gr
|
| 3 |
import torch
|
|
|
|
| 4 |
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor, pipeline
|
| 5 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 6 |
import re
|
|
@@ -56,6 +57,10 @@ MAX_SEED = 2**32 - 1
|
|
| 56 |
|
| 57 |
# Florence caption function
|
| 58 |
def florence_caption(image):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
|
| 60 |
generated_ids = florence_model.generate(
|
| 61 |
input_ids=inputs["input_ids"],
|
|
@@ -137,10 +142,13 @@ def generate_image(prompt, negative_prompt, seed, randomize_seed, width, height,
|
|
| 137 |
|
| 138 |
return image, seed
|
| 139 |
|
| 140 |
-
# Gradio Interface
|
| 141 |
@spaces.GPU
|
| 142 |
def process_workflow(image, text_prompt, vlm_model_choice, use_enhancer, model_choice, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
|
| 143 |
if image is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
if vlm_model_choice == "Long Captioner":
|
| 145 |
prompt = create_captions_rich(image)
|
| 146 |
else: # Florence
|
|
|
|
| 1 |
import spaces
|
| 2 |
import gradio as gr
|
| 3 |
import torch
|
| 4 |
+
from PIL import Image
|
| 5 |
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor, pipeline
|
| 6 |
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 7 |
import re
|
|
|
|
| 57 |
|
| 58 |
# Florence caption function
|
| 59 |
def florence_caption(image):
|
| 60 |
+
# Convert image to PIL if it's not already
|
| 61 |
+
if not isinstance(image, Image.Image):
|
| 62 |
+
image = Image.fromarray(image)
|
| 63 |
+
|
| 64 |
inputs = florence_processor(text="<MORE_DETAILED_CAPTION>", images=image, return_tensors="pt").to(device)
|
| 65 |
generated_ids = florence_model.generate(
|
| 66 |
input_ids=inputs["input_ids"],
|
|
|
|
| 142 |
|
| 143 |
return image, seed
|
| 144 |
|
|
|
|
| 145 |
@spaces.GPU
|
| 146 |
def process_workflow(image, text_prompt, vlm_model_choice, use_enhancer, model_choice, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
|
| 147 |
if image is not None:
|
| 148 |
+
# Convert image to PIL if it's not already
|
| 149 |
+
if not isinstance(image, Image.Image):
|
| 150 |
+
image = Image.fromarray(image)
|
| 151 |
+
|
| 152 |
if vlm_model_choice == "Long Captioner":
|
| 153 |
prompt = create_captions_rich(image)
|
| 154 |
else: # Florence
|