File size: 2,531 Bytes
576f489
9c7cc43
 
 
 
 
576f489
 
9c7cc43
50e3c46
9c7cc43
923fb72
9c7cc43
 
 
576f489
9c7cc43
 
576f489
9c7cc43
 
 
 
 
 
576f489
0bd55bc
 
38f5514
0bd55bc
 
89d1322
0bd55bc
 
cfc9fb5
 
576f489
9c7cc43
 
 
576f489
9c7cc43
 
923fb72
576f489
923fb72
9c7cc43
38f5514
 
 
 
 
9c7cc43
576f489
0bd55bc
37171a1
 
923fb72
 
 
 
 
 
 
 
 
 
 
 
 
576f489
38f5514
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
import random
from PIL import Image
import os
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration


# Folder scanned for photos, plus a snapshot of every ".jpg" in it at startup.
# NOTE: get_random_image() re-lists the folder itself rather than reading
# this snapshot.
image_dir = "images"
images = [
    os.path.join(image_dir, name)
    for name in os.listdir(image_dir)
    if name.endswith(".jpg")
]
# Photo shown when the app first loads, and the fallback when the folder
# contains no ".jpg" files.
STATIC_IMAGE_PATH = "images/Places365_val_00000009.jpg"

# The processor and the model are loaded from the same pretrained checkpoint.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)

def run_vlm(image, prompt):
    """Generate text from the loaded model for one image and one prompt.

    Args:
        image: Image handed to the processor; ``process`` passes the object
            returned by ``Image.open``.
        prompt: Text handed to the processor together with the image.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens skipped.
    """
    # Encode image + prompt as PyTorch tensors and move them next to the model.
    encoded = processor(image, prompt, return_tensors="pt")
    encoded = encoded.to(device)

    generated = model.generate(**encoded)

    # Only the first sequence of the generation output is decoded.
    first_sequence = generated[0]
    return processor.decode(first_sequence, skip_special_tokens=True)

def get_random_image(event=None):
    """Pick a random ".jpg" from ``image_dir`` for the UI.

    The folder is listed on every call. When it holds no ".jpg" files,
    ``STATIC_IMAGE_PATH`` is used instead.

    Args:
        event: Unused; accepted so the function can be called with or
            without an argument.

    Returns:
        A pair of ``gr.update`` objects that both carry the chosen path.
    """
    candidates = [name for name in os.listdir(image_dir) if name.endswith(".jpg")]

    if candidates:
        chosen = os.path.join(image_dir, random.choice(candidates))
        # Log the pick to stdout (not done for the fallback path).
        print(chosen)
    else:
        chosen = STATIC_IMAGE_PATH

    return gr.update(value=chosen), gr.update(value=chosen)

        

def process(image_path, user_prompt):
    """Open the image at *image_path* and run the model on it.

    Args:
        image_path: Filesystem path of the image to load.
        user_prompt: Text forwarded unchanged to ``run_vlm`` as the prompt.

    Returns:
        Whatever ``run_vlm`` returns for the opened image and the prompt.

    Raises:
        Any exception raised by ``Image.open`` (e.g. for a missing or
        unreadable file) or by ``run_vlm`` propagates to the caller.
    """
    # Image.open is lazy and holds the file open. The context manager
    # releases the handle once inference has finished, even if run_vlm raises.
    with Image.open(image_path) as image:
        return run_vlm(image, user_prompt)

# Build the demo UI: an image preview, a prompt box, two buttons and an
# output box, wired to get_random_image() and process().
with gr.Blocks() as demo:
    with gr.Row():
        # Preview of the current photo, initialised with the default image.
        image_display = gr.Image(value=STATIC_IMAGE_PATH, type="filepath", label="Selected Image")

    # Hidden textbox holding the path of the current photo; this value (not
    # the preview component) is what gets passed to process().
    image_path = gr.Textbox(value=STATIC_IMAGE_PATH, visible=False)
    user_prompt = gr.Textbox(label="User Prompt")

    with gr.Row():
        random_button = gr.Button("Random Photo")
        run_button = gr.Button("Run Model")

    output = gr.Textbox(label="Model Output")

    # "Random Photo" writes the same new path to the preview and the hidden
    # textbox, which is what keeps the two in sync.
    random_button.click(fn=get_random_image, outputs=[image_display, image_path])
    # "Run Model" sends the hidden path and the prompt to process() and shows
    # its return value in the output box.
    run_button.click(fn=process, inputs=[image_path, user_prompt], outputs=output)

# with gr.Blocks() as demo:
#     with gr.Row():
#         image_display = gr.Image(type="filepath", label="Selected Image")
#         random_button = gr.Button("Randomize Photo")

#     image_path = gr.Textbox(visible=False)
#     user_prompt = gr.Textbox(label="User Prompt")
#     run_button = gr.Button("Run Model")
#     output = gr.Textbox(label="Model Output")

#     random_button.click(fn=random_image, outputs=[image_display, image_path])
#     run_button.click(fn=process, inputs=[image_path, user_prompt], outputs=output)


# Start the Gradio app. launch() is called without arguments, so the
# library's default settings apply. This runs whenever the module is executed
# or imported (there is no __main__ guard).
demo.launch()