init
app.py CHANGED
@@ -47,7 +47,7 @@ else:
 @torch.inference_mode()
 @spaces.GPU(duration=120)
 # Multimodal Understanding function
-def multimodal_understanding(image_src, question, seed, progress=gr.Progress(track_tqdm=True)):
+def camera_understanding(image_src, question, seed, progress=gr.Progress(track_tqdm=True)):
     # Clear CUDA cache before generating
     torch.cuda.empty_cache()
 
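For context on the decorator stack this hunk keeps: on a ZeroGPU Space, `@spaces.GPU(duration=120)` reserves a GPU for up to 120 seconds per call, `@torch.inference_mode()` disables autograd tracking, and `torch.cuda.empty_cache()` returns cached allocator blocks to the driver. A minimal self-contained sketch of the same pattern (`run_inference` and `model` are illustrative names, not from this app):

import torch
import spaces  # HF Spaces ZeroGPU helper

@torch.inference_mode()    # no autograd bookkeeping during inference
@spaces.GPU(duration=120)  # request a ZeroGPU slot for up to 120 s
def run_inference(model, image):
    # Release cached CUDA blocks before generating.
    torch.cuda.empty_cache()
    return model(image)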
@@ -84,19 +84,15 @@ def multimodal_understanding(image_src, question, seed, progress=gr.Progress(tra
     single_batch["latitude_field"] = cam[2:].unsqueeze(0)
 
     figs = make_perspective_figures(single_batch, single_batch, n_pairs=1)
+    imgs = []
     for k, fig in figs.items():
-
-
-
-            suffix = "_lat"
-        else:
-            suffix = f"_{k}"
-        out_path = os.path.join(save_dir, f"{stem}_camera_map_vis{suffix}.png")
-        plt.tight_layout()
-        fig.savefig(out_path, dpi=200, bbox_inches='tight', pad_inches=0)
+        fig.canvas.draw()
+        img = np.array(fig.canvas.renderer.buffer_rgba())
+        imgs.append(img)
         plt.close(fig)
+    merged_imgs = np.concatenate(imgs, axis=1)
 
-    return text
+    return text, merged_imgs
 
 
 @torch.inference_mode()
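The loop above swaps PNG files on disk for in-memory arrays: each figure is rasterized once, its pixels are copied out of the canvas, and the copies are tiled side by side. A standalone sketch of that conversion, assuming the Agg backend (names here are illustrative):

import matplotlib
matplotlib.use("Agg")  # buffer_rgba() needs a raster canvas
import matplotlib.pyplot as plt
import numpy as np

def fig_to_rgba(fig):
    fig.canvas.draw()  # render before reading pixels
    return np.array(fig.canvas.renderer.buffer_rgba())

figs = [plt.subplots()[0] for _ in range(2)]
imgs = [fig_to_rgba(f) for f in figs]
for f in figs:
    plt.close(f)  # free each figure once its pixels are copied
# axis=1 tiles horizontally; every figure must share the same pixel height.
merged = np.concatenate(imgs, axis=1)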
@@ -190,11 +186,11 @@ with gr.Blocks(css=css) as demo:
     with gr.Tab("Camera Understanding"):
         gr.Markdown(value="## Camera Understanding")
         image_input = gr.Image()
-        with gr.Column():
-            question_input = gr.Textbox(label="Question")
 
         understanding_button = gr.Button("Chat")
         understanding_output = gr.Textbox(label="Response")
+
+        image_output = gr.Gallery(label="Camera Maps", columns=1, rows=1)
 
         with gr.Accordion("Advanced options", open=False):
             und_seed_input = gr.Number(label="Seed", precision=0, value=42)
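The `question_input` textbox is removed and a `gr.Gallery` now holds the rendered camera maps. A minimal sketch of the resulting layout, with `show_maps` as a placeholder for this app's handler; note that `gr.Gallery` expects a list of images, so depending on the Gradio version a single merged array may need to be wrapped in a list before being returned:

import gradio as gr
import numpy as np

def show_maps(image, seed):
    # Placeholder handler: echo the input back as one gallery tile.
    return f"seed={seed}", [np.asarray(image)]

with gr.Blocks() as demo:
    with gr.Tab("Camera Understanding"):
        image_input = gr.Image()
        button = gr.Button("Chat")
        text_output = gr.Textbox(label="Response")
        gallery = gr.Gallery(label="Camera Maps", columns=1, rows=1)
        seed = gr.Number(label="Seed", precision=0, value=42)
    button.click(show_maps, inputs=[image_input, seed], outputs=[text_output, gallery])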
@@ -202,16 +198,10 @@ with gr.Blocks(css=css) as demo:
         examples_inpainting = gr.Examples(
             label="Camera Understanding examples",
             examples=[
-
-
-                    "view.jpg",
-                ],
-                [
-                    "Briefly describe the image.",
-                    "view.jpg",
-                ],
+                "assets/1.jpg",
+                "assets/2.jpg",
             ],
-            inputs=
+            inputs=image_input,
         )
 
         generation_button.click(
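Because the examples list is now flat, each entry fills the single component named in `inputs`; with the old nested lists, each inner list supplied one value per input component. A minimal sketch of the new form (the `assets/` paths come from the diff and are resolved relative to the app's working directory):

import gradio as gr

with gr.Blocks() as demo:
    image_input = gr.Image()
    gr.Examples(
        label="Camera Understanding examples",
        examples=["assets/1.jpg", "assets/2.jpg"],  # one value per example row
        inputs=image_input,
    )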
@@ -221,9 +211,9 @@ with gr.Blocks(css=css) as demo:
         )
 
         understanding_button.click(
-
-            inputs=[image_input,
-            outputs=understanding_output
+            camera_understanding,
+            inputs=[image_input, und_seed_input],
+            outputs=[understanding_output, image_output]
         )
 
 demo.launch(share=True)
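One thing worth flagging: the new handler still declares `camera_understanding(image_src, question, seed, ...)`, but the click wiring passes only `[image_input, und_seed_input]`. Gradio binds inputs positionally, so `question` would receive the seed value and `seed` would be left unfilled, raising a `TypeError` at call time. A hypothetical adjustment, not part of this commit, is to drop `question` or give it a default:

# Hypothetical fix: match the handler's positional parameters
# to the two wired inputs.
def camera_understanding(image_src, seed, progress=gr.Progress(track_tqdm=True)):
    ...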
|