KangLiao committed
Commit 2d1f86e · 1 Parent(s): da23342
Files changed (1)
  1. app.py  +15 -25

app.py CHANGED
@@ -47,7 +47,7 @@ else:
 @torch.inference_mode()
 @spaces.GPU(duration=120)
 # Multimodal Understanding function
-def multimodal_understanding(image_src, question, seed, progress=gr.Progress(track_tqdm=True)):
+def camera_understanding(image_src, question, seed, progress=gr.Progress(track_tqdm=True)):
     # Clear CUDA cache before generating
     torch.cuda.empty_cache()
 
@@ -84,19 +84,15 @@ def multimodal_understanding(image_src, question, seed, progress=gr.Progress(tra
     single_batch["latitude_field"] = cam[2:].unsqueeze(0)
 
     figs = make_perspective_figures(single_batch, single_batch, n_pairs=1)
+    imgs = []
     for k, fig in figs.items():
-        if "up_field" in k:
-            suffix = "_up"
-        elif "latitude_field" in k:
-            suffix = "_lat"
-        else:
-            suffix = f"_{k}"
-        out_path = os.path.join(save_dir, f"{stem}_camera_map_vis{suffix}.png")
-        plt.tight_layout()
-        fig.savefig(out_path, dpi=200, bbox_inches='tight', pad_inches=0)
+        fig.canvas.draw()
+        img = np.array(fig.canvas.renderer.buffer_rgba())
+        imgs.append(img)
         plt.close(fig)
+    merged_imgs = np.concatenate(imgs, axis=1)
 
-    return text
+    return text, merged_imgs
 
 
 @torch.inference_mode()
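For reference, the replacement loop renders each Matplotlib figure off-screen and concatenates the resulting RGBA buffers instead of saving PNGs to disk. Below is a minimal standalone sketch of that pattern; it uses the canvas-level `buffer_rgba()` of the Agg backend rather than reaching into `fig.canvas.renderer`, and the two dummy figures stand in for the app's perspective-field plots.

```python
import matplotlib
matplotlib.use("Agg")   # off-screen backend; no display required
import matplotlib.pyplot as plt
import numpy as np

# Stand-in figures for the perspective-field plots (same figsize so they concatenate cleanly).
figs = {}
for name in ("up_field", "latitude_field"):
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.imshow(np.random.rand(32, 32))
    ax.set_title(name)
    figs[name] = fig

imgs = []
for k, fig in figs.items():
    fig.canvas.draw()                           # rasterize the figure
    img = np.asarray(fig.canvas.buffer_rgba())  # (H, W, 4) uint8 view of the Agg buffer
    imgs.append(img.copy())                     # copy before the figure is closed
    plt.close(fig)

merged = np.concatenate(imgs, axis=1)           # panels side by side
print(merged.shape)                             # e.g. (400, 800, 4)
```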
@@ -190,11 +186,11 @@ with gr.Blocks(css=css) as demo:
     with gr.Tab("Camera Understanding"):
         gr.Markdown(value="## Camera Understanding")
         image_input = gr.Image()
-        with gr.Column():
-            question_input = gr.Textbox(label="Question")
 
         understanding_button = gr.Button("Chat")
         understanding_output = gr.Textbox(label="Response")
+
+        image_output = gr.Gallery(label="Camera Maps", columns=1, rows=1)
 
         with gr.Accordion("Advanced options", open=False):
             und_seed_input = gr.Number(label="Seed", precision=0, value=42)
@@ -202,16 +198,10 @@ with gr.Blocks(css=css) as demo:
         examples_inpainting = gr.Examples(
             label="Camera Understanding examples",
             examples=[
-                [
-                    "Is the picture taken in winter?",
-                    "view.jpg",
-                ],
-                [
-                    "Briefly describe the image.",
-                    "view.jpg",
-                ],
+                "assets/1.jpg",
+                "assets/2.jpg",
             ],
-            inputs=[question_input, image_input],
+            inputs=image_input,
         )
 
     generation_button.click(
@@ -221,9 +211,9 @@ with gr.Blocks(css=css) as demo:
     )
 
     understanding_button.click(
-        multimodal_understanding,
-        inputs=[image_input, question_input, und_seed_input],
-        outputs=understanding_output
+        camera_understanding,
+        inputs=[image_input, und_seed_input],
+        outputs=[understanding_output, image_output]
     )
 
 demo.launch(share=True)
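Taken together, the new wiring has the handler return a `(text, images)` pair that Gradio fans out to the Textbox and the new Gallery, while the examples now feed only the image input. A minimal, self-contained sketch of that layout follows (Gradio 4.x assumed; the handler body, the generated placeholder assets, and the two-argument signature are illustrative stand-ins, not the Space's actual code):

```python
import os
import numpy as np
from PIL import Image as PILImage
import gradio as gr

# Placeholder example images so gr.Examples has files to point at
# (the real Space ships assets/1.jpg and assets/2.jpg).
os.makedirs("assets", exist_ok=True)
for p in ("assets/1.jpg", "assets/2.jpg"):
    if not os.path.exists(p):
        PILImage.fromarray((np.random.rand(64, 64, 3) * 255).astype(np.uint8)).save(p)

def camera_understanding(image_src, seed):
    # Placeholder handler: the real app runs the model and renders camera maps.
    shape = None if image_src is None else image_src.shape
    text = f"seed={int(seed)}, input image shape={shape}"
    fake_map = (np.random.rand(128, 256, 3) * 255).astype(np.uint8)
    # gr.Gallery expects a list of images, so wrap the merged map in a list.
    return text, [fake_map]

with gr.Blocks() as demo:
    with gr.Tab("Camera Understanding"):
        gr.Markdown("## Camera Understanding")
        image_input = gr.Image()
        understanding_button = gr.Button("Chat")
        understanding_output = gr.Textbox(label="Response")
        image_output = gr.Gallery(label="Camera Maps", columns=1, rows=1)
        with gr.Accordion("Advanced options", open=False):
            und_seed_input = gr.Number(label="Seed", precision=0, value=42)
        gr.Examples(
            label="Camera Understanding examples",
            examples=["assets/1.jpg", "assets/2.jpg"],
            inputs=image_input,
        )
    understanding_button.click(
        camera_understanding,
        inputs=[image_input, und_seed_input],
        outputs=[understanding_output, image_output],
    )

demo.launch()
```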
 