Spaces:

KangLiao
/

Puffin

Running on Zero

App Files Files Community

KangLiao committed on Oct 9

Commit

cc8a5f7

1 Parent(s): f18fdea

init

Browse files

Files changed (2) hide show

.gitattributes +1 -0
app.py +34 -11

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import torch
 from PIL import Image
 import numpy as np
 import spaces  # Import spaces for ZeroGPU compatibility
@@ -47,6 +48,26 @@ checkpoint_path = "checkpoints/Puffin-Base.pth"
 checkpoint = torch.load(checkpoint_path)
 info = model.load_state_dict(checkpoint, strict=False)
 @torch.inference_mode()
 @spaces.GPU(duration=120)
@@ -88,23 +109,23 @@ def camera_understanding(image_src, question, seed, progress=gr.Progress(track_t
     single_batch["latitude_field"] = cam[2:].unsqueeze(0)
     figs = make_perspective_figures(single_batch, single_batch, n_pairs=1)
-    imgs = []
     for k, fig in figs.items():
-        fig.canvas.draw()
-        img = np.array(fig.canvas.renderer.buffer_rgba())
-        imgs.append(img)
         plt.close(fig)
-    merged_imgs = np.concatenate(imgs, axis=1)
-    return text, merged_imgs
 @torch.inference_mode()
 @spaces.GPU(duration=120)  # Specify a duration to avoid timeout
 def generate_image(prompt_scene,
                    seed=42,
-                   roll=3,
-                   pitch=1.0,
                    fov=1.0,
                    progress=gr.Progress(track_tqdm=True)):
     # Clear CUDA cache and avoid tracking gradients
@@ -126,6 +147,7 @@ def generate_image(prompt_scene,
     cam_map = cam_map / (math.pi / 2)
     prompt = prompt_scene + " " + prompt_camera
     bsz = 4
     with torch.no_grad():
@@ -167,7 +189,7 @@ with gr.Blocks(css=css) as demo:
                 roll = gr.Slider(minimum=-0.7854, maximum=0.7854, value=0.1000, step=0.1000, label="roll value")
                 pitch = gr.Slider(minimum=-0.7854, maximum=0.7854, value=-0.1000, step=0.1000, label="pitch value")
                 fov = gr.Slider(minimum=0.3491, maximum=1.8326, value=1.5000, step=0.1000, label="fov value")
-            seed_input = gr.Number(label="Seed (Optional)", precision=0, value=1234)
         generation_button = gr.Button("Generate Images")
@@ -192,7 +214,8 @@ with gr.Blocks(css=css) as demo:
         understanding_button = gr.Button("Chat")
         understanding_output = gr.Textbox(label="Response")
-        camera_output = gr.Gallery(label="Camera Maps", columns=1, rows=1)
         with gr.Accordion("Advanced options", open=False):
             und_seed_input = gr.Number(label="Seed", precision=0, value=42)
@@ -215,7 +238,7 @@ with gr.Blocks(css=css) as demo:
     understanding_button.click(
         camera_understanding,
         inputs=[image_input, und_seed_input],
-        outputs=[understanding_output, camera_output]
     )
 demo.launch(share=True)

 import gradio as gr
 import torch
+import io
 from PIL import Image
 import numpy as np
 import spaces  # Import spaces for ZeroGPU compatibility
 checkpoint = torch.load(checkpoint_path)
 info = model.load_state_dict(checkpoint, strict=False)
def fig_to_image(fig):
    """Render a figure to an in-memory PNG and return it as an RGB PIL image.

    Args:
        fig: Object exposing ``savefig(buf, format=..., bbox_inches=...,
            pad_inches=...)`` -- presumably a matplotlib Figure; confirm
            against callers.

    Returns:
        The RGB image produced by decoding the PNG written by ``fig.savefig``.
    """
    # Context managers release the buffer and the decoder handle even when
    # savefig() or PNG decoding raises, instead of relying on a manual close().
    with io.BytesIO() as buf:
        # Tight bounding box with zero padding trims the surrounding margins.
        fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
        buf.seek(0)  # rewind so the decoder reads from the start of the PNG
        with Image.open(buf) as png:
            # convert() returns a new, fully loaded image, so the result stays
            # usable after the buffer is closed.
            return png.convert('RGB')
def extract_up_lat_figs(fig_dict):
    """Split a figure mapping into up-field, latitude-field and remaining entries.

    The first entry whose key contains ``"up_field"`` is taken as the up-field
    figure, and the first entry whose key contains ``"latitude_field"`` (and
    was not already taken as the up-field figure) is taken as the
    latitude-field figure. Every other entry is collected, in iteration
    order, into a separate dict.

    Args:
        fig_dict: Mapping from string keys to figure objects.

    Returns:
        A tuple ``(fig_up, fig_lat, others)``; either of the first two is
        ``None`` when no matching key was found.
    """
    up_figure = None
    lat_figure = None
    remaining = {}
    for name in fig_dict:
        figure = fig_dict[name]
        # Claim the up-field slot only while it is still empty.
        if ("up_field" in name) and (up_figure is None):
            up_figure = figure
            continue
        # Same rule for the latitude-field slot.
        if ("latitude_field" in name) and (lat_figure is None):
            lat_figure = figure
            continue
        remaining[name] = figure
    return up_figure, lat_figure, remaining
 @torch.inference_mode()
 @spaces.GPU(duration=120)
     single_batch["latitude_field"] = cam[2:].unsqueeze(0)
     figs = make_perspective_figures(single_batch, single_batch, n_pairs=1)
+    up_img = lat_img = None
     for k, fig in figs.items():
+        if "up_field" in k:
+            up_img = fig_to_image(fig)
+        elif "latitude_field" in k:
+            lat_img = fig_to_image(fig)
         plt.close(fig)
+    return text, up_img, lat_img
 @torch.inference_mode()
 @spaces.GPU(duration=120)  # Specify a duration to avoid timeout
 def generate_image(prompt_scene,
                    seed=42,
+                   roll=0.1,
+                   pitch=0.1,
                    fov=1.0,
                    progress=gr.Progress(track_tqdm=True)):
     # Clear CUDA cache and avoid tracking gradients
     cam_map = cam_map / (math.pi / 2)
     prompt = prompt_scene + " " + prompt_camera
+    print("prompt:", prompt)
     bsz = 4
     with torch.no_grad():
                 roll = gr.Slider(minimum=-0.7854, maximum=0.7854, value=0.1000, step=0.1000, label="roll value")
                 pitch = gr.Slider(minimum=-0.7854, maximum=0.7854, value=-0.1000, step=0.1000, label="pitch value")
                 fov = gr.Slider(minimum=0.3491, maximum=1.8326, value=1.5000, step=0.1000, label="fov value")
+            seed_input = gr.Number(label="Seed (Optional)", precision=0, value=42)
         generation_button = gr.Button("Generate Images")
         understanding_button = gr.Button("Chat")
         understanding_output = gr.Textbox(label="Response")
+        camera1 = gr.Gallery(label="Camera Maps", columns=1, rows=1)
+        camera2 = gr.Gallery(label="Camera Maps", columns=1, rows=1)
         with gr.Accordion("Advanced options", open=False):
             und_seed_input = gr.Number(label="Seed", precision=0, value=42)
     understanding_button.click(
         camera_understanding,
         inputs=[image_input, und_seed_input],
+        outputs=[understanding_output, camera1, camera2]
     )
 demo.launch(share=True)