File size: 8,512 Bytes
9891441
d65081d
 
 
 
9891441
f8d6272
d65081d
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
dc673c1
d65081d
 
dc673c1
d65081d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8d6272
d65081d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
9891441
d65081d
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
dc673c1
d65081d
9891441
d65081d
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dc673c1
d65081d
 
 
 
 
 
dc673c1
 
d65081d
 
 
 
 
 
 
 
 
dc673c1
 
 
 
 
 
d65081d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6cd3957
 
dc673c1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import os
import io
import tempfile
import numpy as np
from PIL import Image

import gradio as gr
import torch
from transformers import GLPNForDepthEstimation, GLPNImageProcessor

import open3d as o3d


# ------------------------------
# Model setup (loaded once)
# ------------------------------
# Run on the GPU when torch reports one is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# The image processor and the depth network are both loaded from the same
# pretrained identifier, once, at import time.
FE = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
MODEL = MODEL.to(DEVICE)
MODEL.eval()  # put the module in evaluation mode; this script only does inference


# ------------------------------
# Utilities
# ------------------------------
def _resize_to_mult32(img: Image.Image, max_h=480, min_side=64):
    """Resize keeping aspect, cap height to max_h, make both dims multiples of 32.

    The height is ``min(max_h, img.height)`` rounded down to a multiple of 32;
    the width follows the aspect ratio and is rounded to the nearest multiple
    of 32.

    Args:
        img: Source image.
        max_h: Upper bound for the output height before rounding.
        min_side: Lower bound applied to both output dimensions. It should be
            a multiple of 32 so the result stays a multiple of 32. The default
            of 64 keeps some pixels after the 16-pixel border crop that
            ``predict_depth`` applies on every side. Note that this floor wins
            over ``max_h`` when ``max_h`` is smaller than ``min_side``.

    Returns:
        The bicubically resized image.
    """
    new_h = min(max_h, img.height)
    new_h -= new_h % 32
    # Without a floor, inputs shorter than 32 px round down to a height of 0
    # and the resize call fails.
    new_h = max(new_h, min_side)

    new_w = int(new_h * img.width / img.height)
    diff = new_w % 32
    new_w = new_w - diff if diff < 16 else new_w + (32 - diff)
    # Very narrow inputs would otherwise round to a width of 0.
    new_w = max(new_w, min_side)

    return img.resize((new_w, new_h), Image.BICUBIC)


def predict_depth(image_pil: Image.Image):
    """Estimate depth for an image with the module-level GLPN model.

    The input is converted to RGB and resized with ``_resize_to_mult32``
    before being passed through ``FE`` and ``MODEL``. A fixed 16-pixel border
    is then removed from both the prediction and the resized image so the two
    outputs stay aligned.

    Returns:
        ``(rgb, depth)``: the border-cropped resized image (PIL) and the
        border-cropped prediction as a NumPy array, multiplied by 1000.
    """
    border = 16  # pixels dropped on every side of the prediction and the image

    resized = _resize_to_mult32(image_pil.convert("RGB"))
    batch = FE(images=resized, return_tensors="pt").to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        prediction = MODEL(**batch).predicted_depth

    depth_map = prediction.squeeze().float().cpu().numpy()
    depth_map = depth_map * 1000.0  # the original author's note: scale for nicer contrast
    depth_map = depth_map[border:-border, border:-border]

    crop_box = (border, border, resized.width - border, resized.height - border)
    return resized.crop(crop_box), depth_map


def depth_to_colormap(depth: np.ndarray):
    """Return a PIL image (plasma colormap) from depth for preview.

    The depth values are min-max normalised to 0..1, quantised to 0..255 and
    used as indices into the "plasma" colormap. The alpha channel of the
    colormap output is dropped.

    Args:
        depth: 2-D array of depth values. Integer arrays are accepted; the
            normalisation is done on a floating-point copy, so the input is
            never modified.

    Returns:
        An RGB ``PIL.Image.Image`` with the same height and width as ``depth``.
    """
    # Imported lazily so matplotlib is only loaded when a preview is requested.
    # Only the colormap table is needed, so pyplot (and a backend selection)
    # is not imported.
    import matplotlib

    try:
        # Colormap registry; `matplotlib.cm.get_cmap` no longer exists in
        # recent Matplotlib releases.
        cmap = matplotlib.colormaps["plasma"]
    except AttributeError:
        # Older Matplotlib without the registry attribute.
        from matplotlib import cm

        cmap = cm.get_cmap("plasma")

    # astype() copies; promoting to at least float32 keeps the in-place
    # division below valid for integer inputs.
    d = depth.astype(np.result_type(depth.dtype, np.float32))
    d -= d.min()
    peak = d.max()
    if peak > 0:  # a constant map stays all zeros instead of dividing by zero
        d /= peak
    d8 = (d * 255).astype(np.uint8)

    # Integer input makes the colormap treat the values as lookup-table indices.
    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(colored)


def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
    """Build a cleaned Open3D point cloud from a colour image and a depth map.

    The depth map is min-max normalised and quantised to uint8, paired with
    the colour image as an Open3D RGBD image, and back-projected through a
    pinhole camera whose focal lengths are fixed at 500 and whose principal
    point is the image centre. Statistical outliers are then removed and
    normals are estimated and oriented.

    Returns:
        The resulting ``o3d.geometry.PointCloud``. It may be empty; in that
        case the outlier-removal and normal steps are skipped.
    """
    color_arr = np.array(rgb_pil)  # indexed below as (rows, cols, ...)
    height, width = color_arr.shape[:2]

    # Work on a copy so the caller's depth array is left untouched.
    scaled = depth.copy()
    scaled -= scaled.min()
    if scaled.max() > 0:
        scaled /= scaled.max()
    depth_bytes = (scaled * 255).astype(np.uint8)

    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        o3d.geometry.Image(color_arr),
        o3d.geometry.Image(depth_bytes),
        convert_rgb_to_intensity=False,
    )

    camera = o3d.camera.PinholeCameraIntrinsic()
    camera.set_intrinsics(width, height, 500.0, 500.0, width / 2.0, height / 2.0)

    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, camera)
    if len(cloud.points) == 0:
        return cloud

    # Keep only the points that pass the statistical outlier filter.
    _, kept = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    cloud = cloud.select_by_index(kept)
    if len(cloud.points) == 0:
        return cloud

    cloud.estimate_normals()
    cloud.orient_normals_to_align_with_direction()
    return cloud


def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
    """Reconstruct a triangle mesh from a point cloud with Poisson reconstruction.

    Args:
        pcd: Input point cloud.
        depth: Forwarded as the ``depth`` argument of Open3D's Poisson
            reconstruction.

    Returns:
        The reconstructed mesh, rotated by pi radians about the x axis around
        the origin and with vertex normals computed, or ``None`` when the
        point cloud has no points.
    """
    if not len(pcd.points):
        return None

    # The second element returned by the reconstruction is not used.
    surface, _unused = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=depth, n_threads=1
    )

    # Half turn (180 degrees) about x; the original author's note: for the
    # typical camera convention.
    half_turn_x = surface.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    surface.rotate(half_turn_x, center=(0, 0, 0))
    surface.compute_vertex_normals()
    return surface


def save_o3d(obj, path):
    """Write an Open3D point cloud or triangle mesh to ``path``.

    The output format is chosen from the (case-insensitive) file extension.

    Args:
        obj: An ``o3d.geometry.PointCloud`` or ``o3d.geometry.TriangleMesh``.
        path: Destination file (string or path-like). Point clouds must use
            ``.ply``; meshes may use ``.obj`` or ``.ply``.

    Raises:
        ValueError: If ``obj`` is of an unsupported type or the extension is
            not allowed for that type.
        OSError: If Open3D reports that the file could not be written.
    """
    path = os.fspath(path)  # accept pathlib.Path as well as str
    ext = os.path.splitext(path)[1].lower()

    if isinstance(obj, o3d.geometry.PointCloud):
        if ext != ".ply":
            raise ValueError("Point cloud: please save as .ply")
        written = o3d.io.write_point_cloud(path, obj)
    elif isinstance(obj, o3d.geometry.TriangleMesh):
        if ext not in {".obj", ".ply"}:
            raise ValueError("Mesh: use .obj or .ply")
        written = o3d.io.write_triangle_mesh(path, obj)
    else:
        raise ValueError("Unsupported type for saving")

    # The Open3D writers signal failure through their boolean return value
    # rather than by raising, so surface it instead of failing silently.
    if not written:
        raise OSError(f"Open3D failed to write {path!r}")


def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
    """Render an offscreen preview of ``mesh`` (best effort).

    Args:
        mesh: Mesh to render. It is not modified: when it has no vertex
            colours, a copy is painted with a uniform light colour instead.
        width: Output image width in pixels.
        height: Output image height in pixels.

    Returns:
        A ``PIL.Image.Image`` with the rendering, or ``None`` when rendering
        fails for any reason (e.g., no EGL/OSMesa). The failure is logged as a
        warning with its traceback so that it can be diagnosed.
    """
    import logging

    try:
        from open3d.visualization import rendering

        # Ensure it has some color, without touching the caller's mesh.
        if not mesh.has_vertex_colors():
            mesh = o3d.geometry.TriangleMesh(mesh)
            mesh.paint_uniform_color([0.8, 0.8, 0.85])

        renderer = rendering.OffscreenRenderer(width, height)
        mat = rendering.MaterialRecord()
        mat.shader = "defaultLit"

        scene = renderer.scene
        scene.set_background([1, 1, 1, 1])
        scene.add_geometry("mesh", mesh, mat)

        # Camera distance is derived from the bounding-box size; the small
        # epsilon keeps the offset non-zero for a degenerate box.
        bbox = mesh.get_axis_aligned_bounding_box()
        center = bbox.get_center()
        extent = bbox.get_extent()
        radius = np.linalg.norm(extent) * 0.8 + 1e-6

        cam = scene.camera
        cam.look_at(center, center + np.array([0.0, 0.0, radius]), [0, 1, 0])

        img_o3d = renderer.render_to_image()
        img = np.asarray(img_o3d)
        return Image.fromarray(img)
    except Exception:
        # Deliberately best effort: the caller falls back to the 3D viewer and
        # the downloads, but the reason is recorded instead of being dropped.
        logging.getLogger(__name__).warning(
            "Offscreen mesh preview failed; continuing without it.",
            exc_info=True,
        )
        return None


# ------------------------------
# Gradio pipeline
# ------------------------------
def _save_to_temp(obj, suffix):
    """Write ``obj`` with ``save_o3d`` to a new temp file and return its path."""
    # Reserve a unique name, then close the handle before Open3D writes to the
    # path. delete=False keeps the file so it can be served after returning.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        path = handle.name
    save_o3d(obj, path)
    return path


def run_pipeline(image: Image.Image, poisson_depth: int = 10):
    """
    Main function wired to Gradio.

    Args:
        image: Input image, or ``None`` when nothing was provided.
        poisson_depth: Poisson reconstruction depth. It is coerced to ``int``
            because a UI slider may deliver the value as a float.

    Returns:
        ``(depth_preview_image, mesh_preview_png, pcd_ply_path, mesh_obj_path)``.
        Entries that could not be produced are ``None``: all four when there is
        no input image, everything but the depth preview when the point cloud
        is empty, and the mesh entries when no mesh was reconstructed.
    """
    if image is None:
        return None, None, None, None

    # 1) depth
    rgb, depth = predict_depth(image)
    depth_vis = depth_to_colormap(depth)

    # 2) point cloud
    pcd = rgbd_to_pointcloud(rgb, depth)
    if len(pcd.points) == 0:
        return depth_vis, None, None, None

    # 3) mesh
    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))

    # 4) save artifacts; the point cloud is returned even without a mesh
    pcd_path = _save_to_temp(pcd, ".ply")
    if mesh is None:
        return depth_vis, None, pcd_path, None

    # Save mesh in OBJ (works with Gradio Model3D)
    mesh_obj_path = _save_to_temp(mesh, ".obj")

    # 5) mesh preview (best effort)
    preview = render_mesh_image(mesh, 768, 512)

    return depth_vis, preview, pcd_path, mesh_obj_path


# ------------------------------
# Interface
# ------------------------------
# Page heading. The arrows are written as \u2192 escapes (U+2192 RIGHTWARDS
# ARROW) so the text cannot be garbled by a wrong file-encoding round trip.
TITLE = "Monocular Depth \u2192 Point Cloud \u2192 Poisson Mesh (GLPN + Open3D)"
# Markdown description rendered under the heading.
DESC = """
Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
and downloads for .ply (point cloud) and .obj (mesh).
**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
"""

# Build the UI. The title uses a \u2192 escape (U+2192 RIGHTWARDS ARROW)
# instead of a raw arrow character so it cannot be garbled by an encoding
# round trip.
with gr.Blocks(title="2D \u2192 3D Reconstruction") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESC)

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            in_img = gr.Image(
                type="pil",
                sources=["upload", "clipboard"],
                label="Input Image",
                image_mode="RGB"
            )
            poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)")
            run_btn = gr.Button("Reconstruct 3D", variant="primary")

        # Right column: image previews.
        with gr.Column():
            depth_out = gr.Image(label="Depth Map (colormap)")
            mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True)

    # Bottom row: downloads and the interactive mesh viewer.
    with gr.Row():
        pcd_file = gr.File(label="Download Point Cloud (.ply)")
        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

    # run_pipeline returns four values, mapped in order onto these outputs.
    run_btn.click(
        fn=run_pipeline,
        inputs=[in_img, poisson_depth],
        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view]
    )
    # Also expose mesh file separately (same path as viewer output)
    mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file)

if __name__ == "__main__":
    # share=True asks Gradio for a public link, so anyone with the URL can
    # reach the app. NOTE(review): hosted platforms may ignore this flag;
    # confirm it is wanted for local runs.
    demo.launch(share=True)