import os import io import tempfile import numpy as np from PIL import Image import gradio as gr import torch from transformers import GLPNForDepthEstimation, GLPNImageProcessor import open3d as o3d # ------------------------------ # Model setup (loaded once) # ------------------------------ DEVICE = "cuda" if torch.cuda.is_available() else "cpu" FE = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu") MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE) MODEL.eval() # ------------------------------ # Utilities # ------------------------------ def _resize_to_mult32(img: Image.Image, max_h=480): """Resize keeping aspect, cap height to max_h, and make both dims multiple of 32.""" new_h = min(max_h, img.height) new_h -= new_h % 32 new_w = int(new_h * img.width / img.height) diff = new_w % 32 new_w = new_w - diff if diff < 16 else new_w + (32 - diff) return img.resize((new_w, new_h), Image.BICUBIC) def predict_depth(image_pil: Image.Image): """Run GLPN and return cropped RGB (as PIL) + raw depth (float32 numpy).""" img = _resize_to_mult32(image_pil.convert("RGB")) inputs = FE(images=img, return_tensors="pt").to(DEVICE) with torch.no_grad(): outputs = MODEL(**inputs) pred = outputs.predicted_depth # (1, 1, H, W) # remove padding GLPN expects around borders (pad=16) pad = 16 depth = pred.squeeze().float().cpu().numpy() * 1000.0 # scale for nicer contrast depth = depth[pad:-pad, pad:-pad] rgb = img.crop((pad, pad, img.width - pad, img.height - pad)) return rgb, depth def depth_to_colormap(depth: np.ndarray): """Return a PIL image (plasma colormap) from depth for preview.""" import matplotlib matplotlib.use("Agg") import matplotlib.pyplot as plt import matplotlib.cm as cm d = depth.copy() d -= d.min() if d.max() > 0: d /= d.max() d8 = (d * 255).astype(np.uint8) colored = (cm.get_cmap("plasma")(d8)[:, :, :3] * 255).astype(np.uint8) return Image.fromarray(colored) def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray): """Create an Open3D point cloud from RGB + relative depth.""" # Normalize depth to 0..1 then to 0..255 uint8 for Open3D RGBD convenience d = depth.copy() d -= d.min() if d.max() > 0: d /= d.max() depth_u8 = (d * 255).astype(np.uint8) rgb_np = np.array(rgb_pil) # H, W, 3 (uint8) depth_o3d = o3d.geometry.Image(depth_u8) color_o3d = o3d.geometry.Image(rgb_np) rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth( color_o3d, depth_o3d, convert_rgb_to_intensity=False ) h, w = rgb_np.shape[:2] intr = o3d.camera.PinholeCameraIntrinsic() intr.set_intrinsics(w, h, 500.0, 500.0, w / 2.0, h / 2.0) pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr) # Clean & orient normals if len(pcd.points) > 0: _, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0) pcd = pcd.select_by_index(ind) if len(pcd.points) > 0: pcd.estimate_normals() pcd.orient_normals_to_align_with_direction() return pcd def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10): if len(pcd.points) == 0: return None mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson( pcd, depth=depth, n_threads=1 ) # Rotate 180° around x for typical camera convention R = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0)) mesh.rotate(R, center=(0, 0, 0)) mesh.compute_vertex_normals() return mesh def save_o3d(obj, path): ext = os.path.splitext(path)[1].lower() if isinstance(obj, o3d.geometry.PointCloud): if ext == ".ply": o3d.io.write_point_cloud(path, obj) else: raise ValueError("Point cloud: please save as .ply") elif isinstance(obj, o3d.geometry.TriangleMesh): if ext in {".obj", ".ply"}: o3d.io.write_triangle_mesh(path, obj) else: raise ValueError("Mesh: use .obj or .ply") else: raise ValueError("Unsupported type for saving") def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480): """ Try offscreen render for a preview PNG. If it fails (e.g., no EGL/OSMesa), we return None and rely on the Model3D viewer + downloads. """ try: from open3d.visualization import rendering # Ensure it has some color if not mesh.has_vertex_colors(): mesh.paint_uniform_color([0.8, 0.8, 0.85]) renderer = rendering.OffscreenRenderer(width, height) mat = rendering.MaterialRecord() mat.shader = "defaultLit" scene = renderer.scene scene.set_background([1, 1, 1, 1]) scene.add_geometry("mesh", mesh, mat) bbox = mesh.get_axis_aligned_bounding_box() center = bbox.get_center() extent = bbox.get_extent() radius = np.linalg.norm(extent) * 0.8 + 1e-6 cam = scene.camera cam.look_at(center, center + [0, 0, radius], [0, 1, 0]) img_o3d = renderer.render_to_image() img = np.asarray(img_o3d) return Image.fromarray(img) except Exception: return None # ------------------------------ # Gradio pipeline # ------------------------------ def run_pipeline(image: Image.Image, poisson_depth: int = 10): """ Main function wired to Gradio: returns (depth_preview_image, mesh_preview_png, pcd_ply_path, mesh_obj_path) """ if image is None: return None, None, None, None # 1) depth rgb, depth = predict_depth(image) depth_vis = depth_to_colormap(depth) # 2) point cloud pcd = rgbd_to_pointcloud(rgb, depth) if len(pcd.points) == 0: return depth_vis, None, None, None # 3) mesh mesh = pointcloud_to_mesh(pcd, depth=poisson_depth) if mesh is None: # At least return PCD with tempfile.NamedTemporaryFile(suffix=".ply", delete=False) as fpcd: save_o3d(pcd, fpcd.name) pcd_path = fpcd.name return depth_vis, None, pcd_path, None # 4) save artifacts with tempfile.NamedTemporaryFile(suffix=".ply", delete=False) as fpcd: save_o3d(pcd, fpcd.name) pcd_path = fpcd.name # Save mesh in OBJ (works with Gradio Model3D) with tempfile.NamedTemporaryFile(suffix=".obj", delete=False) as fmesh: save_o3d(mesh, fmesh.name) mesh_obj_path = fmesh.name # 5) mesh preview (best effort) preview = render_mesh_image(mesh, 768, 512) return depth_vis, preview, pcd_path, mesh_obj_path # ------------------------------ # Interface # ------------------------------ TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)" DESC = """ Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available), and downloads for .ply (point cloud) and .obj (mesh). **Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes. """ with gr.Blocks(title="2D → 3D Reconstruction") as demo: gr.Markdown(f"# {TITLE}") gr.Markdown(DESC) with gr.Row(): with gr.Column(): in_img = gr.Image( type="pil", sources=["upload", "clipboard"], label="Input Image", image_mode="RGB" ) poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)") run_btn = gr.Button("Reconstruct 3D", variant="primary") with gr.Column(): depth_out = gr.Image(label="Depth Map (colormap)") mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True) with gr.Row(): pcd_file = gr.File(label="Download Point Cloud (.ply)") mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)") mesh_obj_file = gr.File(label="Download Mesh (.obj)") run_btn.click( fn=run_pipeline, inputs=[in_img, poisson_depth], outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view] ) # Also expose mesh file separately (same path as viewer output) mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file) if __name__ == "__main__": # share=True creates a public link (useful on Spaces/Colab/local) demo.launch(share=True)