# Captured from a Hugging Face Spaces page (Space status at capture time: Running).
import io
import logging
import os
import tempfile

# Third-party imports keep their original relative order (torch before open3d).
# NOTE(review): some Open3D/PyTorch builds are reportedly sensitive to import
# order - confirm before alphabetizing this group.
import numpy as np
from PIL import Image
import gradio as gr
import torch
from transformers import GLPNForDepthEstimation, GLPNImageProcessor
import open3d as o3d
# ------------------------------
# Model setup (loaded once)
# ------------------------------
# Everything below runs at import time, so the checkpoint is fetched/loaded a
# single time per process and shared by every request.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# FE is the image processor paired with the GLPN checkpoint; MODEL is the
# depth-estimation network moved to DEVICE.
FE = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE)
# Switch to evaluation mode; the model is only used for inference here.
MODEL.eval()
# ------------------------------
# Utilities
# ------------------------------
def _resize_to_mult32(img: Image.Image, max_h=480):
    """Resize keeping aspect ratio so both sides are positive multiples of 32.

    The target height is ``min(max_h, img.height)`` rounded down to a multiple
    of 32; the width follows the aspect ratio and is rounded to the nearest
    multiple of 32. Both sides are clamped to at least 32 pixels so tiny or
    extremely narrow inputs never produce a zero-sized resize request.

    Args:
        img: Source image.
        max_h: Upper bound for the output height, in pixels.

    Returns:
        A new image produced by ``img.resize`` with bicubic resampling.
    """
    step = 32
    new_h = min(max_h, img.height)
    # Round down to the grid, but never to 0: a source shorter than 32 px
    # (or max_h < 32) would otherwise ask for a zero-height image.
    new_h = max(step, new_h - new_h % step)

    new_w = int(new_h * img.width / img.height)
    diff = new_w % step
    # Round the width to the nearest multiple of 32 (ties round up).
    new_w = new_w - diff if diff < step // 2 else new_w + (step - diff)
    # Very narrow images can round down to width 0; clamp like the height.
    new_w = max(step, new_w)

    return img.resize((new_w, new_h), Image.BICUBIC)
def predict_depth(image_pil: Image.Image):
    """Run GLPN on an image and return the matching RGB image and depth map.

    Args:
        image_pil: Input image; it is converted to RGB and resized with
            ``_resize_to_mult32`` before inference.

    Returns:
        Tuple ``(rgb, depth)`` where ``rgb`` is a PIL image and ``depth`` is a
        2-D float32 NumPy array holding the model prediction multiplied by
        1000. A 16-pixel border is removed from both when the prediction is
        larger than 32 pixels on each side; otherwise they are returned
        uncropped so the result is never empty.
    """
    img = _resize_to_mult32(image_pil.convert("RGB"))
    inputs = FE(images=img, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = MODEL(**inputs)
    pred = outputs.predicted_depth
    # NOTE(review): the Transformers docs describe predicted_depth as
    # (batch, H, W); the original comment claimed (1, 1, H, W) - confirm.
    # Reshaping to the last two dims handles either layout for a single image
    # and, unlike a bare squeeze(), cannot drop a genuine size-1 spatial axis.
    depth = pred.reshape(pred.shape[-2], pred.shape[-1])
    depth = depth.float().cpu().numpy() * 1000.0  # scale for nicer contrast

    # Trim a 16-pixel border (the original describes this as the padding GLPN
    # expects around the borders).
    pad = 16
    rows, cols = depth.shape
    if rows > 2 * pad and cols > 2 * pad:
        depth = depth[pad:-pad, pad:-pad]
        rgb = img.crop((pad, pad, img.width - pad, img.height - pad))
    else:
        # Cropping would leave nothing; keep the full frame instead.
        rgb = img
    return rgb, depth
def depth_to_colormap(depth: np.ndarray):
    """Return a PIL image (plasma colormap) from depth for preview.

    The depth values are min-max normalised to 0..255 and looked up in
    Matplotlib's "plasma" colormap; the alpha channel is dropped.

    Args:
        depth: 2-D array of depth values. It is not modified.

    Returns:
        An RGB ``PIL.Image.Image`` with the same height and width as ``depth``.
    """
    # Imported lazily so Matplotlib is only loaded when a preview is requested.
    # pyplot (and therefore a backend selection) is not needed for a lookup.
    import matplotlib

    d = np.asarray(depth)
    # astype() always copies, so the caller's array is left untouched, and
    # integer input is promoted to float so the in-place division is valid.
    d = d.astype(np.result_type(d.dtype, np.float32))
    d -= d.min()
    peak = d.max()
    if peak > 0:  # a constant depth map stays all-zero instead of dividing by 0
        d /= peak
    d8 = (d * 255).astype(np.uint8)

    try:
        # Colormap registry (Matplotlib >= 3.5).
        cmap = matplotlib.colormaps["plasma"]
    except AttributeError:
        # Older Matplotlib: fall back to the accessor that newer releases removed.
        import matplotlib.cm as cm

        cmap = cm.get_cmap("plasma")

    # Integer input indexes the colormap's lookup table directly; keep RGB only.
    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(colored)
def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray, focal_length: float = 500.0):
    """Create an Open3D point cloud from RGB + relative depth.

    The depth map is min-max normalised and quantised to uint8, combined with
    the colour image into an Open3D RGBD image (Open3D's default depth scale
    and truncation are used), and back-projected through a pinhole camera
    whose principal point is the image centre.

    Args:
        rgb_pil: Colour image; converted to 3-channel RGB if necessary.
        depth: 2-D depth array matching the image size. It is not modified.
        focal_length: Focal length in pixels used for both axes. The default
            (500.0) reproduces the previously hard-coded value.

    Returns:
        An ``o3d.geometry.PointCloud``. When it is non-empty, statistical
        outliers have been removed and normals estimated and oriented. It may
        be empty (for example when the depth map is constant).
    """
    d = np.asarray(depth)
    # astype() copies (caller's array untouched) and promotes integer input to
    # float so the in-place division below cannot fail.
    d = d.astype(np.result_type(d.dtype, np.float32))
    d -= d.min()
    peak = d.max()
    if peak > 0:
        d /= peak
    depth_u8 = np.ascontiguousarray((d * 255).astype(np.uint8))

    # Force H x W x 3 uint8 so RGBA / paletted inputs do not reach Open3D.
    rgb_np = np.ascontiguousarray(np.array(rgb_pil.convert("RGB")))

    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        o3d.geometry.Image(rgb_np),
        o3d.geometry.Image(depth_u8),
        convert_rgb_to_intensity=False,
    )

    h, w = rgb_np.shape[:2]
    intr = o3d.camera.PinholeCameraIntrinsic()
    intr.set_intrinsics(w, h, focal_length, focal_length, w / 2.0, h / 2.0)
    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intr)

    # Clean & orient normals. The emptiness check is repeated because outlier
    # removal may discard every remaining point.
    if len(pcd.points) > 0:
        _, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
        pcd = pcd.select_by_index(ind)
    if len(pcd.points) > 0:
        pcd.estimate_normals()
        pcd.orient_normals_to_align_with_direction()
    return pcd
def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
    """Reconstruct a triangle mesh from a point cloud with Poisson reconstruction.

    Args:
        pcd: Input point cloud. NOTE(review): Poisson reconstruction is
            expected to need normals on ``pcd``; ``rgbd_to_pointcloud``
            estimates them - confirm for other callers.
        depth: Poisson octree depth (higher means more detail). Coerced to
            ``int`` because UI sliders may deliver a float such as ``10.0``.

    Returns:
        The reconstructed ``o3d.geometry.TriangleMesh``, rotated 180° around
        the x axis with vertex normals computed, or ``None`` when the point
        cloud is empty or the reconstruction produced no triangles.
    """
    if len(pcd.points) == 0:
        return None

    mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=int(depth), n_threads=1
    )
    # An empty reconstruction cannot be saved or displayed; report it the same
    # way as an empty input so callers take their existing "no mesh" path.
    if len(mesh.triangles) == 0:
        return None

    # Rotate 180° around x for typical camera convention
    R = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(R, center=(0, 0, 0))
    mesh.compute_vertex_normals()
    return mesh
def save_o3d(obj, path):
    """Write an Open3D point cloud or triangle mesh to ``path``.

    Args:
        obj: ``o3d.geometry.PointCloud`` or ``o3d.geometry.TriangleMesh``.
        path: Destination file. Point clouds must use ``.ply``; meshes may use
            ``.obj`` or ``.ply`` (the extension check is case-insensitive).

    Raises:
        ValueError: If the extension does not suit the object type, or the
            object is neither a point cloud nor a triangle mesh.
        OSError: If Open3D reports that the write did not succeed.
    """
    path = os.fspath(path)
    ext = os.path.splitext(path)[1].lower()

    if isinstance(obj, o3d.geometry.PointCloud):
        if ext != ".ply":
            raise ValueError("Point cloud: please save as .ply")
        ok = o3d.io.write_point_cloud(path, obj)
    elif isinstance(obj, o3d.geometry.TriangleMesh):
        if ext not in {".obj", ".ply"}:
            raise ValueError("Mesh: use .obj or .ply")
        ok = o3d.io.write_triangle_mesh(path, obj)
    else:
        raise ValueError("Unsupported type for saving")

    # The Open3D writers signal failure through their boolean return value
    # rather than an exception; surface it instead of handing back a path to
    # a missing or empty file.
    if not ok:
        raise OSError(f"Open3D failed to write {path!r}")
def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
    """Render an offscreen preview of ``mesh`` on a best-effort basis.

    Offscreen rendering can fail (e.g., no EGL/OSMesa). In that case the
    failure is logged and ``None`` is returned, so callers can rely on the
    Model3D viewer + downloads instead.

    Args:
        mesh: Mesh to render. If it has no vertex colours it is painted a
            uniform light grey in place (the caller's object is modified).
        width: Output image width in pixels.
        height: Output image height in pixels.

    Returns:
        A ``PIL.Image.Image`` with the rendered view, or ``None`` on failure.
    """
    try:
        from open3d.visualization import rendering

        # Ensure it has some color
        if not mesh.has_vertex_colors():
            mesh.paint_uniform_color([0.8, 0.8, 0.85])

        renderer = rendering.OffscreenRenderer(width, height)
        mat = rendering.MaterialRecord()
        mat.shader = "defaultLit"

        scene = renderer.scene
        scene.set_background([1, 1, 1, 1])
        scene.add_geometry("mesh", mesh, mat)

        # Place the eye on the +z side of the bounding-box centre, at a
        # distance proportional to the box diagonal (epsilon avoids a
        # zero-length view vector for degenerate meshes).
        bbox = mesh.get_axis_aligned_bounding_box()
        center = bbox.get_center()
        radius = np.linalg.norm(bbox.get_extent()) * 0.8 + 1e-6
        eye = center + np.array([0.0, 0.0, radius])
        scene.camera.look_at(center, eye, [0, 1, 0])

        # Copy the pixels so the result does not depend on Open3D's buffer.
        pixels = np.array(renderer.render_to_image())
        return Image.fromarray(pixels)
    except Exception:
        # Deliberate catch-all: the preview is optional and must never break
        # the pipeline, but record why it was skipped.
        logging.getLogger(__name__).warning(
            "Offscreen mesh preview unavailable; skipping render", exc_info=True
        )
        return None
# ------------------------------
# Gradio pipeline
# ------------------------------
def _save_to_temp(obj, suffix):
    """Write ``obj`` with ``save_o3d`` to a new temporary file; return its path."""
    # mkstemp + close hands Open3D a path with no open handle on it. Writing
    # into a still-open NamedTemporaryFile is not portable: the tempfile docs
    # note the name cannot be reopened on Windows while the file is open.
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        save_o3d(obj, path)
    except Exception:
        # Do not leave an empty placeholder behind when the write fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path


def run_pipeline(image: Image.Image, poisson_depth: int = 10):
    """Run the full image -> depth -> point cloud -> mesh pipeline for Gradio.

    Args:
        image: Uploaded image, or ``None`` when nothing was provided.
        poisson_depth: Poisson reconstruction depth; coerced to ``int`` since
            the UI slider may deliver a float.

    Returns:
        Tuple ``(depth_preview_image, mesh_preview_png, pcd_ply_path,
        mesh_obj_path)``. Entries are ``None`` for stages that could not be
        produced: all four without an image, the last three when the point
        cloud is empty, and the mesh preview/path when no mesh was built. The
        mesh preview alone may also be ``None`` if offscreen rendering failed.
        The files are temporary files that are not deleted by this function.
    """
    if image is None:
        return None, None, None, None

    # 1) depth
    rgb, depth = predict_depth(image)
    depth_vis = depth_to_colormap(depth)

    # 2) point cloud
    pcd = rgbd_to_pointcloud(rgb, depth)
    if len(pcd.points) == 0:
        return depth_vis, None, None, None

    # 3) mesh
    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))

    # 4) save artifacts - the point cloud is exported whether or not meshing
    #    succeeded, so the user always gets at least the PCD.
    pcd_path = _save_to_temp(pcd, ".ply")
    if mesh is None:
        return depth_vis, None, pcd_path, None

    # Save mesh in OBJ (works with Gradio Model3D)
    mesh_obj_path = _save_to_temp(mesh, ".obj")

    # 5) mesh preview (best effort)
    preview = render_mesh_image(mesh, 768, 512)
    return depth_vis, preview, pcd_path, mesh_obj_path
# ------------------------------
# Interface
# ------------------------------
# Page heading shown at the top of the app. The arrows were mis-decoded as
# "β" in the captured source and are restored here.
TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"
# Markdown blurb rendered under the heading.
DESC = """
Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
and downloads for .ply (point cloud) and .obj (mesh).
**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
"""
# Gradio UI: inputs on the left, previews on the right, downloads/viewer below.
# NOTE(review): the nesting of rows/columns was reconstructed from a
# whitespace-stripped copy of the source - confirm the intended layout.
with gr.Blocks(title="2D → 3D Reconstruction") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESC)

    with gr.Row():
        with gr.Column():
            in_img = gr.Image(
                type="pil",
                sources=["upload", "clipboard"],
                label="Input Image",
                image_mode="RGB",
            )
            poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)")
            run_btn = gr.Button("Reconstruct 3D", variant="primary")
        with gr.Column():
            depth_out = gr.Image(label="Depth Map (colormap)")
            mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True)

    with gr.Row():
        pcd_file = gr.File(label="Download Point Cloud (.ply)")
        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

    # run_pipeline returns four values, mapped in order onto these outputs.
    run_btn.click(
        fn=run_pipeline,
        inputs=[in_img, poisson_depth],
        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view],
    )
    # Also expose mesh file separately (same path as viewer output)
    mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file)
if __name__ == "__main__":
    # share=True asks Gradio for a public link (useful on Colab or locally).
    # NOTE(review): Gradio reportedly ignores share=True when running on
    # Hugging Face Spaces and only emits a warning - confirm.
    demo.launch(share=True)