Spaces:
Running
Running
File size: 8,512 Bytes
9891441 d65081d 9891441 f8d6272 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d f8d6272 d65081d dc673c1 d65081d 9891441 d65081d dc673c1 d65081d dc673c1 d65081d 9891441 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d dc673c1 d65081d 6cd3957 dc673c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
import os
import io
import tempfile
import numpy as np
from PIL import Image
import gradio as gr
import torch
from transformers import GLPNForDepthEstimation, GLPNImageProcessor
import open3d as o3d
# ------------------------------
# Model setup (loaded once)
# ------------------------------
# Both the image processor and the network come from the same checkpoint.
_GLPN_CHECKPOINT = "vinvino02/glpn-nyu"

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

FE = GLPNImageProcessor.from_pretrained(_GLPN_CHECKPOINT)
# Move the network to the chosen device and switch it to evaluation mode.
MODEL = GLPNForDepthEstimation.from_pretrained(_GLPN_CHECKPOINT).to(DEVICE).eval()
# ------------------------------
# Utilities
# ------------------------------
def _resize_to_mult32(img: Image.Image, max_h=480):
    """Resize ``img`` so that both sides are positive multiples of 32.

    The height is capped at ``max_h`` and rounded down to a multiple of 32.
    The width follows the original aspect ratio and is rounded to the
    nearest multiple of 32.  Each side is then clamped to at least 32
    pixels: without the clamp, a source shorter than 32 px (or ``max_h``
    below 32) rounds the height to 0 and a very narrow source rounds the
    width to 0, and a zero-sized target cannot be resized to.  When the
    clamp applies, the aspect ratio is only approximately preserved and the
    height may exceed ``max_h``.

    Args:
        img: Source image.
        max_h: Upper bound for the output height, in pixels.

    Returns:
        A new image resampled with bicubic interpolation.

    Raises:
        ValueError: If ``img`` has zero width or height.
    """
    if img.width <= 0 or img.height <= 0:
        # The aspect-ratio division below would otherwise fail with an
        # unhelpful ZeroDivisionError.
        raise ValueError("Cannot resize an image with zero width or height")

    step = 32

    new_h = min(max_h, img.height)
    new_h -= new_h % step
    new_h = max(new_h, step)

    new_w = int(new_h * img.width / img.height)
    diff = new_w % step
    # Round to the nearest multiple: down when closer to the lower one.
    new_w = new_w - diff if diff < step // 2 else new_w + (step - diff)
    new_w = max(new_w, step)

    return img.resize((new_w, new_h), Image.BICUBIC)
def predict_depth(image_pil: Image.Image):
    """Estimate depth for ``image_pil`` with the module-level GLPN model.

    The image is converted to RGB and resized by ``_resize_to_mult32``
    before inference.  A 16-pixel border is then trimmed from every side of
    both the predicted depth map and the resized image.

    Returns:
        ``(rgb, depth)``: the cropped RGB ``PIL.Image`` and the cropped
        depth as a float32 NumPy array (model output multiplied by 1000).
    """
    border = 16  # pixels removed on each side of the image and depth map

    resized = _resize_to_mult32(image_pil.convert("RGB"))
    batch = FE(images=resized, return_tensors="pt").to(DEVICE)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        prediction = MODEL(**batch).predicted_depth

    # squeeze() drops every singleton dimension of the prediction.
    depth_map = prediction.squeeze().float().cpu().numpy()
    depth_map = depth_map * 1000.0  # scale for nicer contrast
    depth_map = depth_map[border:-border, border:-border]

    width, height = resized.size
    cropped = resized.crop((border, border, width - border, height - border))
    return cropped, depth_map
def depth_to_colormap(depth: np.ndarray):
    """Return a PIL image (plasma colormap) from depth for preview.

    The depth values are min-max normalised to 0..1, quantised to 8 bits and
    looked up in Matplotlib's "plasma" colormap.  A constant depth map maps
    to the colormap's first colour everywhere.

    Args:
        depth: 2-D depth map.  Integer inputs are promoted to floating point
            so the in-place normalisation is valid; float inputs keep their
            precision.

    Returns:
        An RGB ``PIL.Image`` with the same height and width as ``depth``.

    Raises:
        ValueError: If ``depth`` contains no elements.
    """
    import matplotlib

    # Keep Matplotlib headless in case pyplot is imported elsewhere later.
    matplotlib.use("Agg")

    depth = np.asarray(depth)
    if depth.size == 0:
        raise ValueError("depth map is empty; nothing to colorize")

    # astype() copies, so the caller's array is never modified.
    d = depth.astype(np.result_type(depth.dtype, np.float32))
    d -= d.min()
    peak = d.max()
    if peak > 0:
        d /= peak
    d8 = (d * 255).astype(np.uint8)

    try:
        # Registry available since Matplotlib 3.5; cm.get_cmap() was
        # deprecated in 3.7 and removed in 3.9.
        cmap = matplotlib.colormaps["plasma"]
    except AttributeError:
        import matplotlib.cm as cm

        cmap = cm.get_cmap("plasma")

    # Integer input indexes the colormap's lookup table directly; drop the
    # alpha channel and convert the 0..1 floats to 8-bit RGB.
    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(colored)
def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
    """Build an Open3D point cloud from an RGB image and a relative depth map.

    The depth is min-max normalised and quantised to uint8, paired with the
    colour image as an RGBD image, and back-projected through a pinhole
    camera with focal length 500 and the principal point at the image
    centre.  Statistical outliers are removed and, if points remain,
    normals are estimated and oriented.

    Returns:
        The resulting ``o3d.geometry.PointCloud`` (possibly empty).
    """
    # Rescale depth to 0..1, then to 0..255 uint8 for the RGBD helper.
    scaled = depth.copy()
    scaled -= scaled.min()
    if scaled.max() > 0:
        scaled /= scaled.max()
    depth_image = o3d.geometry.Image((scaled * 255).astype(np.uint8))

    colors = np.array(rgb_pil)  # H, W, 3 (uint8)
    color_image = o3d.geometry.Image(colors)

    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color_image,
        depth_image,
        convert_rgb_to_intensity=False,
    )

    height, width = colors.shape[:2]
    intrinsic = o3d.camera.PinholeCameraIntrinsic()
    intrinsic.set_intrinsics(
        width, height, 500.0, 500.0, width / 2.0, height / 2.0
    )
    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsic)

    # Nothing to clean when back-projection produced no points.
    if len(cloud.points) == 0:
        return cloud

    _, kept = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    cloud = cloud.select_by_index(kept)

    # Normals are only meaningful if the filter left some points.
    if len(cloud.points) == 0:
        return cloud

    cloud.estimate_normals()
    cloud.orient_normals_to_align_with_direction()
    return cloud
def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
    """Reconstruct a triangle mesh from ``pcd`` with Poisson reconstruction.

    Args:
        pcd: Input point cloud.
        depth: Value forwarded as the ``depth`` argument of the Poisson
            reconstruction.

    Returns:
        The reconstructed mesh, rotated by pi around the x axis and with
        vertex normals computed, or ``None`` when ``pcd`` has no points.
    """
    point_count = len(pcd.points)
    if point_count == 0:
        return None

    reconstruction = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd,
        depth=depth,
        n_threads=1,
    )
    surface = reconstruction[0]  # the second returned value is not used

    # Rotate 180° around x for typical camera convention
    flip = surface.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    surface.rotate(flip, center=(0, 0, 0))
    surface.compute_vertex_normals()
    return surface
def save_o3d(obj, path):
    """Write an Open3D point cloud or triangle mesh to ``path``.

    Args:
        obj: An ``o3d.geometry.PointCloud`` or ``o3d.geometry.TriangleMesh``.
        path: Destination file (string or path-like).  Point clouds must use
            ``.ply``; meshes may use ``.obj`` or ``.ply``.  The extension
            check is case-insensitive.

    Raises:
        ValueError: If ``obj`` has an unsupported type or the extension is
            not allowed for that type.
        OSError: If the Open3D writer reports that the file was not written.
    """
    path = os.fspath(path)
    ext = os.path.splitext(path)[1].lower()

    if isinstance(obj, o3d.geometry.PointCloud):
        if ext != ".ply":
            raise ValueError("Point cloud: please save as .ply")
        written = o3d.io.write_point_cloud(path, obj)
    elif isinstance(obj, o3d.geometry.TriangleMesh):
        if ext not in {".obj", ".ply"}:
            raise ValueError("Mesh: use .obj or .ply")
        written = o3d.io.write_triangle_mesh(path, obj)
    else:
        raise ValueError("Unsupported type for saving")

    # The Open3D writers signal failure through their boolean return value
    # rather than an exception; surface it instead of continuing with a
    # missing or incomplete file.
    if not written:
        raise OSError(f"Open3D failed to write {path!r}")
def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
    """Render ``mesh`` offscreen and return the picture as a PIL image.

    This is a best-effort preview: if any step fails (e.g., no EGL/OSMesa
    available for offscreen rendering), the failure is logged with its
    traceback and ``None`` is returned so callers can rely on the Model3D
    viewer and the downloads instead.

    Note that a mesh without vertex colours is painted a uniform light grey
    in place before rendering.

    Args:
        mesh: Mesh to render.
        width: Output image width in pixels.
        height: Output image height in pixels.

    Returns:
        The rendered ``PIL.Image``, or ``None`` when rendering failed.
    """
    import logging

    try:
        from open3d.visualization import rendering

        # Ensure it has some color
        if not mesh.has_vertex_colors():
            mesh.paint_uniform_color([0.8, 0.8, 0.85])

        renderer = rendering.OffscreenRenderer(width, height)
        mat = rendering.MaterialRecord()
        mat.shader = "defaultLit"

        scene = renderer.scene
        scene.set_background([1, 1, 1, 1])
        scene.add_geometry("mesh", mesh, mat)

        # Place the eye on the +z side of the bounding-box centre, at a
        # distance proportional to the box diagonal.
        bbox = mesh.get_axis_aligned_bounding_box()
        center = bbox.get_center()
        extent = bbox.get_extent()
        radius = np.linalg.norm(extent) * 0.8 + 1e-6
        cam = scene.camera
        cam.look_at(center, center + [0, 0, radius], [0, 1, 0])

        img_o3d = renderer.render_to_image()
        img = np.asarray(img_o3d)
        return Image.fromarray(img)
    except Exception:
        # Still best-effort, but no longer silent: a swallowed exception
        # here made genuine bugs indistinguishable from a missing renderer.
        logging.getLogger(__name__).warning(
            "Offscreen mesh preview failed; continuing without it",
            exc_info=True,
        )
        return None
# ------------------------------
# Gradio pipeline
# ------------------------------
def run_pipeline(image: Image.Image, poisson_depth: int = 10):
    """Run the full image -> depth -> point cloud -> mesh pipeline.

    Main function wired to Gradio.

    Args:
        image: Input image, or ``None`` when nothing was uploaded.
        poisson_depth: Poisson reconstruction depth.  Coerced to ``int``
            because UI sliders may deliver the value as a float.

    Returns:
        ``(depth_preview_image, mesh_preview_png, pcd_ply_path,
        mesh_obj_path)``.  Entries that could not be produced are ``None``:
        all four when ``image`` is ``None``, everything but the depth
        preview when the point cloud is empty, and the mesh entries when no
        mesh was reconstructed.
    """
    if image is None:
        return None, None, None, None

    # 1) depth
    rgb, depth = predict_depth(image)
    depth_vis = depth_to_colormap(depth)

    # 2) point cloud
    pcd = rgbd_to_pointcloud(rgb, depth)
    if len(pcd.points) == 0:
        return depth_vis, None, None, None

    # 3) mesh
    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))
    if mesh is None:
        # At least return PCD
        return depth_vis, None, _save_to_temp(pcd, ".ply"), None

    # 4) save artifacts
    pcd_path = _save_to_temp(pcd, ".ply")
    # Save mesh in OBJ (works with Gradio Model3D)
    mesh_obj_path = _save_to_temp(mesh, ".obj")

    # 5) mesh preview (best effort)
    preview = render_mesh_image(mesh, 768, 512)
    return depth_vis, preview, pcd_path, mesh_obj_path


def _save_to_temp(obj, suffix):
    """Save ``obj`` via ``save_o3d`` to a new temp file and return its path."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    # Close our handle before Open3D opens the file by name: a temporary
    # file that is still open cannot be reopened by name on Windows.
    os.close(fd)
    try:
        save_o3d(obj, path)
    except Exception:
        # Do not leave an empty placeholder behind when the write fails.
        if os.path.exists(path):
            os.remove(path)
        raise
    return path
# ------------------------------
# Interface
# ------------------------------
# Page heading; the separators are U+2192 arrows (previously mis-encoded).
TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"
# Markdown blurb rendered under the heading.
DESC = """
Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
and downloads for .ply (point cloud) and .obj (mesh).
**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
"""
# Build the UI: inputs on the left, previews on the right, downloads and
# the 3D viewer in a second row.  The window title uses a U+2192 arrow
# (previously mis-encoded).
with gr.Blocks(title="2D → 3D Reconstruction") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESC)

    with gr.Row():
        with gr.Column():
            in_img = gr.Image(
                type="pil",
                sources=["upload", "clipboard"],
                label="Input Image",
                image_mode="RGB",
            )
            poisson_depth = gr.Slider(
                5, 12, value=10, step=1, label="Poisson depth (mesh detail)"
            )
            run_btn = gr.Button("Reconstruct 3D", variant="primary")
        with gr.Column():
            depth_out = gr.Image(label="Depth Map (colormap)")
            mesh_preview = gr.Image(
                label="Mesh Preview (offscreen render)", visible=True
            )

    with gr.Row():
        pcd_file = gr.File(label="Download Point Cloud (.ply)")
        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

    # run_pipeline returns four values, mapped onto these outputs in order.
    run_btn.click(
        fn=run_pipeline,
        inputs=[in_img, poisson_depth],
        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view],
    )

    # Also expose mesh file separately (same path as viewer output)
    mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file)

if __name__ == "__main__":
    # share=True asks Gradio to create a public share link.
    demo.launch(share=True)
|