# Source: codex-model / app.py (uploaded by Tohru127)
# Commit: dc673c1 (verified) - "Update app.py"
import os
import io
import tempfile
import numpy as np
from PIL import Image
import gradio as gr
import torch
from transformers import GLPNForDepthEstimation, GLPNImageProcessor
import open3d as o3d
# ------------------------------
# Model setup (loaded once)
# ------------------------------
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# The image processor and the depth network are both built from the
# "vinvino02/glpn-nyu" checkpoint. They are created once, at import time,
# and reused by every request.
FE = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
MODEL = MODEL.to(DEVICE)
MODEL.eval()  # switch the network to evaluation mode
# ------------------------------
# Utilities
# ------------------------------
def _resize_to_mult32(img: Image.Image, max_h=480):
    """Resize ``img`` so both sides are multiples of 32, keeping the aspect ratio.

    The height is capped at ``max_h`` and rounded down to a multiple of 32.
    The width is scaled by the same factor and rounded to the nearest
    multiple of 32. Neither side is allowed to drop below 32, so very small
    or very narrow inputs are enlarged instead of collapsing to a zero-sized
    image (a ``max_h`` below 32 is therefore treated as 32).

    Args:
        img: Source image.
        max_h: Upper bound for the output height, in pixels.

    Returns:
        A new, bicubic-resampled image whose width and height are positive
        multiples of 32.
    """
    new_h = min(max_h, img.height)
    # Round down to a multiple of 32, but never below one 32-pixel cell.
    new_h = max(32, new_h - new_h % 32)

    scaled_w = int(new_h * img.width / img.height)
    # Round to the nearest multiple of 32 (remainders below 16 round down).
    remainder = scaled_w % 32
    if remainder < 16:
        new_w = scaled_w - remainder
    else:
        new_w = scaled_w + (32 - remainder)
    new_w = max(32, new_w)

    return img.resize((new_w, new_h), Image.BICUBIC)
def predict_depth(image_pil: Image.Image):
    """Run GLPN on an image and return a border-trimmed RGB image plus depth.

    The input is converted to RGB, resized with ``_resize_to_mult32`` and fed
    through the module-level processor (``FE``) and model (``MODEL``) on
    ``DEVICE``. A 16-pixel border is then removed from both the RGB image and
    the depth map so the two stay pixel-aligned (the original author's note
    attributes this to padding around the borders of the GLPN output).

    Args:
        image_pil: Input image in any PIL mode.

    Returns:
        A ``(rgb, depth)`` tuple: ``rgb`` is a PIL image and ``depth`` is a
        2-D float numpy array with the same height and width as ``rgb``.
        Depth values are the raw prediction multiplied by 1000.
    """
    img = _resize_to_mult32(image_pil.convert("RGB"))
    inputs = FE(images=img, return_tensors="pt").to(DEVICE)
    with torch.no_grad():
        outputs = MODEL(**inputs)

    pred = outputs.predicted_depth.float()
    # Drop the leading singleton (batch) dimension(s); only one image is fed in.
    pred = pred.reshape(pred.shape[-2:])

    # Make sure depth and RGB cover the same pixel grid before cropping both.
    # NOTE(review): presumably the processor keeps the size of an image that
    # is already a multiple of 32, in which case this branch never runs.
    if tuple(pred.shape) != (img.height, img.width):
        pred = torch.nn.functional.interpolate(
            pred[None, None],
            size=(img.height, img.width),
            mode="bicubic",
            align_corners=False,
        )[0, 0]

    depth = pred.cpu().numpy() * 1000.0  # scale for nicer contrast

    pad = 16
    if img.width > 2 * pad and img.height > 2 * pad:
        depth = depth[pad:-pad, pad:-pad]
        rgb = img.crop((pad, pad, img.width - pad, img.height - pad))
    else:
        # Too small to trim without ending up with an empty image.
        rgb = img
    return rgb, depth
def depth_to_colormap(depth: np.ndarray):
    """Render a depth map as an RGB preview image using the plasma colormap.

    The depth values are shifted and scaled to the 0..1 range (a constant map
    stays at 0), quantised to 256 levels and looked up in the colormap.

    Args:
        depth: 2-D array of depth values. The input is not modified.

    Returns:
        A PIL RGB image with the same height and width as ``depth``.
    """
    import matplotlib

    # Keep matplotlib on a non-interactive backend; there is no display on a server.
    matplotlib.use("Agg")

    try:
        # Colormap registry; `matplotlib.cm.get_cmap` was removed in matplotlib 3.9.
        cmap = matplotlib.colormaps["plasma"]
    except AttributeError:
        # Older matplotlib releases without the registry.
        import matplotlib.cm as cm

        cmap = cm.get_cmap("plasma")

    d = np.array(depth, copy=True)
    if not np.issubdtype(d.dtype, np.floating):
        # The in-place division below would raise on an integer array.
        d = d.astype(np.float32)

    d -= d.min()
    peak = d.max()
    if peak > 0:
        d /= peak

    d8 = (d * 255).astype(np.uint8)
    # Integer input indexes the colormap's lookup table; drop the alpha channel.
    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(colored)
def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
    """Build a coloured Open3D point cloud from an RGB image and its depth map.

    The depth map is rescaled to 0..255 and stored as ``uint8`` before being
    combined with the colour image into an Open3D RGBD image. The cloud is
    projected with a pinhole camera (focal length 500 on both axes, principal
    point at the image centre), statistical outliers are removed, and normals
    are estimated and oriented.

    Args:
        rgb_pil: Colour image; converted to a numpy array for Open3D.
        depth: Depth array. The input is not modified.

    Returns:
        An ``o3d.geometry.PointCloud``. It may be empty, in which case the
        clean-up and normal steps are skipped.
    """
    # Normalise a private copy of the depth map to 0..1, then quantise to uint8.
    scaled = depth.copy()
    scaled -= scaled.min()
    peak = scaled.max()
    if peak > 0:
        scaled /= peak
    depth_bytes = (scaled * 255).astype(np.uint8)

    color_arr = np.array(rgb_pil)  # H, W, 3 (uint8)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        o3d.geometry.Image(color_arr),
        o3d.geometry.Image(depth_bytes),
        convert_rgb_to_intensity=False,
    )

    height, width = color_arr.shape[:2]
    intrinsics = o3d.camera.PinholeCameraIntrinsic()
    intrinsics.set_intrinsics(width, height, 500.0, 500.0, width / 2.0, height / 2.0)
    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, intrinsics)

    # Nothing to clean when the projection produced no points.
    if len(cloud.points) == 0:
        return cloud

    _, keep = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    cloud = cloud.select_by_index(keep)

    # Outlier removal may have emptied the cloud; normals need points.
    if len(cloud.points) == 0:
        return cloud

    cloud.estimate_normals()
    cloud.orient_normals_to_align_with_direction()
    return cloud
def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
    """Reconstruct a triangle mesh from a point cloud with Poisson reconstruction.

    Args:
        pcd: Input point cloud. It is not modified; if it carries no normals,
            they are estimated on a private copy, because Poisson
            reconstruction needs oriented normals.
        depth: Poisson octree depth. Coerced to ``int`` so UI widgets that
            deliver floats (e.g. ``10.0``) are accepted.

    Returns:
        An ``o3d.geometry.TriangleMesh`` rotated by 180 degrees about the x
        axis with vertex normals computed, or ``None`` when ``pcd`` has no
        points.
    """
    if not pcd.has_points():
        return None

    if not pcd.has_normals():
        # Work on a copy so the caller's cloud is left untouched.
        pcd = o3d.geometry.PointCloud(pcd)
        pcd.estimate_normals()
        pcd.orient_normals_to_align_with_direction()

    # The second return value (per-vertex densities) is not used.
    mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=int(depth), n_threads=1
    )

    # Rotate 180° around x for typical camera convention
    rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(rotation, center=(0, 0, 0))
    mesh.compute_vertex_normals()
    return mesh
def save_o3d(obj, path):
    """Write an Open3D point cloud or triangle mesh to ``path``.

    The format is chosen from the file extension (case-insensitive).

    Args:
        obj: An ``o3d.geometry.PointCloud`` or ``o3d.geometry.TriangleMesh``.
        path: Destination file (string or path-like). Point clouds must use
            ``.ply``; meshes must use ``.obj`` or ``.ply``.

    Raises:
        ValueError: If ``obj`` is of an unsupported type or the extension is
            not allowed for that type.
        OSError: If Open3D reports that the write did not succeed.
    """
    path = os.fspath(path)
    ext = os.path.splitext(path)[1].lower()

    if isinstance(obj, o3d.geometry.PointCloud):
        if ext != ".ply":
            raise ValueError("Point cloud: please save as .ply")
        written = o3d.io.write_point_cloud(path, obj)
    elif isinstance(obj, o3d.geometry.TriangleMesh):
        if ext not in {".obj", ".ply"}:
            raise ValueError("Mesh: use .obj or .ply")
        written = o3d.io.write_triangle_mesh(path, obj)
    else:
        raise ValueError("Unsupported type for saving")

    # The Open3D writers signal failure through their boolean return value
    # rather than an exception; surface it instead of leaving a bad file behind.
    if not written:
        raise OSError(f"Open3D failed to write {path!r}")
def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
    """Render a best-effort preview image of ``mesh`` with the offscreen renderer.

    Offscreen rendering can fail (e.g. no EGL/OSMesa available). In that case
    the failure is logged and ``None`` is returned, so callers can rely on the
    Model3D viewer and file downloads instead.

    Args:
        mesh: Mesh to draw. It is not modified; a mesh without vertex colours
            is painted light grey on a private copy.
        width: Output image width in pixels.
        height: Output image height in pixels.

    Returns:
        A PIL image of the rendered mesh on a white background, or ``None``
        if rendering was not possible.
    """
    import logging

    try:
        from open3d.visualization import rendering

        # Ensure it has some color, without repainting the caller's mesh.
        if not mesh.has_vertex_colors():
            mesh = o3d.geometry.TriangleMesh(mesh)
            mesh.paint_uniform_color([0.8, 0.8, 0.85])

        renderer = rendering.OffscreenRenderer(width, height)
        mat = rendering.MaterialRecord()
        mat.shader = "defaultLit"

        scene = renderer.scene
        scene.set_background([1, 1, 1, 1])
        scene.add_geometry("mesh", mesh, mat)

        # Place the eye on the +z side of the bounding-box centre, at a
        # distance proportional to the size of the box.
        bbox = mesh.get_axis_aligned_bounding_box()
        center = bbox.get_center()
        radius = np.linalg.norm(bbox.get_extent()) * 0.8 + 1e-6
        eye = center + np.array([0.0, 0.0, radius])
        scene.camera.look_at(center, eye, [0, 1, 0])

        pixels = np.asarray(renderer.render_to_image())
        return Image.fromarray(pixels)
    except Exception:
        # Deliberately best effort: record why the preview is missing, but
        # never let a rendering problem break the rest of the pipeline.
        logging.getLogger(__name__).warning(
            "Offscreen mesh preview unavailable", exc_info=True
        )
        return None
# ------------------------------
# Gradio pipeline
# ------------------------------
def _save_to_temp(obj, suffix):
    """Save an Open3D object to a new temporary file and return its path."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    # Close our handle before Open3D opens the path itself: a temporary file
    # that is still open cannot be opened a second time on Windows.
    os.close(fd)
    try:
        save_o3d(obj, path)
    except Exception:
        # Do not leave an empty or partial file behind when saving fails.
        os.remove(path)
        raise
    return path


def run_pipeline(image: Image.Image, poisson_depth: int = 10):
    """Run the full image -> depth -> point cloud -> mesh pipeline for Gradio.

    Args:
        image: Uploaded image, or ``None`` when nothing was provided.
        poisson_depth: Poisson reconstruction depth; coerced to ``int``
            because UI widgets may deliver it as a float.

    Returns:
        A 4-tuple ``(depth_preview_image, mesh_preview_png, pcd_ply_path,
        mesh_obj_path)``. Entries are ``None`` for stages that produced
        nothing: all four without an input image, the last three when the
        point cloud is empty, and the mesh entries when no mesh was built.
        The files are temporary files that are not deleted by this function.
    """
    if image is None:
        return None, None, None, None

    # 1) depth
    rgb, depth = predict_depth(image)
    depth_vis = depth_to_colormap(depth)

    # 2) point cloud
    pcd = rgbd_to_pointcloud(rgb, depth)
    if len(pcd.points) == 0:
        return depth_vis, None, None, None

    # 3) mesh
    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))

    # 4) save artifacts; the point cloud is returned even without a mesh
    pcd_path = _save_to_temp(pcd, ".ply")
    if mesh is None:
        return depth_vis, None, pcd_path, None

    # Save mesh in OBJ (works with Gradio Model3D)
    mesh_obj_path = _save_to_temp(mesh, ".obj")

    # 5) mesh preview (best effort)
    preview = render_mesh_image(mesh, 768, 512)
    return depth_vis, preview, pcd_path, mesh_obj_path
# ------------------------------
# Interface
# ------------------------------
# Heading shown at the top of the page. The arrows were mis-encoded in the
# original text ("β†’" is "→" read with the wrong code page).
TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"

# Markdown description rendered below the heading.
DESC = """
Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
and downloads for .ply (point cloud) and .obj (mesh).
**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
"""
# Build the Gradio UI: inputs on the left, image previews on the right,
# downloads and the 3D viewer in a row underneath.
with gr.Blocks(title="2D → 3D Reconstruction") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESC)

    with gr.Row():
        with gr.Column():
            in_img = gr.Image(
                type="pil",
                sources=["upload", "clipboard"],
                label="Input Image",
                image_mode="RGB",
            )
            poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)")
            run_btn = gr.Button("Reconstruct 3D", variant="primary")
        with gr.Column():
            depth_out = gr.Image(label="Depth Map (colormap)")
            mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True)

    with gr.Row():
        pcd_file = gr.File(label="Download Point Cloud (.ply)")
        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

    # The four values returned by run_pipeline map onto these outputs in order.
    run_btn.click(
        fn=run_pipeline,
        inputs=[in_img, poisson_depth],
        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view],
    )

    # Also expose mesh file separately (same path as viewer output)
    mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # share=True creates a public link (useful on Spaces/Colab/local)
    demo.launch(share=True)