# Spaces: Tohru127/codex-model (Running) - File size: 8,512 Bytes

import os
import io
import tempfile
import numpy as np
from PIL import Image

import gradio as gr
import torch
from transformers import GLPNForDepthEstimation, GLPNImageProcessor

import open3d as o3d


# ------------------------------
# Model setup (loaded once)
# ------------------------------
# Both the image processor and the network are loaded from the same checkpoint.
_GLPN_CHECKPOINT = "vinvino02/glpn-nyu"

# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

FE = GLPNImageProcessor.from_pretrained(_GLPN_CHECKPOINT)
MODEL = GLPNForDepthEstimation.from_pretrained(_GLPN_CHECKPOINT)
MODEL = MODEL.to(DEVICE)
# Switch the network to evaluation mode; it is only used for inference here.
MODEL.eval()


# ------------------------------
# Utilities
# ------------------------------
def _resize_to_mult32(img: Image.Image, max_h=480):
    """Resize ``img`` so that both dimensions are positive multiples of 32.

    The height is capped at ``max_h`` and rounded down to a multiple of 32.
    The width follows the source aspect ratio and is rounded to the nearest
    multiple of 32 (ties round up). Each dimension is clamped to at least 32,
    so images shorter or narrower than 32 px (or a ``max_h`` below 32) are
    upscaled instead of producing a zero-sized resize target.

    Args:
        img: Source PIL image.
        max_h: Upper bound for the output height in pixels.

    Returns:
        A new PIL image resampled with bicubic interpolation.
    """
    block = 32

    capped_h = min(max_h, img.height)
    # Round down to a multiple of 32, but never all the way to zero.
    new_h = max(block, capped_h - capped_h % block)

    # Width that preserves the aspect ratio for the final height.
    scaled_w = int(new_h * img.width / img.height)
    # Nearest multiple of 32 (remainder >= 16 rounds up), never below 32.
    new_w = max(block, (scaled_w + block // 2) // block * block)

    return img.resize((new_w, new_h), Image.BICUBIC)


def predict_depth(image_pil: Image.Image):
    """Run GLPN on an image and return the matching RGB image and depth map.

    The input is converted to RGB, resized by ``_resize_to_mult32`` and fed to
    the model. A 16 px border is then removed from both the depth map and the
    RGB image so the two stay aligned. When the resized image is too small for
    that crop to leave any pixels (a side of 32 px or less), the crop is
    skipped and the full-size pair is returned instead of an empty array.

    Args:
        image_pil: Input PIL image in any mode.

    Returns:
        ``(rgb, depth)`` where ``rgb`` is a PIL image and ``depth`` is a
        float32 numpy array of the model output multiplied by 1000.
    """
    img = _resize_to_mult32(image_pil.convert("RGB"))
    inputs = FE(images=img, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        outputs = MODEL(**inputs)
        pred = outputs.predicted_depth

    # squeeze() drops the singleton batch dimension(s), leaving a 2-D map.
    # The 1000x factor only rescales values; downstream code re-normalizes.
    depth = pred.squeeze().float().cpu().numpy() * 1000.0

    # Border width removed from every edge.
    # NOTE(review): assumes the depth map has the same height/width as ``img``,
    # as the paired crop below already relied on - confirm for other models.
    pad = 16
    if min(img.width, img.height) <= 2 * pad:
        # Cropping would leave zero rows or columns; keep everything.
        return img, depth

    depth = depth[pad:-pad, pad:-pad]
    rgb = img.crop((pad, pad, img.width - pad, img.height - pad))
    return rgb, depth


def depth_to_colormap(depth: np.ndarray):
    """Return a PIL RGB preview of ``depth`` rendered with the plasma colormap.

    The depth values are shifted to start at zero, scaled to ``[0, 1]`` (left
    at zero when the map is constant), quantized to 8 bits and looked up in the
    colormap. The input array is not modified.

    Args:
        depth: 2-D floating-point depth map.

    Returns:
        A PIL image with the same height and width as ``depth``.
    """
    # Imported lazily so matplotlib is only loaded when a preview is requested.
    # Only the colormap lookup table is used, so no pyplot/backend is needed.
    import matplotlib

    try:
        # Colormap registry, available since Matplotlib 3.5.
        cmap = matplotlib.colormaps["plasma"]
    except AttributeError:
        # Older Matplotlib without the registry; ``cm.get_cmap`` still exists
        # there (it was removed in Matplotlib 3.9).
        from matplotlib import cm

        cmap = cm.get_cmap("plasma")

    d = depth.copy()
    d -= d.min()
    peak = d.max()
    if peak > 0:
        d /= peak
    d8 = (d * 255).astype(np.uint8)

    # Integer input indexes the colormap's lookup table directly; drop alpha.
    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(colored)


def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
    """Build an Open3D point cloud from an RGB image and a relative depth map.

    The depth map is rescaled to 0..255 (uint8), paired with the colour image
    as an RGBD image and back-projected through a pinhole camera with focal
    length 500 and the principal point at the image centre. A non-empty cloud
    is then filtered with statistical outlier removal and, if points remain,
    gets estimated and oriented normals.

    Args:
        rgb_pil: Colour image aligned with ``depth``.
        depth: Depth map; it is copied, not modified.

    Returns:
        The resulting ``o3d.geometry.PointCloud`` (possibly empty).
    """
    # Rescale depth to [0, 1]; a constant map stays all zeros.
    scaled = depth.copy()
    scaled -= scaled.min()
    if scaled.max() > 0:
        scaled /= scaled.max()
    depth_bytes = (scaled * 255).astype(np.uint8)

    color_arr = np.array(rgb_pil)  # H, W, 3 (uint8)
    height, width = color_arr.shape[:2]

    depth_image = o3d.geometry.Image(depth_bytes)
    color_image = o3d.geometry.Image(color_arr)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        color_image,
        depth_image,
        convert_rgb_to_intensity=False,
    )

    camera = o3d.camera.PinholeCameraIntrinsic()
    camera.set_intrinsics(
        width, height, 500.0, 500.0, width / 2.0, height / 2.0
    )

    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera)
    if len(cloud.points) == 0:
        return cloud

    # Drop statistical outliers, then add normals if anything is left.
    _, inliers = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    cloud = cloud.select_by_index(inliers)
    if len(cloud.points) == 0:
        return cloud

    cloud.estimate_normals()
    cloud.orient_normals_to_align_with_direction()
    return cloud


def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
    """Reconstruct a triangle mesh from ``pcd`` with Poisson reconstruction.

    Args:
        pcd: Input point cloud.
            NOTE(review): Poisson reconstruction presumably needs normals on
            ``pcd``; this function does not compute them - confirm callers do.
        depth: Poisson octree depth. It is coerced to ``int`` because UI
            sliders may deliver whole numbers as floats, which the Open3D
            binding would reject.

    Returns:
        The reconstructed ``o3d.geometry.TriangleMesh`` with vertex normals,
        rotated 180 degrees about the x axis, or ``None`` when ``pcd`` has no
        points.
    """
    if len(pcd.points) == 0:
        return None

    # The second return value (per-vertex densities) is not used.
    mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
        pcd, depth=int(depth), n_threads=1
    )

    # Rotate 180° around x for typical camera convention
    R = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    mesh.rotate(R, center=(0, 0, 0))
    mesh.compute_vertex_normals()
    return mesh


def save_o3d(obj, path):
    """Write an Open3D point cloud or triangle mesh to ``path``.

    The file format is chosen from the extension of ``path``
    (case-insensitive): point clouds must use ``.ply``; meshes may use
    ``.obj`` or ``.ply``.

    Args:
        obj: ``o3d.geometry.PointCloud`` or ``o3d.geometry.TriangleMesh``.
        path: Destination file path.

    Raises:
        ValueError: If ``obj`` is of another type or the extension is not
            supported for that type.
        OSError: If Open3D reports that the write did not succeed.
    """
    ext = os.path.splitext(path)[1].lower()

    if isinstance(obj, o3d.geometry.PointCloud):
        if ext != ".ply":
            raise ValueError("Point cloud: please save as .ply")
        written = o3d.io.write_point_cloud(path, obj)
    elif isinstance(obj, o3d.geometry.TriangleMesh):
        if ext not in {".obj", ".ply"}:
            raise ValueError("Mesh: use .obj or .ply")
        written = o3d.io.write_triangle_mesh(path, obj)
    else:
        raise ValueError("Unsupported type for saving")

    # The Open3D writers signal failure through their boolean return value
    # rather than by raising, so surface it instead of continuing silently.
    if not written:
        raise OSError(f"Open3D failed to write {path!r}")


def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
    """Render a preview image of ``mesh`` offscreen on a best-effort basis.

    Any failure (for example a missing EGL/OSMesa backend) is logged and
    turned into a ``None`` result so callers can fall back to the Model3D
    viewer and the downloads.

    Args:
        mesh: Mesh to render. If it has no vertex colours it is painted a
            uniform light grey in place.
        width: Output image width in pixels.
        height: Output image height in pixels.

    Returns:
        A PIL image of the render, or ``None`` if rendering failed.
    """
    import logging

    try:
        from open3d.visualization import rendering

        # Ensure it has some color
        if not mesh.has_vertex_colors():
            mesh.paint_uniform_color([0.8, 0.8, 0.85])

        renderer = rendering.OffscreenRenderer(width, height)
        mat = rendering.MaterialRecord()
        mat.shader = "defaultLit"

        scene = renderer.scene
        scene.set_background([1, 1, 1, 1])
        scene.add_geometry("mesh", mesh, mat)

        # Place the eye on the +z side of the bounding-box centre, at a
        # distance proportional to the box diagonal (epsilon avoids zero).
        bbox = mesh.get_axis_aligned_bounding_box()
        center = bbox.get_center()
        extent = bbox.get_extent()
        radius = np.linalg.norm(extent) * 0.8 + 1e-6

        cam = scene.camera
        cam.look_at(center, center + [0, 0, radius], [0, 1, 0])

        img_o3d = renderer.render_to_image()
        img = np.asarray(img_o3d)
        return Image.fromarray(img)
    except Exception:
        # Deliberately broad: the preview is optional, so every renderer
        # failure degrades to "no preview" - but leave a trace of the cause.
        logging.getLogger(__name__).warning(
            "Offscreen mesh preview unavailable; continuing without it.",
            exc_info=True,
        )
        return None


# ------------------------------
# Gradio pipeline
# ------------------------------
def _reserve_temp_path(suffix):
    """Create an empty temporary file ending in ``suffix`` and return its path.

    The descriptor is closed before returning so another writer can open the
    path by name on every platform. The file is not deleted automatically.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def run_pipeline(image: Image.Image, poisson_depth: int = 10):
    """Run depth estimation, point-cloud creation and meshing for the UI.

    Args:
        image: Input PIL image, or ``None`` when nothing was provided.
        poisson_depth: Poisson reconstruction depth; coerced to ``int``.

    Returns:
        ``(depth_preview_image, mesh_preview_png, pcd_ply_path, mesh_obj_path)``.
        Entries are ``None`` for stages that did not produce a result: all
        four when ``image`` is ``None``, everything but the depth preview when
        the point cloud is empty, and the mesh entries when no mesh was built.
        The mesh preview may also be ``None`` when offscreen rendering fails.
    """
    if image is None:
        return None, None, None, None

    # 1) depth
    rgb, depth = predict_depth(image)
    depth_vis = depth_to_colormap(depth)

    # 2) point cloud
    pcd = rgbd_to_pointcloud(rgb, depth)
    if len(pcd.points) == 0:
        return depth_vis, None, None, None

    # 3) mesh
    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))

    # 4) save artifacts. The point cloud is written in every case so it can
    # be downloaded even when no mesh is available. Paths are reserved with
    # the handle already closed, because Open3D reopens the file by name.
    pcd_path = _reserve_temp_path(".ply")
    save_o3d(pcd, pcd_path)
    if mesh is None:
        return depth_vis, None, pcd_path, None

    # Save mesh in OBJ (works with Gradio Model3D)
    mesh_obj_path = _reserve_temp_path(".obj")
    save_o3d(mesh, mesh_obj_path)

    # 5) mesh preview (best effort)
    preview = render_mesh_image(mesh, 768, 512)

    return depth_vis, preview, pcd_path, mesh_obj_path


# ------------------------------
# Interface
# ------------------------------
# Heading and introductory Markdown rendered at the top of the page.
TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"
DESC = """
Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
and downloads for .ply (point cloud) and .obj (mesh).
**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
"""

# Components are created inside nested layout contexts; the nesting and the
# creation order determine where each one appears on the page.
with gr.Blocks(title="2D → 3D Reconstruction") as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESC)

    with gr.Row():
        # First column: input image, Poisson depth slider and the run button.
        with gr.Column():
            in_img = gr.Image(
                type="pil",
                sources=["upload", "clipboard"],
                label="Input Image",
                image_mode="RGB"
            )
            poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)")
            run_btn = gr.Button("Reconstruct 3D", variant="primary")

        # Second column: depth preview and the optional offscreen mesh render.
        with gr.Column():
            depth_out = gr.Image(label="Depth Map (colormap)")
            mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True)

    # Second row: point-cloud download, interactive mesh viewer, mesh download.
    with gr.Row():
        pcd_file = gr.File(label="Download Point Cloud (.ply)")
        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

    # run_pipeline returns four values; they map positionally onto these
    # four output components.
    run_btn.click(
        fn=run_pipeline,
        inputs=[in_img, poisson_depth],
        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view]
    )
    # Also expose mesh file separately (same path as viewer output)
    mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file)

if __name__ == "__main__":
    # Start the app only when this file is executed as a script.
    # share=True asks Gradio to create a public link for the running app.
    # NOTE(review): the original note lists Spaces/Colab/local as targets -
    # confirm the public link is actually wanted in each of those environments.
    demo.launch(share=True)