Spaces:

Tohru127
/

codex-model

Sleeping

App Files Files Community

Tohru127 committed on Sep 29

Commit

d65081d

verified ·

1 Parent(s): 2f2fc7c

Update app.py

Browse files

Files changed (1) hide show

app.py +240 -242

app.py CHANGED Viewed

@@ -1,250 +1,248 @@
-# app.py
-from __future__ import annotations
-import datetime as dt
-import io
 import os
-import shutil
-import subprocess
-import textwrap
-import uuid
-from pathlib import Path
-from typing import List, Optional, Tuple
 import gradio as gr
-from PIL import Image
-# Optional: Open3D for meshing
-try:
-    import open3d as o3d
-except Exception:
-    o3d = None  # We’ll still run COLMAP and return the fused point cloud if meshing libs aren’t present.
-# Be gentle with HF CPU boxes that choke on many threads
-os.environ.setdefault("OMP_NUM_THREADS", "4")
-def _run(cmd: List[str], cwd: Optional[Path] = None, env: Optional[dict] = None) -> Tuple[int, str]:
-    """Run a subprocess and capture merged stdout/stderr as text."""
-    p = subprocess.run(
-        cmd,
-        cwd=str(cwd) if cwd else None,
-        env=env,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        text=True,
     )
-    return p.returncode, p.stdout
-def _ensure_tool(tool: str) -> bool:
-    return shutil.which(tool) is not None
-def _save_images(files: List[gr.File], out_dir: Path, max_px: int) -> None:
-    out_dir.mkdir(parents=True, exist_ok=True)
-    for f in files:
-        # gr.File returns a path str in .name on Spaces
-        src = Path(f.name)
-        with Image.open(src) as im:
-            im = im.convert("RGB")
-            w, h = im.size
-            scale = min(max_px / max(w, h), 1.0)
-            if scale < 1.0:
-                im = im.resize((int(w * scale), int(h * scale)))
-            im.save(out_dir / src.name, quality=92)
-def run_pipeline(
-    files: List[gr.File],
-    max_px: int,
-    match_mode: str,
-    use_gpu_sift: bool,
-    voxel: float,
-    depth: int,
-    tris: int,
-):
-    logs: List[str] = []
-    try:
-        if not files:
-            return None, [], "Please upload 3–30 images.", gr.update(visible=False)
-        if not _ensure_tool("colmap"):
-            return None, [], "COLMAP not found. Make sure `packages.txt` contains `colmap`.", gr.update(visible=False)
-        run_id = dt.datetime.now().strftime("run_%Y%m%d_%H%M%S_") + uuid.uuid4().hex[:8]
-        run_dir = Path("runs") / run_id
-        imgs_dir = run_dir / "images"
-        db = run_dir / "db.db"
-        sparse_dir = run_dir / "sparse"
-        dense_dir = run_dir / "dense"
-        run_dir.mkdir(parents=True, exist_ok=True)
-        logs.append(f"Workspace: {run_dir}")
-        _save_images(files, imgs_dir, max_px)
-        img_count = len(list(imgs_dir.glob("*")))
-        logs.append(f"Ingested {img_count} image(s). Max side capped at {max_px}px")
-        # 1) Features
-        feat_cmd = [
-            "colmap", "feature_extractor",
-            "--database_path", str(db),
-            "--image_path", str(imgs_dir),
-            "--ImageReader.single_camera", "1",
-            "--SiftExtraction.use_gpu", "1" if (use_gpu_sift and _ensure_tool("nvidia-smi")) else "0",
-        ]
-        code, out = _run(feat_cmd, cwd=run_dir)
-        logs.append("[feature_extractor]\n" + out)
-        if code != 0:
-            raise RuntimeError("COLMAP feature extraction failed.")
-        # 2) Matching
-        if match_mode == "sequential":
-            match_cmd = ["colmap", "sequential_matcher", "--database_path", str(db)]
-        elif match_mode == "exhaustive":
-            match_cmd = ["colmap", "exhaustive_matcher", "--database_path", str(db)]
         else:
-            # Spatial matcher needs priors; default to exhaustive if none
-            match_cmd = ["colmap", "exhaustive_matcher", "--database_path", str(db)]
-        code, out = _run(match_cmd, cwd=run_dir)
-        logs.append(f"[{match_mode}_matcher]\n" + out)
-        if code != 0:
-            raise RuntimeError("COLMAP matching failed.")
-        # 3) Sparse reconstruction
-        sparse_dir.mkdir(exist_ok=True)
-        code, out = _run(
-            ["colmap", "mapper", "--database_path", str(db), "--image_path", str(imgs_dir), "--output_path", str(sparse_dir)],
-            cwd=run_dir,
-        )
-        logs.append("[mapper]\n" + out)
-        if code != 0 or not any((sparse_dir).glob("*/cameras.txt")):
-            raise RuntimeError("COLMAP mapper failed or produced no model.")
-        model_dirs = sorted(sparse_dir.glob("*"))
-        model_dir = model_dirs[0]
-        # 4) Undistort & dense
-        code, out = _run(
-            ["colmap", "image_undistorter", "--image_path", str(imgs_dir), "--input_path", str(model_dir), "--output_path", str(dense_dir), "--output_type", "COLMAP"],
-            cwd=run_dir,
-        )
-        logs.append("[image_undistorter]\n" + out)
-        if code != 0:
-            raise RuntimeError("Undistortion failed.")
-        code, out = _run(
-            ["colmap", "patch_match_stereo", "--workspace_path", str(dense_dir), "--workspace_format", "COLMAP", "--PatchMatchStereo.geom_consistency", "true"],
-            cwd=run_dir,
-        )
-        logs.append("[patch_match_stereo]\n" + out)
-        if code != 0:
-            raise RuntimeError("PatchMatch failed.")
-        fused = run_dir / "fused.ply"
-        code, out = _run(
-            ["colmap", "stereo_fusion", "--workspace_path", str(dense_dir), "--workspace_format", "COLMAP", "--input_type", "geometric", "--output_path", str(fused)],
-            cwd=run_dir,
-        )
-        logs.append("[stereo_fusion]\n" + out)
-        if code != 0 or not fused.exists():
-            raise RuntimeError("Fusion failed.")
-        # 5) Meshing (Open3D). If not available, just return fused point cloud.
-        mesh_paths = []
-        preview_path = fused  # default to point cloud preview
-        if o3d is not None:
-            pcd = o3d.io.read_point_cloud(str(fused))
-            if voxel and voxel > 0:
-                pcd = pcd.voxel_down_sample(voxel)
-            pcd.estimate_normals(o3d.geometry.KDTreeSearchParamKNN(knn=20))
-            # Poisson surface reconstruction
-            mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=depth)
-            mesh.remove_degenerate_triangles()
-            mesh.remove_duplicated_triangles()
-            mesh.remove_duplicated_vertices()
-            mesh.remove_non_manifold_edges()
-            if tris and tris > 0:
-                mesh = mesh.simplify_quadric_decimation(tris)
-            mesh.compute_vertex_normals()
-            mesh_ply = run_dir / "mesh.ply"
-            mesh_obj = run_dir / "mesh.obj"
-            o3d.io.write_triangle_mesh(str(mesh_ply), mesh)
-            o3d.io.write_triangle_mesh(str(mesh_obj), mesh)
-            mesh_paths = [mesh_ply, mesh_obj]
-            preview_path = mesh_ply
-        files_out = [preview_path] + mesh_paths
-        file_list = [str(p) for p in files_out if Path(p).exists()]
-        return str(preview_path), file_list, "\n".join(logs[-80:]), gr.update(visible=True)
-    except Exception as e:
-        logs.append("\n[ERROR]\n" + textwrap.fill(str(e), width=100))
-        return None, [], "\n".join(logs[-120:]), gr.update(visible=False)
-def build_ui():
-    with gr.Blocks(title="Sparse Multi-View 3D (Urban Planning)", theme=gr.themes.Soft()) as demo:
-        gr.Markdown(
-            """
-            # 🗺️ Sparse Multi-View 3D for Urban Planning
-            Upload **3–30 photos** of a scene (streetscape, plaza, façade). We estimate camera poses with **COLMAP**,
-            build a **dense point cloud**, and (optionally) **mesh** it with Open3D.
-            **Tips for sparse captures:** overlap ~60–70%, vary viewpoint (walk an arc), avoid moving cars/people.
-            """
-        )
-        with gr.Row():
-            with gr.Column(scale=2):
-                images = gr.File(label="Upload images (JPG/PNG)", file_types=["image"], file_count="multiple")
-                gallery = gr.Gallery(label="Preview", columns=6, height=160)
-                def _show_gallery(files: List[gr.File]):
-                    rows = []
-                    for f in files or []:
-                        try:
-                            with Image.open(f.name) as im:
-                                rows.append((Path(f.name).name, im.convert("RGB")))
-                        except Exception:
-                            pass
-                    return rows
-                images.change(_show_gallery, inputs=images, outputs=gallery)
-                with gr.Accordion("Reconstruction settings", open=False):
-                    max_px = gr.Slider(1024, 4096, value=2400, step=64, label="Max image size (px, longest side)")
-                    match_mode = gr.Radio(["exhaustive", "sequential", "spatial"], value="sequential", label="Matching mode")
-                    use_gpu_sift = gr.Checkbox(True, label="Use GPU SIFT if available")
-                with gr.Accordion("Meshing", open=True):
-                    voxel = gr.Slider(0.0, 0.05, value=0.01, step=0.005, label="Voxel downsample (m, approx units)")
-                    depth = gr.Slider(6, 12, value=9, step=1, label="Poisson depth (higher → more detail)")
-                    tris = gr.Slider(0, 500_000, value=150_000, step=10_000, label="Target triangles (0 = keep)")
-                run = gr.Button("▶ Reconstruct 3D", variant="primary")
-            with gr.Column(scale=1):
-                preview = gr.Model3D(label="Preview (PLY/OBJ)", visible=False)
-                outputs = gr.Files(label="Downloads")
-                logs = gr.Markdown("Logs will appear here…")
-        run.click(
-            run_pipeline,
-            inputs=[images, max_px, match_mode, use_gpu_sift, voxel, depth, tris],
-            outputs=[preview, outputs, logs, preview],
-            queue=True,
-        )
-        gr.Markdown(
-            """
-            ### Notes & Scaling
-            - Results are in **arbitrary units** (SfM scale). For metric scale, align in GIS/CAD with known distances.
-            - Outdoor scenes with repetitive textures (glass/trees) can be challenging—add more oblique views if possible.
-            """
-        )
-    return demo
 if __name__ == "__main__":
-    build_ui().launch()

 import os
+import io
+import tempfile
+import numpy as np
+from PIL import Image
 import gradio as gr
+import torch
+from transformers import GLPNForDepthEstimation, GLPNImageProcessor
+import open3d as o3d
+# ------------------------------
+# Model setup (loaded once)
+# ------------------------------
+# Run on the GPU when torch reports CUDA; otherwise fall back to the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# The image processor and the GLPN depth model both come from the
+# "vinvino02/glpn-nyu" checkpoint. They are created at import time, so
+# every call to predict_depth reuses the same instances.
+FE = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
+MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu").to(DEVICE)
+# Evaluation mode: this module only runs inference on the model.
+MODEL.eval()
+# ------------------------------
+# Utilities
+# ------------------------------
+def _resize_to_mult32(img: Image.Image, max_h=480, min_side=64):
+    """Resize ``img`` so that both sides are non-zero multiples of 32.
+
+    The height is capped at ``max_h`` and rounded down to a multiple of 32;
+    the width follows the aspect ratio and is rounded to the nearest
+    multiple of 32. Both sides are then clamped to at least ``min_side``.
+
+    Without the clamp, an input shorter than 32 px yields a height of 0
+    (division by zero / invalid resize) and a very narrow input yields a
+    width of 0. The default of 64 also leaves pixels behind after the
+    16 px border that ``predict_depth`` trims from every edge.
+
+    Args:
+        img: Source PIL image.
+        max_h: Upper bound for the output height. ``min_side`` wins when
+            ``max_h`` is smaller than it.
+        min_side: Smallest allowed output side; should be a multiple of 32.
+
+    Returns:
+        A new PIL image resampled with bicubic interpolation.
+    """
+    new_h = min(max_h, img.height)
+    new_h = max(min_side, new_h - new_h % 32)
+    new_w = int(new_h * img.width / img.height)
+    diff = new_w % 32
+    # Round the width to the nearest multiple of 32 (ties round up).
+    new_w = new_w - diff if diff < 16 else new_w + (32 - diff)
+    new_w = max(min_side, new_w)
+    return img.resize((new_w, new_h), Image.BICUBIC)
+def predict_depth(image_pil: Image.Image):
+    """Estimate depth with GLPN; return (border-cropped RGB PIL image, depth array)."""
+    border = 16  # pixels trimmed from every edge of both outputs
+    resized = _resize_to_mult32(image_pil.convert("RGB"))
+    model_inputs = FE(images=resized, return_tensors="pt").to(DEVICE)
+    with torch.no_grad():
+        prediction = MODEL(**model_inputs).predicted_depth
+    # Drop singleton dimensions, move the result to host memory, and
+    # multiply by 1000 (a scale chosen for nicer contrast).
+    depth_map = prediction.squeeze().float().cpu().numpy() * 1000.0
+    depth_map = depth_map[border:-border, border:-border]
+    box = (border, border, resized.width - border, resized.height - border)
+    return resized.crop(box), depth_map
+def depth_to_colormap(depth: np.ndarray):
+    """Return a PIL RGB image of ``depth`` rendered with the plasma colormap.
+
+    The depth values are min-max normalized and quantized to 0..255 before
+    the colormap lookup; a constant depth map maps to the lowest color.
+    The input array is not modified.
+    """
+    # Only the colormap lookup table is needed, so pyplot and a rendering
+    # backend are never loaded.
+    import matplotlib
+    d = depth.copy()
+    d -= d.min()
+    if d.max() > 0:
+        d /= d.max()
+    d8 = (d * 255).astype(np.uint8)
+    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
+    # 3.9; the colormap registry replaces it. Fall back to the old accessor
+    # only on releases that predate the registry.
+    try:
+        cmap = matplotlib.colormaps["plasma"]
+    except AttributeError:
+        import matplotlib.cm as cm
+        cmap = cm.get_cmap("plasma")
+    # An integer array indexes the colormap's lookup table directly; the
+    # alpha channel is dropped before converting back to 8-bit RGB.
+    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
+    return Image.fromarray(colored)
+def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
+    """Build a colored Open3D point cloud from an RGB image and its depth map."""
+    # Min-max normalize the depth, then quantize to uint8 for the RGBD image.
+    scaled = depth.copy()
+    scaled -= scaled.min()
+    peak = scaled.max()
+    if peak > 0:
+        scaled /= peak
+    depth_img = o3d.geometry.Image((scaled * 255).astype(np.uint8))
+    colors = np.array(rgb_pil)  # H, W, 3 (uint8)
+    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
+        o3d.geometry.Image(colors), depth_img, convert_rgb_to_intensity=False
+    )
+    # Pinhole camera with a fixed focal length of 500 and the principal
+    # point at the image center.
+    height, width = colors.shape[:2]
+    intrinsic = o3d.camera.PinholeCameraIntrinsic()
+    intrinsic.set_intrinsics(width, height, 500.0, 500.0, width / 2.0, height / 2.0)
+    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, intrinsic)
+    if len(cloud.points) == 0:
+        return cloud
+    # Drop statistical outliers; normals are estimated only if points remain.
+    _, keep = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
+    cloud = cloud.select_by_index(keep)
+    if len(cloud.points) > 0:
+        cloud.estimate_normals()
+        cloud.orient_normals_to_align_with_direction()
+    return cloud
+def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
+    """Poisson-reconstruct a triangle mesh from ``pcd``; None if it has no points."""
+    if not len(pcd.points):
+        return None
+    # Only the first element of the returned pair (the mesh) is used.
+    reconstructed = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
+        pcd, depth=depth, n_threads=1
     )
+    mesh = reconstructed[0]
+    # Flip by pi radians about the x axis, pivoting on the origin.
+    flip = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
+    mesh.rotate(flip, center=(0, 0, 0))
+    mesh.compute_vertex_normals()
+    return mesh
+def save_o3d(obj, path):
+    """Write an Open3D point cloud or triangle mesh to ``path``.
+
+    The file extension (case-insensitive) selects the format: point clouds
+    accept ``.ply`` only; meshes accept ``.obj`` or ``.ply``.
+
+    Raises:
+        ValueError: unsupported object type or extension for that type.
+        OSError: the Open3D writer reported failure. The writers return
+            False instead of raising, so the result is checked explicitly
+            to avoid handing a missing or partial file to the caller.
+    """
+    ext = os.path.splitext(path)[1].lower()
+    if isinstance(obj, o3d.geometry.PointCloud):
+        if ext != ".ply":
+            raise ValueError("Point cloud: please save as .ply")
+        ok = o3d.io.write_point_cloud(path, obj)
+    elif isinstance(obj, o3d.geometry.TriangleMesh):
+        if ext not in (".obj", ".ply"):
+            raise ValueError("Mesh: use .obj or .ply")
+        ok = o3d.io.write_triangle_mesh(path, obj)
+    else:
+        raise ValueError("Unsupported type for saving")
+    if not ok:
+        raise OSError(f"Open3D failed to write {path}")
+def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
+    """
+    Best-effort offscreen render of ``mesh``; returns a PIL image or None.
+
+    Any exception raised while importing or using the Open3D renderer
+    (e.g., no EGL/OSMesa) is swallowed and None is returned, so callers
+    should rely on the Model3D viewer + downloads when no preview exists.
+
+    Note: a mesh without vertex colors is painted light gray in place, so
+    the caller's mesh object is modified.
+    """
+    try:
+        # Imported lazily so a missing rendering module only disables the preview.
+        from open3d.visualization import rendering
+        # Make sure mesh has vertex colors or a default material
+        if not mesh.has_vertex_colors():
+            mesh.paint_uniform_color([0.8, 0.8, 0.85])
+        renderer = rendering.OffscreenRenderer(width, height)
+        mat = rendering.MaterialRecord()
+        mat.shader = "defaultLit"
+        scene = renderer.scene
+        # White, fully opaque background (RGBA).
+        scene.set_background([1, 1, 1, 1])
+        scene.add_geometry("mesh", mesh, mat)
+        bbox = mesh.get_axis_aligned_bounding_box()
+        center = bbox.get_center()
+        extent = bbox.get_extent()
+        # Camera distance scales with the bounding-box diagonal; the epsilon
+        # keeps it non-zero for a degenerate (zero-extent) box.
+        radius = np.linalg.norm(extent) * 0.8 + 1e-6
+        # Camera looking at center from +z
+        cam = scene.camera
+        cam.look_at(center, center + [0, 0, radius], [0, 1, 0])
+        img_o3d = renderer.render_to_image()
+        img = np.asarray(img_o3d)
+        return Image.fromarray(img)
+    except Exception:
+        # Deliberate best-effort: any failure means "no preview", not an error.
+        return None
+# ------------------------------
+# Gradio pipeline
+# ------------------------------
+def _save_to_temp(obj, suffix):
+    """Write an Open3D geometry to a new temp file ending in ``suffix``; return its path."""
+    # mkstemp + close releases the descriptor before Open3D opens the path.
+    # Writing to a NamedTemporaryFile's name while it is still open is not
+    # portable (it fails on Windows).
+    fd, path = tempfile.mkstemp(suffix=suffix)
+    os.close(fd)
+    save_o3d(obj, path)
+    return path
+def run_pipeline(image: Image.Image, poisson_depth: int = 10):
+    """
+    Main function wired to Gradio:
+    returns (depth_preview_image, mesh_preview_png, pcd_ply_path, mesh_obj_path)
+
+    Entries are None for stages that produced nothing: all four when no
+    image is given, everything but the depth preview when the point cloud
+    is empty, and the mesh preview when offscreen rendering is unavailable.
+    The saved files are temporary files that are not deleted here.
+    """
+    if image is None:
+        return None, None, None, None
+    # 1) depth
+    rgb, depth = predict_depth(image)
+    depth_vis = depth_to_colormap(depth)
+    # 2) point cloud
+    pcd = rgbd_to_pointcloud(rgb, depth)
+    if len(pcd.points) == 0:
+        return depth_vis, None, None, None
+    # 3) mesh; int() guards against the UI delivering the slider value as a float
+    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))
+    # 4) save artifacts; the point cloud is returned even without a mesh
+    pcd_path = _save_to_temp(pcd, ".ply")
+    if mesh is None:
+        return depth_vis, None, pcd_path, None
+    # Save mesh in OBJ (works with Gradio Model3D)
+    mesh_obj_path = _save_to_temp(mesh, ".obj")
+    # 5) mesh preview (best effort)
+    preview = render_mesh_image(mesh, 768, 512)
+    return depth_vis, preview, pcd_path, mesh_obj_path
+# ------------------------------
+# Interface
+# ------------------------------
+# Heading and description rendered as Markdown at the top of the page.
+TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"
+DESC = """
+Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
+a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
+and downloads for .ply (point cloud) and .obj (mesh).
+**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
+"""
+# The UI is built at import time; `demo` is the module-level Blocks app.
+with gr.Blocks(title="2D → 3D Reconstruction") as demo:
+    gr.Markdown(f"# {TITLE}")
+    gr.Markdown(DESC)
+    with gr.Row():
+        # Left column: inputs and the trigger button.
+        with gr.Column():
+            in_img = gr.Image(type="pil", label="Input Image")
+            poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)")
+            run_btn = gr.Button("Reconstruct 3D", variant="primary")
+        # Right column: image previews.
+        with gr.Column():
+            depth_out = gr.Image(label="Depth Map (colormap)")
+            mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True)
+    with gr.Row():
+        pcd_file = gr.File(label="Download Point Cloud (.ply)")
+        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
+        mesh_obj_file = gr.File(label="Download Mesh (.obj)")
+    # The four outputs line up, in order, with the 4-tuple returned by run_pipeline.
+    run_btn.click(
+        fn=run_pipeline,
+        inputs=[in_img, poisson_depth],
+        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view]
+    )
+    # Also expose mesh file separately (same path as viewer output)
+    mesh_obj_view.change(lambda p: p, inputs=mesh_obj_view, outputs=mesh_obj_file)
 if __name__ == "__main__":
+    demo.launch()