Tohru127 committed on
Commit
d65081d
·
verified ·
1 Parent(s): 2f2fc7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -242
app.py CHANGED
@@ -1,250 +1,248 @@
1
- # app.py
2
- from __future__ import annotations
3
-
4
- import datetime as dt
5
- import io
6
  import os
7
- import shutil
8
- import subprocess
9
- import textwrap
10
- import uuid
11
- from pathlib import Path
12
- from typing import List, Optional, Tuple
13
 
14
  import gradio as gr
15
- from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
# Open3D is optional: without it COLMAP still runs and the fused point cloud
# is returned, only the meshing stage is skipped.
o3d = None
try:
    import open3d as o3d
except Exception:
    # Any import-time failure leaves ``o3d`` as None, which downstream code
    # checks before meshing.
    pass

# Be gentle with HF CPU boxes that choke on many threads: only set a default,
# never override a value the environment already provides.
if "OMP_NUM_THREADS" not in os.environ:
    os.environ["OMP_NUM_THREADS"] = "4"
25
-
26
- def _run(cmd: List[str], cwd: Optional[Path] = None, env: Optional[dict] = None) -> Tuple[int, str]:
27
- """Run a subprocess and capture merged stdout/stderr as text."""
28
- p = subprocess.run(
29
- cmd,
30
- cwd=str(cwd) if cwd else None,
31
- env=env,
32
- stdout=subprocess.PIPE,
33
- stderr=subprocess.STDOUT,
34
- text=True,
 
 
35
  )
36
- return p.returncode, p.stdout
37
-
38
- def _ensure_tool(tool: str) -> bool:
39
- return shutil.which(tool) is not None
40
-
41
def _save_images(files: List[gr.File], out_dir: Path, max_px: int) -> None:
    """Copy uploaded images into *out_dir* as RGB, capping the longest side.

    Each upload is opened from its ``.name`` path, converted to RGB, shrunk so
    that its longest side is at most *max_px* (images are never enlarged), and
    saved under its original file name.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    for upload in files:
        # gr.File returns a path str in .name on Spaces
        source = Path(upload.name)
        with Image.open(source) as opened:
            rgb = opened.convert("RGB")
            width, height = rgb.size
            factor = min(max_px / max(width, height), 1.0)
            if factor < 1.0:
                rgb = rgb.resize((int(width * factor), int(height * factor)))
            rgb.save(out_dir / source.name, quality=92)
53
-
54
def _colmap_step(logs: List[str], label: str, args: List[str], run_dir: Path, error: str) -> None:
    """Run one COLMAP sub-command and record its console output in *logs*.

    The output is appended under a ``[label]`` heading. Raises
    ``RuntimeError(error)`` when the command exits with a non-zero status.
    """
    code, out = _run(["colmap", *args], cwd=run_dir)
    logs.append(f"[{label}]\n" + out)
    if code != 0:
        raise RuntimeError(error)


def run_pipeline(
    files: List[gr.File],
    max_px: int,
    match_mode: str,
    use_gpu_sift: bool,
    voxel: float,
    depth: int,
    tris: int,
):
    """Reconstruct a scene from uploaded photos with COLMAP, then mesh it.

    Stages: ingest images, feature extraction, matching, sparse mapping,
    undistortion, PatchMatch stereo, stereo fusion, and (when Open3D was
    imported) Poisson meshing with optional voxel downsampling and
    decimation.

    Returns a 4-tuple ``(preview_path, download_paths, log_text, update)``.
    ``preview_path`` is the mesh (or the fused point cloud when Open3D is
    unavailable), and ``update`` is a ``gr.update`` toggling visibility. On
    any failure the first two items are ``None`` and ``[]`` and the log text
    ends with an ``[ERROR]`` section; exceptions are not propagated.
    """
    logs: List[str] = []
    try:
        if not files:
            return None, [], "Please upload 3–30 images.", gr.update(visible=False)

        if not _ensure_tool("colmap"):
            return None, [], "COLMAP not found. Make sure `packages.txt` contains `colmap`.", gr.update(visible=False)

        run_id = dt.datetime.now().strftime("run_%Y%m%d_%H%M%S_") + uuid.uuid4().hex[:8]
        # Absolute workspace: every COLMAP call below runs with cwd=run_dir,
        # so relative paths would be resolved against the wrong directory.
        run_dir = (Path("runs") / run_id).resolve()
        imgs_dir = run_dir / "images"
        db = run_dir / "db.db"
        sparse_dir = run_dir / "sparse"
        dense_dir = run_dir / "dense"
        run_dir.mkdir(parents=True, exist_ok=True)

        logs.append(f"Workspace: {run_dir}")
        _save_images(files, imgs_dir, max_px)
        img_count = len(list(imgs_dir.glob("*")))
        logs.append(f"Ingested {img_count} image(s). Max side capped at {max_px}px")

        # 1) Features
        gpu_flag = "1" if (use_gpu_sift and _ensure_tool("nvidia-smi")) else "0"
        _colmap_step(
            logs,
            "feature_extractor",
            [
                "feature_extractor",
                "--database_path", str(db),
                "--image_path", str(imgs_dir),
                "--ImageReader.single_camera", "1",
                "--SiftExtraction.use_gpu", gpu_flag,
            ],
            run_dir,
            "COLMAP feature extraction failed.",
        )

        # 2) Matching. Spatial matching needs priors, so anything other than
        # "sequential" falls back to exhaustive matching; the log label names
        # the matcher that actually ran.
        matcher = "sequential_matcher" if match_mode == "sequential" else "exhaustive_matcher"
        _colmap_step(
            logs,
            matcher,
            [matcher, "--database_path", str(db)],
            run_dir,
            "COLMAP matching failed.",
        )

        # 3) Sparse reconstruction
        sparse_dir.mkdir(exist_ok=True)
        mapper_error = "COLMAP mapper failed or produced no model."
        _colmap_step(
            logs,
            "mapper",
            [
                "mapper",
                "--database_path", str(db),
                "--image_path", str(imgs_dir),
                "--output_path", str(sparse_dir),
            ],
            run_dir,
            mapper_error,
        )
        # Accept both binary and text model exports when looking for a model.
        model_dirs = sorted(
            {
                cameras.parent
                for pattern in ("*/cameras.bin", "*/cameras.txt")
                for cameras in sparse_dir.glob(pattern)
            }
        )
        if not model_dirs:
            raise RuntimeError(mapper_error)
        model_dir = model_dirs[0]

        # 4) Undistort & dense
        _colmap_step(
            logs,
            "image_undistorter",
            [
                "image_undistorter",
                "--image_path", str(imgs_dir),
                "--input_path", str(model_dir),
                "--output_path", str(dense_dir),
                "--output_type", "COLMAP",
            ],
            run_dir,
            "Undistortion failed.",
        )

        _colmap_step(
            logs,
            "patch_match_stereo",
            [
                "patch_match_stereo",
                "--workspace_path", str(dense_dir),
                "--workspace_format", "COLMAP",
                "--PatchMatchStereo.geom_consistency", "true",
            ],
            run_dir,
            "PatchMatch failed.",
        )

        fused = run_dir / "fused.ply"
        _colmap_step(
            logs,
            "stereo_fusion",
            [
                "stereo_fusion",
                "--workspace_path", str(dense_dir),
                "--workspace_format", "COLMAP",
                "--input_type", "geometric",
                "--output_path", str(fused),
            ],
            run_dir,
            "Fusion failed.",
        )
        if not fused.exists():
            raise RuntimeError("Fusion failed.")

        # 5) Meshing (Open3D). If not available, just return fused point cloud.
        mesh_paths = []
        preview_path = fused  # default to point cloud preview

        if o3d is not None:
            pcd = o3d.io.read_point_cloud(str(fused))
            if voxel and voxel > 0:
                pcd = pcd.voxel_down_sample(voxel)
            pcd.estimate_normals(o3d.geometry.KDTreeSearchParamKNN(knn=20))

            # Poisson surface reconstruction. Slider values are cast because
            # the Open3D bindings expect integers here.
            mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=int(depth))
            mesh.remove_degenerate_triangles()
            mesh.remove_duplicated_triangles()
            mesh.remove_duplicated_vertices()
            mesh.remove_non_manifold_edges()

            if tris and tris > 0:
                mesh = mesh.simplify_quadric_decimation(int(tris))

            mesh.compute_vertex_normals()

            mesh_ply = run_dir / "mesh.ply"
            mesh_obj = run_dir / "mesh.obj"
            o3d.io.write_triangle_mesh(str(mesh_ply), mesh)
            o3d.io.write_triangle_mesh(str(mesh_obj), mesh)
            mesh_paths = [mesh_ply, mesh_obj]
            preview_path = mesh_ply

        # Downloads: the fused cloud plus any meshes, each listed once.
        file_list = [str(p) for p in [fused] + mesh_paths if Path(p).exists()]

        return str(preview_path), file_list, "\n".join(logs[-80:]), gr.update(visible=True)

    except Exception as e:
        # Top-level boundary for the UI callback: report the failure in the
        # log panel instead of letting the exception escape.
        logs.append("\n[ERROR]\n" + textwrap.fill(str(e), width=100))
        return None, [], "\n".join(logs[-120:]), gr.update(visible=False)
186
-
187
def build_ui():
    """Build and return the Gradio Blocks app for the COLMAP reconstruction demo.

    Left column: image upload with a thumbnail gallery, reconstruction and
    meshing settings, and the run button. Right column: 3D preview, download
    list and log panel. The run button calls ``run_pipeline`` with the
    settings as inputs.
    """
    with gr.Blocks(title="Sparse Multi-View 3D (Urban Planning)", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 🗺️ Sparse Multi-View 3D for Urban Planning

            Upload **3–30 photos** of a scene (streetscape, plaza, façade). We estimate camera poses with **COLMAP**,
            build a **dense point cloud**, and (optionally) **mesh** it with Open3D.

            **Tips for sparse captures:** overlap ~60–70%, vary viewpoint (walk an arc), avoid moving cars/people.
            """
        )
        with gr.Row():
            with gr.Column(scale=2):
                images = gr.File(label="Upload images (JPG/PNG)", file_types=["image"], file_count="multiple")
                gallery = gr.Gallery(label="Preview", columns=6, height=160)

                def _show_gallery(files: List[gr.File]):
                    """Return (image, caption) pairs for every upload that can be opened."""
                    thumbs = []
                    for upload in files or []:
                        try:
                            with Image.open(upload.name) as im:
                                # Gallery items are (image, caption): image first.
                                thumbs.append((im.convert("RGB"), Path(upload.name).name))
                        except Exception:
                            # Best effort: an unreadable upload is simply left
                            # out of the thumbnail strip.
                            continue
                    return thumbs

                images.change(_show_gallery, inputs=images, outputs=gallery)

                with gr.Accordion("Reconstruction settings", open=False):
                    max_px = gr.Slider(1024, 4096, value=2400, step=64, label="Max image size (px, longest side)")
                    match_mode = gr.Radio(["exhaustive", "sequential", "spatial"], value="sequential", label="Matching mode")
                    use_gpu_sift = gr.Checkbox(True, label="Use GPU SIFT if available")

                with gr.Accordion("Meshing", open=True):
                    voxel = gr.Slider(0.0, 0.05, value=0.01, step=0.005, label="Voxel downsample (m, approx units)")
                    depth = gr.Slider(6, 12, value=9, step=1, label="Poisson depth (higher → more detail)")
                    tris = gr.Slider(0, 500_000, value=150_000, step=10_000, label="Target triangles (0 = keep)")

                run = gr.Button("▶ Reconstruct 3D", variant="primary")

            with gr.Column(scale=1):
                preview = gr.Model3D(label="Preview (PLY/OBJ)", visible=False)
                outputs = gr.Files(label="Downloads")
                logs = gr.Markdown("Logs will appear here…")

        # run_pipeline returns four values; the last one is a visibility
        # update for the preview, which is why ``preview`` is listed twice.
        run.click(
            run_pipeline,
            inputs=[images, max_px, match_mode, use_gpu_sift, voxel, depth, tris],
            outputs=[preview, outputs, logs, preview],
            queue=True,
        )

        gr.Markdown(
            """
            ### Notes & Scaling
            - Results are in **arbitrary units** (SfM scale). For metric scale, align in GIS/CAD with known distances.
            - Outdoor scenes with repetitive textures (glass/trees) can be challenging—add more oblique views if possible.
            """
        )
    return demo


if __name__ == "__main__":
    build_ui().launch()
 
 
 
 
 
 
1
  import os
2
+ import io
3
+ import tempfile
4
+ import numpy as np
5
+ from PIL import Image
 
 
6
 
7
  import gradio as gr
8
+ import torch
9
+ from transformers import GLPNForDepthEstimation, GLPNImageProcessor
10
+
11
+ import open3d as o3d
12
+
13
+ # ------------------------------
14
+ # Model setup (loaded once)
15
+ # ------------------------------
16
# Pick the GPU when one is visible to torch, otherwise run on the CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# The image processor and the depth model come from the same checkpoint and
# are created once at import time.
FE = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
MODEL = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
MODEL = MODEL.to(DEVICE)
MODEL.eval()
20
+
21
+ # ------------------------------
22
+ # Utilities
23
+ # ------------------------------
24
def _resize_to_mult32(img: Image.Image, max_h=480):
    """Resize keeping aspect, cap height to max_h, and make both dims multiple of 32.

    The height is rounded down and the width is rounded to the nearest
    multiple of 32. Both sides are clamped to at least 64 px so that small or
    very narrow inputs never round to a zero-sized image and still have
    pixels left after ``predict_depth`` crops its 16 px border.
    """
    min_side = 64  # smallest multiple of 32 that survives a 16 px crop per side

    new_h = min(max_h, img.height)
    new_h = max(min_side, new_h - new_h % 32)

    new_w = int(new_h * img.width / img.height)
    # Round to the nearest multiple of 32 (a remainder of 16 rounds up).
    new_w = max(min_side, (new_w + 16) // 32 * 32)

    return img.resize((new_w, new_h), Image.BICUBIC)
32
+
33
def predict_depth(image_pil: Image.Image):
    """Estimate depth for one image with the module-level GLPN model.

    Returns ``(rgb, depth)``: the resized RGB image with a 16 px border
    cropped off (PIL) and the matching depth map as a float32 numpy array,
    multiplied by 1000.
    """
    border = 16  # border trimmed from both the depth map and the RGB image

    resized = _resize_to_mult32(image_pil.convert("RGB"))
    batch = FE(images=resized, return_tensors="pt").to(DEVICE)

    with torch.no_grad():
        prediction = MODEL(**batch).predicted_depth

    # squeeze() drops the singleton batch dimension, leaving a 2-D map.
    depth_map = prediction.squeeze().float().cpu().numpy() * 1000.0  # scale for nicer contrast
    depth_map = depth_map[border:-border, border:-border]

    crop_box = (border, border, resized.width - border, resized.height - border)
    return resized.crop(crop_box), depth_map
49
+
50
def depth_to_colormap(depth: np.ndarray):
    """Return a PIL image (plasma colormap) from depth for preview.

    The depth map is min-max normalised to 0..255 and looked up in
    Matplotlib's "plasma" colormap; the alpha channel is dropped. The input
    array is not modified.
    """
    import matplotlib

    d = depth.copy()
    d -= d.min()
    peak = d.max()
    if peak > 0:
        d /= peak
    d8 = (d * 255).astype(np.uint8)

    # ``matplotlib.cm.get_cmap`` no longer exists in recent Matplotlib; the
    # ``colormaps`` registry is the supported lookup. Fall back to the legacy
    # call only on releases that predate the registry.
    try:
        cmap = matplotlib.colormaps["plasma"]
    except AttributeError:
        import matplotlib.cm as cm

        cmap = cm.get_cmap("plasma")

    # Integer input indexes the colormap's lookup table directly.
    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(colored)
67
+
68
def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
    """Back-project an RGB image and its relative depth map into a point cloud.

    Depth is min-max normalised and quantised to uint8 before being paired
    with the colour image. A pinhole camera with focal length 500 and the
    principal point at the image centre is used for the projection. Outliers
    are removed statistically and normals are estimated and oriented when any
    points remain. Returns the (possibly empty) Open3D point cloud.
    """
    color = np.array(rgb_pil)  # H, W, 3 (uint8)
    height, width = color.shape[:2]

    # Normalize depth to 0..1 then to 0..255 uint8 for Open3D RGBD convenience
    scaled = depth.copy()
    scaled -= scaled.min()
    if scaled.max() > 0:
        scaled /= scaled.max()
    depth_u8 = (scaled * 255).astype(np.uint8)

    rgbd = o3d.geometry.RGBDImage.create_from_color_and_depth(
        o3d.geometry.Image(color),
        o3d.geometry.Image(depth_u8),
        convert_rgb_to_intensity=False,
    )

    camera = o3d.camera.PinholeCameraIntrinsic()
    camera.set_intrinsics(width, height, 500.0, 500.0, width / 2.0, height / 2.0)

    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd, camera)
    if len(cloud.points) == 0:
        return cloud

    # Clean & orient normals
    _, inliers = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
    cloud = cloud.select_by_index(inliers)
    if len(cloud.points) == 0:
        return cloud

    cloud.estimate_normals()
    cloud.orient_normals_to_align_with_direction()
    return cloud
100
+
101
def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
    """Run Poisson surface reconstruction on *pcd* and return the mesh.

    Returns ``None`` for an empty point cloud. The resulting mesh is rotated
    by pi around the x axis about the origin and gets vertex normals computed.
    """
    if not len(pcd.points):
        return None

    poisson = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson
    surface, _densities = poisson(pcd, depth=depth, n_threads=1)

    # Rotate 180° around x for typical camera convention
    flip = surface.get_rotation_matrix_from_xyz((np.pi, 0, 0))
    surface.rotate(flip, center=(0, 0, 0))
    surface.compute_vertex_normals()
    return surface
112
+
113
def save_o3d(obj, path):
    """Write an Open3D point cloud or triangle mesh to *path*.

    Point clouds must use a ``.ply`` extension; meshes may use ``.obj`` or
    ``.ply`` (the extension check is case-insensitive).

    Raises:
        ValueError: unsupported extension for the object type, or *obj* is
            neither a point cloud nor a triangle mesh.
        OSError: Open3D reported that the file could not be written.
    """
    ext = os.path.splitext(path)[1].lower()

    if isinstance(obj, o3d.geometry.PointCloud):
        if ext != ".ply":
            raise ValueError("Point cloud: please save as .ply")
        written = o3d.io.write_point_cloud(path, obj)
    elif isinstance(obj, o3d.geometry.TriangleMesh):
        if ext not in (".obj", ".ply"):
            raise ValueError("Mesh: use .obj or .ply")
        written = o3d.io.write_triangle_mesh(path, obj)
    else:
        raise ValueError("Unsupported type for saving")

    # The Open3D writers signal failure through their boolean return value
    # rather than an exception; surface it so callers never hand out a path
    # to a missing or empty file.
    if not written:
        raise OSError(f"Open3D failed to write {path!r}")
129
+
130
def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
    """
    Try offscreen render for a preview PNG. If it fails (e.g., no EGL/OSMesa),
    we log the reason, return None and rely on the Model3D viewer + downloads.

    Note: a mesh without vertex colors is painted a uniform light grey in
    place before rendering.
    """
    try:
        from open3d.visualization import rendering

        # Make sure mesh has vertex colors or a default material
        if not mesh.has_vertex_colors():
            mesh.paint_uniform_color([0.8, 0.8, 0.85])

        renderer = rendering.OffscreenRenderer(width, height)
        mat = rendering.MaterialRecord()
        mat.shader = "defaultLit"

        scene = renderer.scene
        scene.set_background([1, 1, 1, 1])
        scene.add_geometry("mesh", mesh, mat)

        bbox = mesh.get_axis_aligned_bounding_box()
        center = bbox.get_center()
        extent = bbox.get_extent()
        # Camera distance derived from the bounding-box diagonal; the epsilon
        # keeps the eye off the target for a degenerate box.
        radius = np.linalg.norm(extent) * 0.8 + 1e-6

        # Camera looking at center from +z
        cam = scene.camera
        cam.look_at(center, center + [0, 0, radius], [0, 1, 0])

        img_o3d = renderer.render_to_image()
        img = np.asarray(img_o3d)
        return Image.fromarray(img)
    except Exception:
        # Best effort by design: the preview is optional, but record why it
        # is missing instead of failing silently.
        import logging

        logging.getLogger(__name__).warning(
            "Offscreen mesh render failed; no preview image will be shown.",
            exc_info=True,
        )
        return None
164
+
165
+ # ------------------------------
166
+ # Gradio pipeline
167
+ # ------------------------------
168
def _temp_path(suffix: str) -> str:
    """Create an empty, uniquely named temp file and return its path.

    The handle is closed before returning, so another writer can open the
    path on every platform.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def run_pipeline(image: Image.Image, poisson_depth: int = 10):
    """
    Main function wired to Gradio:
    returns (depth_preview_image, mesh_preview_png, pcd_ply_path, mesh_obj_path)

    Items that could not be produced are None: all four when no image is
    given, everything after the depth preview when the point cloud is empty,
    and the mesh items when no mesh is returned. The mesh preview is also
    None when offscreen rendering is unavailable.
    """
    if image is None:
        return None, None, None, None

    # 1) depth
    rgb, depth = predict_depth(image)
    depth_vis = depth_to_colormap(depth)

    # 2) point cloud
    pcd = rgbd_to_pointcloud(rgb, depth)
    if len(pcd.points) == 0:
        return depth_vis, None, None, None

    # Save the point cloud once, up front, so it is available whether or not
    # meshing succeeds.
    pcd_path = _temp_path(".ply")
    save_o3d(pcd, pcd_path)

    # 3) mesh (the slider value is cast because Open3D expects an integer depth)
    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))
    if mesh is None:
        # At least return PCD
        return depth_vis, None, pcd_path, None

    # 4) Save mesh in OBJ (works with Gradio Model3D)
    mesh_obj_path = _temp_path(".obj")
    save_o3d(mesh, mesh_obj_path)

    # 5) mesh preview (best effort)
    preview = render_mesh_image(mesh, 768, 512)

    return depth_vis, preview, pcd_path, mesh_obj_path
208
+
209
+ # ------------------------------
210
+ # Interface
211
+ # ------------------------------
212
TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"
DESC = """
Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
and downloads for .ply (point cloud) and .obj (mesh).
**Note:** monocular depth lacks absolute scale; this is for visualization/demo purposes.
"""


def _mirror_mesh_path(mesh_path):
    """Hand the viewer's current mesh path to the download widget unchanged."""
    return mesh_path


with gr.Blocks(title="2D → 3D Reconstruction") as demo:
    gr.Markdown("# " + TITLE)
    gr.Markdown(DESC)

    # Top row: inputs on the left, image previews on the right.
    with gr.Row():
        with gr.Column():
            in_img = gr.Image(type="pil", label="Input Image")
            poisson_depth = gr.Slider(5, 12, value=10, step=1, label="Poisson depth (mesh detail)")
            run_btn = gr.Button("Reconstruct 3D", variant="primary")

        with gr.Column():
            depth_out = gr.Image(label="Depth Map (colormap)")
            mesh_preview = gr.Image(label="Mesh Preview (offscreen render)", visible=True)

    # Bottom row: downloads and the interactive mesh viewer.
    with gr.Row():
        pcd_file = gr.File(label="Download Point Cloud (.ply)")
        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

    run_btn.click(
        fn=run_pipeline,
        inputs=[in_img, poisson_depth],
        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view],
    )
    # Also expose mesh file separately (same path as viewer output): whenever
    # the viewer's value changes, copy it to the download component.
    mesh_obj_view.change(_mirror_mesh_path, inputs=mesh_obj_view, outputs=mesh_obj_file)


if __name__ == "__main__":
    demo.launch()