Update app.py
Browse files
app.py
CHANGED
|
@@ -1,155 +1,1662 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
-
from PIL import Image
|
| 6 |
import torch
|
| 7 |
-
|
|
|
|
| 8 |
import open3d as o3d
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
else ("mps" if getattr(torch.backends, "mps", None) and torch.backends.mps.is_available() else "cpu")
|
| 19 |
-
)
|
| 20 |
-
MODEL_ID = "vinvino02/glpn-nyu"
|
| 21 |
-
PROCESSOR = GLPNImageProcessor.from_pretrained(MODEL_ID)
|
| 22 |
-
MODEL = GLPNForDepthEstimation.from_pretrained(MODEL_ID).to(DEVICE).eval()
|
| 23 |
-
|
| 24 |
-
# ---- Helpers ----
|
| 25 |
-
def _resize_main(pil_img: Image.Image):
|
| 26 |
-
new_h = max(32, min(pil_img.height, 480))
|
| 27 |
-
new_h -= new_h % 32
|
| 28 |
-
new_w = int(new_h * pil_img.width / max(1, pil_img.height))
|
| 29 |
-
return pil_img.resize((new_w, new_h), Image.BILINEAR), (pil_img.width, pil_img.height)
|
| 30 |
-
|
| 31 |
-
@torch.inference_mode()
|
| 32 |
-
def _depth_pred_float(pil_img: Image.Image) -> np.ndarray:
|
| 33 |
-
resized, (W, H) = _resize_main(pil_img)
|
| 34 |
-
inputs = PROCESSOR(images=resized, return_tensors="pt")
|
| 35 |
-
inputs = {k: v.to(DEVICE) for k, v in inputs.items()}
|
| 36 |
-
out = MODEL(**inputs).predicted_depth # [1, h, w]
|
| 37 |
-
up = torch.nn.functional.interpolate(out.unsqueeze(1), size=(H, W), mode="bicubic", align_corners=False).squeeze(1)
|
| 38 |
-
return up[0].detach().float().cpu().numpy()
|
| 39 |
-
|
| 40 |
-
def _depth_preview_u8(d: np.ndarray) -> Image.Image:
|
| 41 |
-
d = d - d.min()
|
| 42 |
-
mx = float(d.max()) if d.size else 1.0
|
| 43 |
-
if mx <= 0: mx = 1.0
|
| 44 |
-
return Image.fromarray((255.0 * d / mx).astype(np.uint8))
|
| 45 |
-
|
| 46 |
-
def _depth_to_metric_meters(d: np.ndarray, near=0.3, far=5.0) -> np.ndarray:
|
| 47 |
-
lo, hi = np.percentile(d, [2.0, 98.0])
|
| 48 |
-
d01 = np.clip((d - lo) / max(hi - lo, 1e-6), 0, 1).astype(np.float32)
|
| 49 |
-
return (near + d01 * (far - near)).astype(np.float32)
|
| 50 |
-
|
| 51 |
-
def _rgbd_for_open3d(rgb: Image.Image, depth_m: np.ndarray, far=5.0) -> o3d.geometry.RGBDImage:
|
| 52 |
-
depth_scale = 1000.0 # meters * 1000
|
| 53 |
-
depth_o3d = o3d.geometry.Image((depth_m * depth_scale).astype(np.float32))
|
| 54 |
-
color_o3d = o3d.geometry.Image(np.array(rgb.convert("RGB")))
|
| 55 |
-
return o3d.geometry.RGBDImage.create_from_color_and_depth(
|
| 56 |
-
color_o3d, depth_o3d, convert_rgb_to_intensity=False,
|
| 57 |
-
depth_scale=depth_scale, depth_trunc=far
|
| 58 |
-
)
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
return mesh
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
mesh.compute_vertex_normals()
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
| 100 |
try:
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
logs.append("1) Predicting depth (GLPN)β¦")
|
| 106 |
-
d_pred = _depth_pred_float(image)
|
| 107 |
-
depth_preview = _depth_preview_u8(d_pred)
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
-
|
| 119 |
-
out = Path("outputs"); out.mkdir(parents=True, exist_ok=True)
|
| 120 |
-
mesh_path = str(out / "mesh.ply")
|
| 121 |
-
o3d.io.write_triangle_mesh(mesh_path, mesh)
|
| 122 |
-
logs.append(f"Saved mesh β {mesh_path}")
|
| 123 |
-
logs.append(f"Mesh stats: Vertices={len(mesh.vertices):,} Triangles={len(mesh.triangles):,}")
|
| 124 |
|
| 125 |
-
|
| 126 |
-
viewer_path = str(out / "mesh_viewer.ply")
|
| 127 |
-
o3d.io.write_triangle_mesh(viewer_path, _normalize_for_view(mesh))
|
| 128 |
-
logs.append(f"Done in {time.time()-t0:.1f}s.")
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
btn = gr.Button("Run", variant="primary")
|
| 145 |
-
logs = gr.Textbox(label="Logs", lines=10)
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
|
|
|
|
|
|
|
| 153 |
|
| 154 |
-
|
| 155 |
-
demo.launch(
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Advanced 3D Reconstruction from Single/Multiple Images
|
| 3 |
+
Enhanced with Responsible AI features and multi-image support
|
| 4 |
+
Addresses: Privacy, Fairness, Explainability, Multiple Image Processing
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import gradio as gr
|
| 8 |
import numpy as np
|
|
|
|
| 9 |
import torch
|
| 10 |
+
from PIL import Image
|
| 11 |
+
from transformers import GLPNForDepthEstimation, GLPNImageProcessor, DPTForDepthEstimation, DPTImageProcessor
|
| 12 |
import open3d as o3d
|
| 13 |
+
import plotly.graph_objects as go
|
| 14 |
+
import matplotlib.pyplot as plt
|
| 15 |
+
import io
|
| 16 |
+
import json
|
| 17 |
+
import time
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
import tempfile
|
| 20 |
+
import zipfile
|
| 21 |
+
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
+
# ============================================================================
|
| 24 |
+
# RESPONSIBLE AI DOCUMENTATION
|
| 25 |
+
# ============================================================================
|
| 26 |
+
RESPONSIBLE_AI_TEXT = """
|
| 27 |
+
## Responsible AI & Ethics
|
| 28 |
+
|
| 29 |
+
### Model Limitations & Bias
|
| 30 |
+
|
| 31 |
+
**Training Data Geographic Bias:**
|
| 32 |
+
- **GLPN**: Trained on NYU Depth V2 dataset (primarily New York City indoor scenes)
|
| 33 |
+
- **Performance**: Excellent for Western urban interiors, office spaces, apartments
|
| 34 |
+
- **Limitations**: May underperform on non-Western architecture, outdoor scenes, rural settings
|
| 35 |
+
|
| 36 |
+
- **DPT**: Trained on mixed datasets (MIX 6 - multiple indoor/outdoor sources)
|
| 37 |
+
- **Performance**: Better generalization but still biased toward Western built environments
|
| 38 |
+
- **Limitations**: Less accurate for cultural artifacts, traditional architecture, natural landscapes
|
| 39 |
+
|
| 40 |
+
**Scene Type Performance:**
|
| 41 |
+
| Scene Type | GLPN Accuracy | DPT Accuracy | Notes |
|
| 42 |
+
|------------|---------------|--------------|-------|
|
| 43 |
+
| Modern Indoor (Western) | βββββ | βββββ | Optimal |
|
| 44 |
+
| Traditional Architecture | βββ | ββββ | May miss details |
|
| 45 |
+
| Outdoor/Natural | ββ | ββββ | GLPN struggles |
|
| 46 |
+
| Reflective Surfaces | β | ββ | Known failure case |
|
| 47 |
+
| Transparent Objects | β | β | Cannot estimate depth |
|
| 48 |
+
|
| 49 |
+
### Privacy Considerations
|
| 50 |
+
|
| 51 |
+
**Webcam Usage:**
|
| 52 |
+
- β οΈ **Warning**: Webcam captures are processed locally but may inadvertently capture:
|
| 53 |
+
- Identifiable people in background
|
| 54 |
+
- Sensitive documents or screens
|
| 55 |
+
- Private spaces or property
|
| 56 |
+
|
| 57 |
+
**Best Practices:**
|
| 58 |
+
- Only capture objects/spaces you have permission to document
|
| 59 |
+
- Ensure no people are in frame (or obtain consent)
|
| 60 |
+
- Avoid capturing sensitive information
|
| 61 |
+
- All processing is done locally - no images sent to external servers
|
| 62 |
+
|
| 63 |
+
**Data Retention:**
|
| 64 |
+
- Images are processed in memory only
|
| 65 |
+
- No automatic storage or logging
|
| 66 |
+
- Downloaded files are user-controlled
|
| 67 |
+
- No telemetry or usage tracking
|
| 68 |
+
|
| 69 |
+
### Explainability Features
|
| 70 |
+
|
| 71 |
+
This app provides multiple explainability layers:
|
| 72 |
+
|
| 73 |
+
1. **Depth Map Visualization**: Color-coded confidence in distance estimation
|
| 74 |
+
2. **Uncertainty Maps**: Shows where model is uncertain (darker = less confident)
|
| 75 |
+
3. **Quality Metrics**: Statistical measures of reconstruction reliability
|
| 76 |
+
4. **Outlier Detection**: Identifies and reports noisy predictions
|
| 77 |
+
5. **Model Comparison**: Compare GLPN vs DPT to understand model differences
|
| 78 |
+
|
| 79 |
+
### Fairness & Accessibility
|
| 80 |
+
|
| 81 |
+
**Accessibility Features:**
|
| 82 |
+
- File upload (primary method) - works for all users
|
| 83 |
+
- Webcam (optional) - for users with camera access
|
| 84 |
+
- Multiple format exports - compatible with free software
|
| 85 |
+
- Detailed documentation - no assumed prior knowledge
|
| 86 |
+
|
| 87 |
+
**Known Limitations:**
|
| 88 |
+
- Requires visual input (not accessible to blind users for capture)
|
| 89 |
+
- Processing time varies by hardware (may disadvantage low-resource users)
|
| 90 |
+
- Models optimized for Western scenes (geographic bias)
|
| 91 |
+
|
| 92 |
+
### Environmental Impact
|
| 93 |
+
|
| 94 |
+
**Computational Cost:**
|
| 95 |
+
- **GLPN Processing**: ~2GB RAM, 0.3-2.5s CPU time
|
| 96 |
+
- **DPT Processing**: ~5GB RAM, 0.8-6.5s CPU time
|
| 97 |
+
- **Carbon Estimate**: ~0.001-0.005 kWh per reconstruction
|
| 98 |
+
|
| 99 |
+
**Recommendations:**
|
| 100 |
+
- Use GLPN for most tasks (4x more efficient)
|
| 101 |
+
- Batch process multiple images to reduce overhead
|
| 102 |
+
- Consider hardware upgrade carbon cost vs processing efficiency
|
| 103 |
+
|
| 104 |
+
### Dual-Use & Misuse Prevention
|
| 105 |
+
|
| 106 |
+
**Prohibited Uses:**
|
| 107 |
+
- β Unauthorized surveillance or monitoring
|
| 108 |
+
- β Scanning people without explicit consent
|
| 109 |
+
- β Documenting property without permission
|
| 110 |
+
- β Creating deepfakes or deceptive content
|
| 111 |
+
- β Any use that violates privacy or dignity
|
| 112 |
+
|
| 113 |
+
**Intended Uses:**
|
| 114 |
+
- β
Educational research and learning
|
| 115 |
+
- β
Personal photography projects
|
| 116 |
+
- β
Architectural documentation (with permission)
|
| 117 |
+
- β
Product design and prototyping
|
| 118 |
+
- β
Cultural heritage preservation (authorized)
|
| 119 |
+
|
| 120 |
+
### Terms of Use
|
| 121 |
+
|
| 122 |
+
By using this application, you agree to:
|
| 123 |
+
1. Only process images you have rights to use
|
| 124 |
+
2. Not capture identifiable people without consent
|
| 125 |
+
3. Use outputs ethically and legally
|
| 126 |
+
4. Not use for surveillance or deceptive purposes
|
| 127 |
+
5. Understand model limitations and biases
|
| 128 |
+
|
| 129 |
+
**If you observe misuse or have ethical concerns, please report them.**
|
| 130 |
+
"""
|
| 131 |
+
|
| 132 |
+
# ============================================================================
|
| 133 |
+
# LITERATURE REVIEW & THEORETICAL BACKGROUND
|
| 134 |
+
# ============================================================================
|
| 135 |
+
THEORY_TEXT = """
|
| 136 |
+
## Theoretical Background
|
| 137 |
+
|
| 138 |
+
## About This Tool
|
| 139 |
+
|
| 140 |
+
This application demonstrates how artificial intelligence can convert single 2D photographs into interactive 3D models automatically.
|
| 141 |
+
|
| 142 |
+
### What Makes This Special
|
| 143 |
+
|
| 144 |
+
**Traditional Approach:**
|
| 145 |
+
- Need special equipment (3D scanner, multiple cameras)
|
| 146 |
+
- Requires technical expertise
|
| 147 |
+
- Time-consuming process
|
| 148 |
+
- Expensive
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## The Technology
|
| 152 |
+
|
| 153 |
+
### AI Models Used
|
| 154 |
+
|
| 155 |
+
This tool uses state-of-the-art artificial intelligence models:
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
### Depth Estimation Technology
|
| 159 |
+
|
| 160 |
+
**GLPN (Global-Local Path Networks)**
|
| 161 |
+
- Paper: Kim et al., CVPR 2022
|
| 162 |
+
- Optimized for: Indoor/outdoor architectural scenes
|
| 163 |
+
- Training: NYU Depth V2 (urban indoor environments)
|
| 164 |
+
- Best for: Building interiors, street-level views, architectural details
|
| 165 |
+
- Geographic advantage: Fast processing for field documentation
|
| 166 |
+
|
| 167 |
+
**DPT (Dense Prediction Transformer)**
|
| 168 |
+
- Paper: Ranftl et al., ICCV 2021
|
| 169 |
+
- Optimized for: Complex urban scenes
|
| 170 |
+
- Training: Multiple datasets (urban and natural environments)
|
| 171 |
+
- Best for: Wide-area urban landscapes, complex built environments
|
| 172 |
+
- Geographic advantage: Superior accuracy for planning-grade documentation
|
| 173 |
+
|
| 174 |
+
### How It Works (Simple)
|
| 175 |
+
1. **AI looks at photo** β Recognizes objects, patterns, perspective
|
| 176 |
+
2. **Estimates distance** β Figures out what's close, what's far
|
| 177 |
+
3. **Creates 3D points** β Places colored dots in 3D space
|
| 178 |
+
4. **Builds surface** β Connects dots into smooth shape
|
| 179 |
+
|
| 180 |
+
### Multi-Image Processing & Automatic Alignment (NEW!)
|
| 181 |
+
|
| 182 |
+
**Single Image Mode:**
|
| 183 |
+
- Fast, works from one photo
|
| 184 |
+
- Relative depth only (no absolute scale)
|
| 185 |
+
- Hidden surfaces cannot be reconstructed
|
| 186 |
+
|
| 187 |
+
**Multiple Image Mode:**
|
| 188 |
+
- Upload 2-8 images of same object/scene from different angles
|
| 189 |
+
- **Automatic Alignment**: Uses ICP (Iterative Closest Point) algorithm to align point clouds
|
| 190 |
+
- **Automatic Merging**: Combines aligned point clouds into unified 3D model
|
| 191 |
+
- No manual alignment needed - fully automated!
|
| 192 |
+
|
| 193 |
+
**Alignment Pipeline:**
|
| 194 |
+
1. **Feature Extraction**: Computes FPFH (Fast Point Feature Histograms) for each point cloud
|
| 195 |
+
2. **Global Registration**: RANSAC-based matching to find initial alignment
|
| 196 |
+
3. **Refinement**: ICP (Iterative Closest Point) for precise alignment
|
| 197 |
+
4. **Merging**: Combines aligned clouds, removes duplicates, creates unified mesh
|
| 198 |
+
|
| 199 |
+
**Why Multiple Images Help:**
|
| 200 |
+
- Complete 360Β° coverage (all sides visible)
|
| 201 |
+
- Better accuracy through redundancy
|
| 202 |
+
- More complete models
|
| 203 |
+
- Professional-grade results automatically!
|
| 204 |
+
"""
|
| 205 |
+
|
| 206 |
+
# ============================================================================
|
| 207 |
+
# MODEL LOADING
|
| 208 |
+
# ============================================================================
|
| 209 |
+
|
| 210 |
+
print("Loading GLPN model...")
|
| 211 |
+
glpn_processor = GLPNImageProcessor.from_pretrained("vinvino02/glpn-nyu")
|
| 212 |
+
glpn_model = GLPNForDepthEstimation.from_pretrained("vinvino02/glpn-nyu")
|
| 213 |
+
print("GLPN model loaded successfully!")
|
| 214 |
+
|
| 215 |
+
# DPT will be loaded on demand
|
| 216 |
+
dpt_model = None
|
| 217 |
+
dpt_processor = None
|
| 218 |
+
|
| 219 |
+
# ============================================================================
|
| 220 |
+
# UNCERTAINTY ESTIMATION
|
| 221 |
+
# ============================================================================
|
| 222 |
+
|
| 223 |
+
def estimate_uncertainty(depth_map):
|
| 224 |
+
"""
|
| 225 |
+
Estimate uncertainty in depth predictions
|
| 226 |
+
Higher values = less confident predictions
|
| 227 |
+
"""
|
| 228 |
+
# Compute local depth variance as proxy for uncertainty
|
| 229 |
+
from scipy.ndimage import generic_filter
|
| 230 |
+
|
| 231 |
+
def local_std(values):
|
| 232 |
+
return np.std(values)
|
| 233 |
+
|
| 234 |
+
# Compute local standard deviation
|
| 235 |
+
uncertainty = generic_filter(depth_map, local_std, size=5)
|
| 236 |
+
|
| 237 |
+
# Normalize to 0-1 range
|
| 238 |
+
uncertainty = (uncertainty - uncertainty.min()) / (uncertainty.max() - uncertainty.min() + 1e-8)
|
| 239 |
+
|
| 240 |
+
return uncertainty
|
| 241 |
+
|
| 242 |
+
# ============================================================================
|
| 243 |
+
# FAILURE CASE DETECTION
|
| 244 |
+
# ============================================================================
|
| 245 |
+
|
| 246 |
+
def detect_challenging_conditions(image, depth_map):
|
| 247 |
+
"""
|
| 248 |
+
Detect challenging scenarios that may lead to poor reconstruction
|
| 249 |
+
Returns: List of warnings
|
| 250 |
+
"""
|
| 251 |
+
warnings = []
|
| 252 |
+
|
| 253 |
+
# Convert to numpy if needed
|
| 254 |
+
img_array = np.array(image)
|
| 255 |
+
|
| 256 |
+
# 1. Check for very dark images
|
| 257 |
+
brightness = np.mean(img_array)
|
| 258 |
+
if brightness < 50:
|
| 259 |
+
warnings.append("β οΈ Very dark image - may reduce depth accuracy")
|
| 260 |
+
|
| 261 |
+
# 2. Check for low contrast
|
| 262 |
+
std_dev = np.std(img_array)
|
| 263 |
+
if std_dev < 30:
|
| 264 |
+
warnings.append("β οΈ Low contrast - uniform textures reduce accuracy")
|
| 265 |
+
|
| 266 |
+
# 3. Check for potential reflective surfaces (high local variance in depth)
|
| 267 |
+
depth_variance = np.var(depth_map)
|
| 268 |
+
if depth_variance > np.percentile(np.var(depth_map.reshape(-1, 10), axis=1), 95):
|
| 269 |
+
warnings.append("β οΈ Possible reflective surfaces detected - depth may be inaccurate")
|
| 270 |
+
|
| 271 |
+
# 4. Check for extreme depth discontinuities (potential transparent objects)
|
| 272 |
+
from scipy.ndimage import sobel
|
| 273 |
+
depth_edges = np.sqrt(sobel(depth_map, axis=0)**2 + sobel(depth_map, axis=1)**2)
|
| 274 |
+
if np.percentile(depth_edges, 99) > 3 * np.percentile(depth_edges, 95):
|
| 275 |
+
warnings.append("β οΈ Sharp depth discontinuities - may indicate transparent/reflective objects")
|
| 276 |
+
|
| 277 |
+
# 5. Check image size
|
| 278 |
+
if image.width < 320 or image.height < 240:
|
| 279 |
+
warnings.append("β οΈ Low resolution image - use higher resolution for better results")
|
| 280 |
+
|
| 281 |
+
return warnings
|
| 282 |
+
|
| 283 |
+
# ============================================================================
|
| 284 |
+
# AUTOMATIC ALIGNMENT FUNCTIONS
|
| 285 |
+
# ============================================================================
|
| 286 |
+
|
| 287 |
+
def align_point_clouds(point_clouds):
|
| 288 |
+
"""
|
| 289 |
+
Automatically align multiple point clouds using ICP (Iterative Closest Point)
|
| 290 |
+
Returns aligned point clouds and transformation matrices
|
| 291 |
+
"""
|
| 292 |
+
if len(point_clouds) <= 1:
|
| 293 |
+
return point_clouds, []
|
| 294 |
+
|
| 295 |
+
print("\n" + "="*60)
|
| 296 |
+
print("Starting Automatic Alignment (ICP)")
|
| 297 |
+
print("="*60)
|
| 298 |
+
|
| 299 |
+
aligned_pcds = [point_clouds[0]] # First cloud is reference
|
| 300 |
+
transformations = []
|
| 301 |
+
|
| 302 |
+
for i in range(1, len(point_clouds)):
|
| 303 |
+
print(f"\nAligning point cloud {i+1} to reference...")
|
| 304 |
+
|
| 305 |
+
source = point_clouds[i]
|
| 306 |
+
target = aligned_pcds[0] # Always align to first cloud
|
| 307 |
+
|
| 308 |
+
# Initial alignment using global registration (faster, rough alignment)
|
| 309 |
+
print(f" Step 1: Computing FPFH features...")
|
| 310 |
+
source_down = source.voxel_down_sample(voxel_size=0.05)
|
| 311 |
+
target_down = target.voxel_down_sample(voxel_size=0.05)
|
| 312 |
+
|
| 313 |
+
source_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
|
| 314 |
+
target_down.estimate_normals(o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=30))
|
| 315 |
+
|
| 316 |
+
source_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
|
| 317 |
+
source_down,
|
| 318 |
+
o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=100)
|
| 319 |
+
)
|
| 320 |
+
target_fpfh = o3d.pipelines.registration.compute_fpfh_feature(
|
| 321 |
+
target_down,
|
| 322 |
+
o3d.geometry.KDTreeSearchParamHybrid(radius=0.25, max_nn=100)
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
print(f" Step 2: Global registration (RANSAC)...")
|
| 326 |
+
result_ransac = o3d.pipelines.registration.registration_ransac_based_on_feature_matching(
|
| 327 |
+
source_down, target_down, source_fpfh, target_fpfh,
|
| 328 |
+
mutual_filter=True,
|
| 329 |
+
max_correspondence_distance=0.15,
|
| 330 |
+
estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(False),
|
| 331 |
+
ransac_n=3,
|
| 332 |
+
checkers=[
|
| 333 |
+
o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
|
| 334 |
+
o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(0.15)
|
| 335 |
+
],
|
| 336 |
+
criteria=o3d.pipelines.registration.RANSACConvergenceCriteria(100000, 0.999)
|
| 337 |
+
)
|
| 338 |
+
|
| 339 |
+
print(f" Global registration fitness: {result_ransac.fitness:.4f}")
|
| 340 |
+
|
| 341 |
+
# Refine with ICP
|
| 342 |
+
print(f" Step 3: Refining with ICP...")
|
| 343 |
+
threshold = 0.02
|
| 344 |
+
result_icp = o3d.pipelines.registration.registration_icp(
|
| 345 |
+
source, target, threshold, result_ransac.transformation,
|
| 346 |
+
o3d.pipelines.registration.TransformationEstimationPointToPlane()
|
| 347 |
+
)
|
| 348 |
+
|
| 349 |
+
print(f" ICP fitness: {result_icp.fitness:.4f}")
|
| 350 |
+
print(f" ICP RMSE: {result_icp.inlier_rmse:.6f}")
|
| 351 |
+
|
| 352 |
+
# Apply transformation
|
| 353 |
+
source_aligned = source.transform(result_icp.transformation)
|
| 354 |
+
aligned_pcds.append(source_aligned)
|
| 355 |
+
transformations.append(result_icp.transformation)
|
| 356 |
+
|
| 357 |
+
print(f" β Point cloud {i+1} aligned successfully!")
|
| 358 |
+
|
| 359 |
+
print("\n" + "="*60)
|
| 360 |
+
print(f"Alignment complete! All {len(point_clouds)} point clouds aligned.")
|
| 361 |
+
print("="*60 + "\n")
|
| 362 |
+
|
| 363 |
+
return aligned_pcds, transformations
|
| 364 |
+
|
| 365 |
+
def merge_point_clouds(aligned_pcds):
|
| 366 |
+
"""
|
| 367 |
+
Merge aligned point clouds into a single unified point cloud
|
| 368 |
+
"""
|
| 369 |
+
print("Merging aligned point clouds...")
|
| 370 |
+
merged = o3d.geometry.PointCloud()
|
| 371 |
+
|
| 372 |
+
for pcd in aligned_pcds:
|
| 373 |
+
merged += pcd
|
| 374 |
+
|
| 375 |
+
# Remove duplicate points and outliers
|
| 376 |
+
print("Cleaning merged point cloud...")
|
| 377 |
+
merged = merged.voxel_down_sample(voxel_size=0.01)
|
| 378 |
+
cl, ind = merged.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
|
| 379 |
+
merged = merged.select_by_index(ind)
|
| 380 |
+
|
| 381 |
+
print(f"Merged point cloud: {len(merged.points)} points")
|
| 382 |
+
return merged
|
| 383 |
+
|
| 384 |
+
def create_mesh_from_merged_pointcloud(pcd):
|
| 385 |
+
"""
|
| 386 |
+
Create a high-quality mesh from merged point cloud
|
| 387 |
+
"""
|
| 388 |
+
print("Creating mesh from merged point cloud...")
|
| 389 |
+
|
| 390 |
+
# Estimate normals
|
| 391 |
+
pcd.estimate_normals()
|
| 392 |
+
pcd.orient_normals_consistent_tangent_plane(100)
|
| 393 |
+
|
| 394 |
+
# Poisson reconstruction
|
| 395 |
+
mesh, densities = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
|
| 396 |
+
pcd, depth=10, n_threads=-1
|
| 397 |
+
)
|
| 398 |
+
|
| 399 |
+
# Remove low density vertices
|
| 400 |
+
vertices_to_remove = densities < np.quantile(densities, 0.01)
|
| 401 |
+
mesh.remove_vertices_by_mask(vertices_to_remove)
|
| 402 |
+
|
| 403 |
+
# Transfer colors
|
| 404 |
+
print("Transferring colors to merged mesh...")
|
| 405 |
+
pcd_tree = o3d.geometry.KDTreeFlann(pcd)
|
| 406 |
+
mesh_colors = []
|
| 407 |
+
for vertex in mesh.vertices:
|
| 408 |
+
[_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1)
|
| 409 |
+
mesh_colors.append(pcd.colors[idx[0]])
|
| 410 |
+
mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors))
|
| 411 |
+
|
| 412 |
+
# Clean up
|
| 413 |
+
mesh.remove_degenerate_triangles()
|
| 414 |
+
mesh.remove_duplicated_triangles()
|
| 415 |
+
mesh.remove_duplicated_vertices()
|
| 416 |
+
mesh.remove_non_manifold_edges()
|
| 417 |
+
|
| 418 |
+
print(f"Merged mesh: {len(mesh.vertices)} vertices, {len(mesh.triangles)} triangles")
|
| 419 |
return mesh
|
| 420 |
|
| 421 |
+
# ============================================================================
|
| 422 |
+
# CORE 3D RECONSTRUCTION FUNCTIONS
|
| 423 |
+
# ============================================================================
|
| 424 |
+
|
| 425 |
+
def process_single_image(image, model_choice, image_idx=0, total_images=1):
|
| 426 |
+
"""Process a single image and return depth map, point cloud, mesh, and metrics"""
|
| 427 |
+
|
| 428 |
+
print(f"\n{'='*60}")
|
| 429 |
+
print(f"Processing image {image_idx+1}/{total_images}")
|
| 430 |
+
print(f"{'='*60}")
|
| 431 |
+
|
| 432 |
+
# STEP 1: Preprocess image
|
| 433 |
+
print("Step 1: Preprocessing image...")
|
| 434 |
+
new_height = 480 if image.height > 480 else image.height
|
| 435 |
+
new_height -= (new_height % 32)
|
| 436 |
+
new_width = int(new_height * image.width / image.height)
|
| 437 |
+
diff = new_width % 32
|
| 438 |
+
new_width = new_width - diff if diff < 16 else new_width + (32 - diff)
|
| 439 |
+
new_size = (new_width, new_height)
|
| 440 |
+
image = image.resize(new_size, Image.LANCZOS)
|
| 441 |
+
print(f"Image resized to: {new_size}")
|
| 442 |
+
|
| 443 |
+
# STEP 2: Depth estimation
|
| 444 |
+
print("Step 2: Estimating depth...")
|
| 445 |
+
if model_choice == "GLPN (Recommended)":
|
| 446 |
+
processor = glpn_processor
|
| 447 |
+
model = glpn_model
|
| 448 |
+
else:
|
| 449 |
+
global dpt_model, dpt_processor
|
| 450 |
+
if dpt_model is None:
|
| 451 |
+
print("Loading DPT model (first time only)...")
|
| 452 |
+
dpt_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
|
| 453 |
+
dpt_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
|
| 454 |
+
processor = dpt_processor
|
| 455 |
+
model = dpt_model
|
| 456 |
+
|
| 457 |
+
inputs = processor(images=image, return_tensors="pt")
|
| 458 |
+
|
| 459 |
+
start_time = time.time()
|
| 460 |
+
with torch.no_grad():
|
| 461 |
+
outputs = model(**inputs)
|
| 462 |
+
predicted_depth = outputs.predicted_depth
|
| 463 |
+
|
| 464 |
+
depth_time = time.time() - start_time
|
| 465 |
+
print(f"Depth estimation completed in {depth_time:.2f}s")
|
| 466 |
+
|
| 467 |
+
# Process depth output
|
| 468 |
+
pad = 16
|
| 469 |
+
output = predicted_depth.squeeze().cpu().numpy() * 1000.0
|
| 470 |
+
output = output[pad:-pad, pad:-pad]
|
| 471 |
+
image_cropped = image.crop((pad, pad, image.width - pad, image.height - pad))
|
| 472 |
+
|
| 473 |
+
# Ensure depth and image have same dimensions
|
| 474 |
+
depth_height, depth_width = output.shape
|
| 475 |
+
img_width, img_height = image_cropped.size
|
| 476 |
+
|
| 477 |
+
print(f"After crop - Depth shape: {output.shape}, Image size: {image_cropped.size}")
|
| 478 |
+
|
| 479 |
+
# Resize depth to match image if needed
|
| 480 |
+
if depth_height != img_height or depth_width != img_width:
|
| 481 |
+
print(f"Resizing depth from ({depth_height}, {depth_width}) to ({img_height}, {img_width})")
|
| 482 |
+
from scipy import ndimage
|
| 483 |
+
zoom_factors = (img_height / depth_height, img_width / depth_width)
|
| 484 |
+
output = ndimage.zoom(output, zoom_factors, order=1)
|
| 485 |
+
print(f"Depth resized to: {output.shape}")
|
| 486 |
+
|
| 487 |
+
image = image_cropped
|
| 488 |
+
|
| 489 |
+
# STEP 3: Estimate uncertainty
|
| 490 |
+
print("Step 3: Estimating uncertainty...")
|
| 491 |
+
uncertainty_map = estimate_uncertainty(output)
|
| 492 |
+
|
| 493 |
+
# STEP 4: Detect challenging conditions
|
| 494 |
+
print("Step 4: Detecting challenging conditions...")
|
| 495 |
+
warnings = detect_challenging_conditions(image, output)
|
| 496 |
+
|
| 497 |
+
# STEP 5: Create point cloud
|
| 498 |
+
print("Step 5: Generating point cloud...")
|
| 499 |
+
width, height = image.size
|
| 500 |
+
|
| 501 |
+
depth_image = (output * 255 / np.max(output)).astype(np.uint8)
|
| 502 |
+
image_array = np.array(image)
|
| 503 |
+
|
| 504 |
+
print(f"Creating RGBD - Image: {image_array.shape}, Depth: {depth_image.shape}")
|
| 505 |
+
|
| 506 |
+
depth_o3d = o3d.geometry.Image(depth_image)
|
| 507 |
+
image_o3d = o3d.geometry.Image(image_array)
|
| 508 |
+
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
|
| 509 |
+
image_o3d, depth_o3d, convert_rgb_to_intensity=False
|
| 510 |
+
)
|
| 511 |
+
|
| 512 |
+
camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
|
| 513 |
+
camera_intrinsic.set_intrinsics(width, height, 500, 500, width/2, height/2)
|
| 514 |
+
|
| 515 |
+
pcd = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera_intrinsic)
|
| 516 |
+
initial_points = len(pcd.points)
|
| 517 |
+
print(f"Initial point cloud: {initial_points} points")
|
| 518 |
+
|
| 519 |
+
# STEP 6: Clean point cloud
|
| 520 |
+
print("Step 6: Cleaning point cloud...")
|
| 521 |
+
cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
|
| 522 |
+
pcd = pcd.select_by_index(ind)
|
| 523 |
+
outliers_removed = initial_points - len(pcd.points)
|
| 524 |
+
print(f"Removed {outliers_removed} outliers")
|
| 525 |
+
|
| 526 |
+
# STEP 7: Estimate normals
|
| 527 |
+
print("Step 7: Estimating normals...")
|
| 528 |
+
pcd.estimate_normals()
|
| 529 |
+
pcd.orient_normals_to_align_with_direction()
|
| 530 |
+
|
| 531 |
+
# STEP 8: Create mesh
|
| 532 |
+
print("Step 8: Creating mesh...")
|
| 533 |
+
mesh_start = time.time()
|
| 534 |
+
mesh = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
|
| 535 |
+
pcd, depth=10, n_threads=1
|
| 536 |
+
)[0]
|
| 537 |
+
|
| 538 |
+
# Transfer colors from point cloud to mesh vertices
|
| 539 |
+
print("Transferring colors to mesh...")
|
| 540 |
+
pcd_tree = o3d.geometry.KDTreeFlann(pcd)
|
| 541 |
+
mesh_colors = []
|
| 542 |
+
for vertex in mesh.vertices:
|
| 543 |
+
[_, idx, _] = pcd_tree.search_knn_vector_3d(vertex, 1)
|
| 544 |
+
mesh_colors.append(pcd.colors[idx[0]])
|
| 545 |
+
mesh.vertex_colors = o3d.utility.Vector3dVector(np.array(mesh_colors))
|
| 546 |
+
|
| 547 |
+
# Rotate mesh
|
| 548 |
+
rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
|
| 549 |
+
mesh.rotate(rotation, center=(0, 0, 0))
|
| 550 |
+
mesh_time = time.time() - mesh_start
|
| 551 |
+
print(f"Mesh created in {mesh_time:.2f}s")
|
| 552 |
+
|
| 553 |
+
# STEP 9: Compute quality metrics
|
| 554 |
+
print("Step 9: Computing metrics...")
|
| 555 |
mesh.compute_vertex_normals()
|
| 556 |
+
|
| 557 |
+
metrics = {
|
| 558 |
+
'image_index': image_idx + 1,
|
| 559 |
+
'model_used': model_choice,
|
| 560 |
+
'depth_estimation_time': f"{depth_time:.2f}s",
|
| 561 |
+
'mesh_reconstruction_time': f"{mesh_time:.2f}s",
|
| 562 |
+
'total_time': f"{depth_time + mesh_time:.2f}s",
|
| 563 |
+
'initial_points': initial_points,
|
| 564 |
+
'outliers_removed': outliers_removed,
|
| 565 |
+
'final_points': len(pcd.points),
|
| 566 |
+
'vertices': len(mesh.vertices),
|
| 567 |
+
'triangles': len(mesh.triangles),
|
| 568 |
+
'is_edge_manifold': mesh.is_edge_manifold(),
|
| 569 |
+
'is_vertex_manifold': mesh.is_vertex_manifold(),
|
| 570 |
+
'is_watertight': mesh.is_watertight(),
|
| 571 |
+
'warnings': warnings,
|
| 572 |
+
'avg_uncertainty': float(np.mean(uncertainty_map))
|
| 573 |
+
}
|
| 574 |
+
|
| 575 |
+
# Compute surface area
|
| 576 |
+
try:
|
| 577 |
+
vertices = np.asarray(mesh.vertices)
|
| 578 |
+
triangles = np.asarray(mesh.triangles)
|
| 579 |
+
v0 = vertices[triangles[:, 0]]
|
| 580 |
+
v1 = vertices[triangles[:, 1]]
|
| 581 |
+
v2 = vertices[triangles[:, 2]]
|
| 582 |
+
cross = np.cross(v1 - v0, v2 - v0)
|
| 583 |
+
areas = 0.5 * np.linalg.norm(cross, axis=1)
|
| 584 |
+
total_area = np.sum(areas)
|
| 585 |
+
metrics['surface_area'] = float(total_area)
|
| 586 |
+
except:
|
| 587 |
+
metrics['surface_area'] = "Unable to compute"
|
| 588 |
+
|
| 589 |
+
# Compute volume if watertight
|
| 590 |
+
try:
|
| 591 |
+
if mesh.is_watertight():
|
| 592 |
+
volume = mesh.get_volume()
|
| 593 |
+
metrics['volume'] = float(volume)
|
| 594 |
+
else:
|
| 595 |
+
metrics['volume'] = None
|
| 596 |
+
except:
|
| 597 |
+
metrics['volume'] = None
|
| 598 |
+
|
| 599 |
+
return {
|
| 600 |
+
'image': image,
|
| 601 |
+
'depth_map': output,
|
| 602 |
+
'uncertainty_map': uncertainty_map,
|
| 603 |
+
'point_cloud': pcd,
|
| 604 |
+
'mesh': mesh,
|
| 605 |
+
'metrics': metrics,
|
| 606 |
+
'warnings': warnings
|
| 607 |
+
}
|
| 608 |
|
| 609 |
+
def process_image(images, model_choice="GLPN (Recommended)", visualization_type="mesh", enable_alignment=True):
|
| 610 |
+
"""Main processing pipeline - handles single or multiple images with automatic alignment"""
|
| 611 |
+
|
| 612 |
+
if images is None or len(images) == 0:
|
| 613 |
+
return None, None, None, "Please upload at least one image.", None
|
| 614 |
+
|
| 615 |
try:
|
| 616 |
+
# Handle single image vs multiple images
|
| 617 |
+
if not isinstance(images, list):
|
| 618 |
+
images = [images]
|
| 619 |
+
|
| 620 |
+
num_images = len(images)
|
| 621 |
+
print(f"\n{'#'*60}")
|
| 622 |
+
print(f"Starting reconstruction with {num_images} image(s)")
|
| 623 |
+
print(f"Model: {model_choice}")
|
| 624 |
+
print(f"Automatic Alignment: {'Enabled' if enable_alignment and num_images > 1 else 'Disabled'}")
|
| 625 |
+
print(f"{'#'*60}\n")
|
| 626 |
+
|
| 627 |
+
# Process each image
|
| 628 |
+
results = []
|
| 629 |
+
for idx, img in enumerate(images):
|
| 630 |
+
result = process_single_image(img, model_choice, idx, num_images)
|
| 631 |
+
results.append(result)
|
| 632 |
+
|
| 633 |
+
# AUTOMATIC ALIGNMENT for multiple images
|
| 634 |
+
aligned_pcds = None
|
| 635 |
+
merged_pcd = None
|
| 636 |
+
merged_mesh = None
|
| 637 |
+
alignment_info = ""
|
| 638 |
+
|
| 639 |
+
if num_images > 1 and enable_alignment:
|
| 640 |
+
try:
|
| 641 |
+
# Extract point clouds
|
| 642 |
+
point_clouds = [r['point_cloud'] for r in results]
|
| 643 |
+
|
| 644 |
+
# Align them
|
| 645 |
+
aligned_pcds, transformations = align_point_clouds(point_clouds)
|
| 646 |
+
|
| 647 |
+
# Merge into single point cloud
|
| 648 |
+
merged_pcd = merge_point_clouds(aligned_pcds)
|
| 649 |
+
|
| 650 |
+
# Create unified mesh
|
| 651 |
+
merged_mesh = create_mesh_from_merged_pointcloud(merged_pcd)
|
| 652 |
+
|
| 653 |
+
alignment_info = f"""
|
| 654 |
+
### β¨ Automatic Alignment Results
|
| 655 |
|
| 656 |
+
Successfully aligned and merged {num_images} point clouds!
|
|
|
|
|
|
|
|
|
|
| 657 |
|
| 658 |
+
**Alignment Quality:**
|
| 659 |
+
"""
|
| 660 |
+
for i, trans in enumerate(transformations):
|
| 661 |
+
translation = np.linalg.norm(trans[:3, 3])
|
| 662 |
+
alignment_info += f"- Image {i+2} β Image 1: Translation distance = {translation:.3f} units\n"
|
| 663 |
+
|
| 664 |
+
alignment_info += f"""
|
| 665 |
+
**Merged Model Statistics:**
|
| 666 |
+
- Total Points: {len(merged_pcd.points):,}
|
| 667 |
+
- Mesh Vertices: {len(merged_mesh.vertices):,}
|
| 668 |
+
- Mesh Triangles: {len(merged_mesh.triangles):,}
|
| 669 |
+
- Watertight: {'β Yes' if merged_mesh.is_watertight() else 'β No (may need repair)'}
|
| 670 |
|
| 671 |
+
*The merged model provides a complete 360Β° reconstruction!*
|
| 672 |
+
"""
|
| 673 |
+
except Exception as e:
|
| 674 |
+
print(f"Alignment failed: {e}")
|
| 675 |
+
import traceback
|
| 676 |
+
traceback.print_exc()
|
| 677 |
+
alignment_info = f"""
|
| 678 |
+
### β οΈ Automatic Alignment Failed
|
| 679 |
|
| 680 |
+
Error: {str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
|
| 682 |
+
**Fallback:** Individual models exported separately. You can try manual alignment in CloudCompare/MeshLab.
|
|
|
|
|
|
|
|
|
|
| 683 |
|
| 684 |
+
**Common causes:**
|
| 685 |
+
- Insufficient overlap between images
|
| 686 |
+
- Very different viewpoints
|
| 687 |
+
- Lack of distinctive features
|
| 688 |
+
- Reflective/transparent surfaces
|
| 689 |
+
"""
|
| 690 |
+
|
| 691 |
+
# Create combined visualizations
|
| 692 |
+
print("\n" + "="*60)
|
| 693 |
+
print("Creating visualizations...")
|
| 694 |
+
print("="*60)
|
| 695 |
+
|
| 696 |
+
# 1. DEPTH MAP COMPARISON (for first image or grid for multiple)
|
| 697 |
+
if num_images == 1:
|
| 698 |
+
# Single image visualization
|
| 699 |
+
result = results[0]
|
| 700 |
+
fig, ax = plt.subplots(1, 3, figsize=(18, 6))
|
| 701 |
+
|
| 702 |
+
ax[0].imshow(result['image'])
|
| 703 |
+
ax[0].set_title('Original Image', fontsize=14, fontweight='bold')
|
| 704 |
+
ax[0].axis('off')
|
| 705 |
+
|
| 706 |
+
im1 = ax[1].imshow(result['depth_map'], cmap='plasma')
|
| 707 |
+
ax[1].set_title('Depth Map', fontsize=14, fontweight='bold')
|
| 708 |
+
ax[1].axis('off')
|
| 709 |
+
plt.colorbar(im1, ax=ax[1], fraction=0.046, pad=0.04)
|
| 710 |
+
|
| 711 |
+
im2 = ax[2].imshow(result['uncertainty_map'], cmap='Reds')
|
| 712 |
+
ax[2].set_title('Uncertainty Map (Red = Less Confident)', fontsize=14, fontweight='bold')
|
| 713 |
+
ax[2].axis('off')
|
| 714 |
+
plt.colorbar(im2, ax=ax[2], fraction=0.046, pad=0.04)
|
| 715 |
+
|
| 716 |
+
plt.tight_layout()
|
| 717 |
+
else:
|
| 718 |
+
# Multiple images - create grid
|
| 719 |
+
rows = (num_images + 1) // 2
|
| 720 |
+
fig, axes = plt.subplots(rows, 6, figsize=(24, 4*rows))
|
| 721 |
+
if rows == 1:
|
| 722 |
+
axes = axes.reshape(1, -1)
|
| 723 |
+
|
| 724 |
+
for idx, result in enumerate(results):
|
| 725 |
+
row = idx // 2
|
| 726 |
+
col = (idx % 2) * 3
|
| 727 |
+
|
| 728 |
+
axes[row, col].imshow(result['image'])
|
| 729 |
+
axes[row, col].set_title(f'Image {idx+1}', fontsize=12, fontweight='bold')
|
| 730 |
+
axes[row, col].axis('off')
|
| 731 |
+
|
| 732 |
+
im1 = axes[row, col+1].imshow(result['depth_map'], cmap='plasma')
|
| 733 |
+
axes[row, col+1].set_title(f'Depth {idx+1}', fontsize=12, fontweight='bold')
|
| 734 |
+
axes[row, col+1].axis('off')
|
| 735 |
+
|
| 736 |
+
im2 = axes[row, col+2].imshow(result['uncertainty_map'], cmap='Reds')
|
| 737 |
+
axes[row, col+2].set_title(f'Uncertainty {idx+1}', fontsize=12, fontweight='bold')
|
| 738 |
+
axes[row, col+2].axis('off')
|
| 739 |
+
|
| 740 |
+
# Hide unused subplots
|
| 741 |
+
for idx in range(num_images, rows * 2):
|
| 742 |
+
row = idx // 2
|
| 743 |
+
for col in range(3):
|
| 744 |
+
axes[row, (idx % 2) * 3 + col].axis('off')
|
| 745 |
+
|
| 746 |
+
plt.tight_layout()
|
| 747 |
+
|
| 748 |
+
buf = io.BytesIO()
|
| 749 |
+
plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
|
| 750 |
+
buf.seek(0)
|
| 751 |
+
depth_viz = Image.open(buf)
|
| 752 |
+
plt.close()
|
| 753 |
+
|
| 754 |
+
# 2. 3D VISUALIZATION
|
| 755 |
+
print("Creating 3D visualization...")
|
| 756 |
+
|
| 757 |
+
if num_images == 1:
|
| 758 |
+
# Single visualization
|
| 759 |
+
result = results[0]
|
| 760 |
+
points = np.asarray(result['point_cloud'].points)
|
| 761 |
+
colors = np.asarray(result['point_cloud'].colors)
|
| 762 |
+
mesh = result['mesh']
|
| 763 |
+
|
| 764 |
+
if visualization_type == "point_cloud":
|
| 765 |
+
scatter = go.Scatter3d(
|
| 766 |
+
x=points[:, 0], y=points[:, 1], z=points[:, 2],
|
| 767 |
+
mode='markers',
|
| 768 |
+
marker=dict(
|
| 769 |
+
size=2,
|
| 770 |
+
color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 771 |
+
for r, g, b in colors],
|
| 772 |
+
),
|
| 773 |
+
name='Point Cloud'
|
| 774 |
+
)
|
| 775 |
+
|
| 776 |
+
plotly_fig = go.Figure(data=[scatter])
|
| 777 |
+
plotly_fig.update_layout(
|
| 778 |
+
scene=dict(
|
| 779 |
+
xaxis=dict(visible=False),
|
| 780 |
+
yaxis=dict(visible=False),
|
| 781 |
+
zaxis=dict(visible=False),
|
| 782 |
+
aspectmode='data',
|
| 783 |
+
camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
|
| 784 |
+
),
|
| 785 |
+
margin=dict(l=0, r=0, t=30, b=0),
|
| 786 |
+
height=700,
|
| 787 |
+
title="Point Cloud"
|
| 788 |
+
)
|
| 789 |
+
|
| 790 |
+
elif visualization_type == "mesh":
|
| 791 |
+
vertices = np.asarray(mesh.vertices)
|
| 792 |
+
triangles = np.asarray(mesh.triangles)
|
| 793 |
+
|
| 794 |
+
if mesh.has_vertex_colors():
|
| 795 |
+
vertex_colors = np.asarray(mesh.vertex_colors)
|
| 796 |
+
colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 797 |
+
for r, g, b in vertex_colors]
|
| 798 |
+
|
| 799 |
+
mesh_trace = go.Mesh3d(
|
| 800 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 801 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 802 |
+
vertexcolor=colors_rgb,
|
| 803 |
+
opacity=0.95,
|
| 804 |
+
name='Mesh'
|
| 805 |
+
)
|
| 806 |
+
else:
|
| 807 |
+
mesh_trace = go.Mesh3d(
|
| 808 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 809 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 810 |
+
color='lightblue',
|
| 811 |
+
opacity=0.9,
|
| 812 |
+
name='Mesh'
|
| 813 |
+
)
|
| 814 |
+
|
| 815 |
+
plotly_fig = go.Figure(data=[mesh_trace])
|
| 816 |
+
plotly_fig.update_layout(
|
| 817 |
+
scene=dict(
|
| 818 |
+
xaxis=dict(visible=False),
|
| 819 |
+
yaxis=dict(visible=False),
|
| 820 |
+
zaxis=dict(visible=False),
|
| 821 |
+
aspectmode='data',
|
| 822 |
+
camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
|
| 823 |
+
),
|
| 824 |
+
margin=dict(l=0, r=0, t=30, b=0),
|
| 825 |
+
height=700,
|
| 826 |
+
title="3D Mesh"
|
| 827 |
+
)
|
| 828 |
+
|
| 829 |
+
else: # both
|
| 830 |
+
from plotly.subplots import make_subplots
|
| 831 |
+
|
| 832 |
+
vertices = np.asarray(mesh.vertices)
|
| 833 |
+
triangles = np.asarray(mesh.triangles)
|
| 834 |
+
|
| 835 |
+
scatter = go.Scatter3d(
|
| 836 |
+
x=points[:, 0], y=points[:, 1], z=points[:, 2],
|
| 837 |
+
mode='markers',
|
| 838 |
+
marker=dict(
|
| 839 |
+
size=2,
|
| 840 |
+
color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 841 |
+
for r, g, b in colors],
|
| 842 |
+
),
|
| 843 |
+
name='Point Cloud'
|
| 844 |
+
)
|
| 845 |
+
|
| 846 |
+
if mesh.has_vertex_colors():
|
| 847 |
+
vertex_colors = np.asarray(mesh.vertex_colors)
|
| 848 |
+
colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 849 |
+
for r, g, b in vertex_colors]
|
| 850 |
+
|
| 851 |
+
mesh_trace = go.Mesh3d(
|
| 852 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 853 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 854 |
+
vertexcolor=colors_rgb,
|
| 855 |
+
opacity=0.95,
|
| 856 |
+
name='Mesh'
|
| 857 |
+
)
|
| 858 |
+
else:
|
| 859 |
+
mesh_trace = go.Mesh3d(
|
| 860 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 861 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 862 |
+
color='lightblue',
|
| 863 |
+
opacity=0.9,
|
| 864 |
+
name='Mesh'
|
| 865 |
+
)
|
| 866 |
+
|
| 867 |
+
plotly_fig = make_subplots(
|
| 868 |
+
rows=1, cols=2,
|
| 869 |
+
specs=[[{'type': 'scatter3d'}, {'type': 'scatter3d'}]],
|
| 870 |
+
subplot_titles=('Point Cloud', '3D Mesh')
|
| 871 |
+
)
|
| 872 |
+
|
| 873 |
+
plotly_fig.add_trace(scatter, row=1, col=1)
|
| 874 |
+
plotly_fig.add_trace(mesh_trace, row=1, col=2)
|
| 875 |
+
|
| 876 |
+
plotly_fig.update_layout(
|
| 877 |
+
scene=dict(
|
| 878 |
+
xaxis=dict(visible=False),
|
| 879 |
+
yaxis=dict(visible=False),
|
| 880 |
+
zaxis=dict(visible=False),
|
| 881 |
+
aspectmode='data',
|
| 882 |
+
camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
|
| 883 |
+
),
|
| 884 |
+
scene2=dict(
|
| 885 |
+
xaxis=dict(visible=False),
|
| 886 |
+
yaxis=dict(visible=False),
|
| 887 |
+
zaxis=dict(visible=False),
|
| 888 |
+
aspectmode='data',
|
| 889 |
+
camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
|
| 890 |
+
),
|
| 891 |
+
height=600,
|
| 892 |
+
showlegend=False,
|
| 893 |
+
margin=dict(l=0, r=0, t=50, b=0)
|
| 894 |
+
)
|
| 895 |
+
|
| 896 |
+
else:
|
| 897 |
+
# Multiple images - show all reconstructions
|
| 898 |
+
traces = []
|
| 899 |
+
|
| 900 |
+
if merged_pcd is not None and merged_mesh is not None:
|
| 901 |
+
# Show the merged result
|
| 902 |
+
points = np.asarray(merged_pcd.points)
|
| 903 |
+
colors = np.asarray(merged_pcd.colors)
|
| 904 |
+
|
| 905 |
+
if visualization_type == "point_cloud" or visualization_type == "both":
|
| 906 |
+
scatter = go.Scatter3d(
|
| 907 |
+
x=points[:, 0], y=points[:, 1], z=points[:, 2],
|
| 908 |
+
mode='markers',
|
| 909 |
+
marker=dict(
|
| 910 |
+
size=1.5,
|
| 911 |
+
color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 912 |
+
for r, g, b in colors],
|
| 913 |
+
),
|
| 914 |
+
name='Merged Point Cloud'
|
| 915 |
+
)
|
| 916 |
+
traces.append(scatter)
|
| 917 |
+
|
| 918 |
+
if visualization_type == "mesh" or visualization_type == "both":
|
| 919 |
+
vertices = np.asarray(merged_mesh.vertices)
|
| 920 |
+
triangles = np.asarray(merged_mesh.triangles)
|
| 921 |
+
|
| 922 |
+
if merged_mesh.has_vertex_colors():
|
| 923 |
+
vertex_colors = np.asarray(merged_mesh.vertex_colors)
|
| 924 |
+
colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 925 |
+
for r, g, b in vertex_colors]
|
| 926 |
+
|
| 927 |
+
mesh_trace = go.Mesh3d(
|
| 928 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 929 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 930 |
+
vertexcolor=colors_rgb,
|
| 931 |
+
opacity=0.95,
|
| 932 |
+
name='Merged Mesh',
|
| 933 |
+
lighting=dict(ambient=0.5, diffuse=0.8, specular=0.2),
|
| 934 |
+
lightposition=dict(x=100, y=100, z=100)
|
| 935 |
+
)
|
| 936 |
+
else:
|
| 937 |
+
mesh_trace = go.Mesh3d(
|
| 938 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 939 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 940 |
+
color='lightblue',
|
| 941 |
+
opacity=0.9,
|
| 942 |
+
name='Merged Mesh'
|
| 943 |
+
)
|
| 944 |
+
traces.append(mesh_trace)
|
| 945 |
+
|
| 946 |
+
plotly_fig = go.Figure(data=traces)
|
| 947 |
+
plotly_fig.update_layout(
|
| 948 |
+
scene=dict(
|
| 949 |
+
xaxis=dict(visible=False),
|
| 950 |
+
yaxis=dict(visible=False),
|
| 951 |
+
zaxis=dict(visible=False),
|
| 952 |
+
aspectmode='data',
|
| 953 |
+
camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
|
| 954 |
+
),
|
| 955 |
+
margin=dict(l=0, r=0, t=30, b=0),
|
| 956 |
+
height=700,
|
| 957 |
+
title=f"Merged Reconstruction from {num_images} Images"
|
| 958 |
+
)
|
| 959 |
+
else:
|
| 960 |
+
# Fallback: show individual reconstructions side by side
|
| 961 |
+
for idx, result in enumerate(results):
|
| 962 |
+
points = np.asarray(result['point_cloud'].points)
|
| 963 |
+
colors = np.asarray(result['point_cloud'].colors)
|
| 964 |
+
|
| 965 |
+
# Offset each point cloud to separate them
|
| 966 |
+
offset = idx * 2
|
| 967 |
+
points[:, 0] += offset
|
| 968 |
+
|
| 969 |
+
if visualization_type == "point_cloud" or visualization_type == "both":
|
| 970 |
+
scatter = go.Scatter3d(
|
| 971 |
+
x=points[:, 0], y=points[:, 1], z=points[:, 2],
|
| 972 |
+
mode='markers',
|
| 973 |
+
marker=dict(
|
| 974 |
+
size=2,
|
| 975 |
+
color=['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 976 |
+
for r, g, b in colors],
|
| 977 |
+
),
|
| 978 |
+
name=f'Point Cloud {idx+1}'
|
| 979 |
+
)
|
| 980 |
+
traces.append(scatter)
|
| 981 |
+
|
| 982 |
+
if visualization_type == "mesh" or visualization_type == "both":
|
| 983 |
+
mesh = result['mesh']
|
| 984 |
+
vertices = np.asarray(mesh.vertices)
|
| 985 |
+
vertices[:, 0] += offset # Apply same offset
|
| 986 |
+
triangles = np.asarray(mesh.triangles)
|
| 987 |
+
|
| 988 |
+
if mesh.has_vertex_colors():
|
| 989 |
+
vertex_colors = np.asarray(mesh.vertex_colors)
|
| 990 |
+
colors_rgb = ['rgb({},{},{})'.format(int(r*255), int(g*255), int(b*255))
|
| 991 |
+
for r, g, b in vertex_colors]
|
| 992 |
+
|
| 993 |
+
mesh_trace = go.Mesh3d(
|
| 994 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 995 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 996 |
+
vertexcolor=colors_rgb,
|
| 997 |
+
opacity=0.95,
|
| 998 |
+
name=f'Mesh {idx+1}'
|
| 999 |
+
)
|
| 1000 |
+
else:
|
| 1001 |
+
mesh_trace = go.Mesh3d(
|
| 1002 |
+
x=vertices[:, 0], y=vertices[:, 1], z=vertices[:, 2],
|
| 1003 |
+
i=triangles[:, 0], j=triangles[:, 1], k=triangles[:, 2],
|
| 1004 |
+
color='lightblue',
|
| 1005 |
+
opacity=0.9,
|
| 1006 |
+
name=f'Mesh {idx+1}'
|
| 1007 |
+
)
|
| 1008 |
+
traces.append(mesh_trace)
|
| 1009 |
+
|
| 1010 |
+
plotly_fig = go.Figure(data=traces)
|
| 1011 |
+
plotly_fig.update_layout(
|
| 1012 |
+
scene=dict(
|
| 1013 |
+
xaxis=dict(visible=False),
|
| 1014 |
+
yaxis=dict(visible=False),
|
| 1015 |
+
zaxis=dict(visible=False),
|
| 1016 |
+
aspectmode='data',
|
| 1017 |
+
camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
|
| 1018 |
+
),
|
| 1019 |
+
margin=dict(l=0, r=0, t=30, b=0),
|
| 1020 |
+
height=700,
|
| 1021 |
+
title=f"Individual Reconstructions (Side by Side)"
|
| 1022 |
+
)
|
| 1023 |
+
|
| 1024 |
+
# 3. EXPORT FILES
|
| 1025 |
+
print("Exporting files...")
|
| 1026 |
+
temp_dir = tempfile.mkdtemp()
|
| 1027 |
+
|
| 1028 |
+
all_metrics = []
|
| 1029 |
+
for idx, result in enumerate(results):
|
| 1030 |
+
prefix = f"image_{idx+1}_" if num_images > 1 else ""
|
| 1031 |
+
|
| 1032 |
+
# Save point cloud
|
| 1033 |
+
pcd_path = Path(temp_dir) / f"{prefix}point_cloud.ply"
|
| 1034 |
+
o3d.io.write_point_cloud(str(pcd_path), result['point_cloud'])
|
| 1035 |
+
|
| 1036 |
+
# Save mesh
|
| 1037 |
+
mesh_path = Path(temp_dir) / f"{prefix}mesh.ply"
|
| 1038 |
+
o3d.io.write_triangle_mesh(str(mesh_path), result['mesh'])
|
| 1039 |
+
|
| 1040 |
+
mesh_obj_path = Path(temp_dir) / f"{prefix}mesh.obj"
|
| 1041 |
+
o3d.io.write_triangle_mesh(str(mesh_obj_path), result['mesh'])
|
| 1042 |
+
|
| 1043 |
+
mesh_stl_path = Path(temp_dir) / f"{prefix}mesh.stl"
|
| 1044 |
+
o3d.io.write_triangle_mesh(str(mesh_stl_path), result['mesh'])
|
| 1045 |
+
|
| 1046 |
+
all_metrics.append(result['metrics'])
|
| 1047 |
+
|
| 1048 |
+
# Save merged results if available
|
| 1049 |
+
if merged_pcd is not None and merged_mesh is not None:
|
| 1050 |
+
merged_pcd_path = Path(temp_dir) / "MERGED_point_cloud.ply"
|
| 1051 |
+
o3d.io.write_point_cloud(str(merged_pcd_path), merged_pcd)
|
| 1052 |
+
|
| 1053 |
+
merged_mesh_path = Path(temp_dir) / "MERGED_mesh.ply"
|
| 1054 |
+
o3d.io.write_triangle_mesh(str(merged_mesh_path), merged_mesh)
|
| 1055 |
+
|
| 1056 |
+
merged_obj_path = Path(temp_dir) / "MERGED_mesh.obj"
|
| 1057 |
+
o3d.io.write_triangle_mesh(str(merged_obj_path), merged_mesh)
|
| 1058 |
+
|
| 1059 |
+
merged_stl_path = Path(temp_dir) / "MERGED_mesh.stl"
|
| 1060 |
+
o3d.io.write_triangle_mesh(str(merged_stl_path), merged_mesh)
|
| 1061 |
+
|
| 1062 |
+
# Save combined metrics
|
| 1063 |
+
combined_metrics = {
|
| 1064 |
+
'total_images': num_images,
|
| 1065 |
+
'processing_date': datetime.now().isoformat(),
|
| 1066 |
+
'model_used': model_choice,
|
| 1067 |
+
'alignment_enabled': enable_alignment and num_images > 1,
|
| 1068 |
+
'alignment_successful': merged_pcd is not None,
|
| 1069 |
+
'individual_results': all_metrics
|
| 1070 |
+
}
|
| 1071 |
+
|
| 1072 |
+
if merged_mesh is not None:
|
| 1073 |
+
combined_metrics['merged_stats'] = {
|
| 1074 |
+
'points': len(merged_pcd.points),
|
| 1075 |
+
'vertices': len(merged_mesh.vertices),
|
| 1076 |
+
'triangles': len(merged_mesh.triangles),
|
| 1077 |
+
'is_watertight': merged_mesh.is_watertight()
|
| 1078 |
+
}
|
| 1079 |
+
|
| 1080 |
+
metrics_path = Path(temp_dir) / "metrics.json"
|
| 1081 |
+
with open(metrics_path, 'w') as f:
|
| 1082 |
+
json.dump(combined_metrics, f, indent=2, default=str)
|
| 1083 |
+
|
| 1084 |
+
# Create zip
|
| 1085 |
+
zip_filename = f"reconstruction_{num_images}_images.zip" if num_images > 1 else "reconstruction_complete.zip"
|
| 1086 |
+
zip_path = Path(temp_dir) / zip_filename
|
| 1087 |
+
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
| 1088 |
+
for file in Path(temp_dir).glob("*"):
|
| 1089 |
+
if file.suffix != '.zip':
|
| 1090 |
+
zipf.write(file, file.name)
|
| 1091 |
+
|
| 1092 |
+
print("Files exported!")
|
| 1093 |
+
|
| 1094 |
+
# 4. CREATE REPORT
|
| 1095 |
+
if num_images == 1:
|
| 1096 |
+
result = results[0]
|
| 1097 |
+
metrics = result['metrics']
|
| 1098 |
+
warnings = result['warnings']
|
| 1099 |
+
|
| 1100 |
+
warnings_section = ""
|
| 1101 |
+
if warnings:
|
| 1102 |
+
warnings_section = "### β οΈ Detected Challenging Conditions\n" + "\n".join(warnings) + "\n\n"
|
| 1103 |
+
|
| 1104 |
+
report = f"""
|
| 1105 |
+
## Reconstruction Complete!
|
| 1106 |
|
| 1107 |
+
{warnings_section}
|
| 1108 |
+
|
| 1109 |
+
### Performance Metrics
|
| 1110 |
+
- **Model Used**: {metrics['model_used']}
|
| 1111 |
+
- **Depth Estimation Time**: {metrics['depth_estimation_time']}
|
| 1112 |
+
- **Mesh Reconstruction Time**: {metrics['mesh_reconstruction_time']}
|
| 1113 |
+
- **Total Processing Time**: {metrics['total_time']}
|
| 1114 |
+
|
| 1115 |
+
### Point Cloud Statistics
|
| 1116 |
+
- **Initial Points**: {metrics['initial_points']:,}
|
| 1117 |
+
- **Outliers Removed**: {metrics['outliers_removed']:,} ({(metrics['outliers_removed']/metrics['initial_points']*100):.1f}%)
|
| 1118 |
+
- **Final Points**: {metrics['final_points']:,}
|
| 1119 |
+
|
| 1120 |
+
### Mesh Quality
|
| 1121 |
+
- **Vertices**: {metrics['vertices']:,}
|
| 1122 |
+
- **Triangles**: {metrics['triangles']:,}
|
| 1123 |
+
- **Edge Manifold**: {'β Good topology' if metrics['is_edge_manifold'] else 'β Has non-manifold edges'}
|
| 1124 |
+
- **Vertex Manifold**: {'β Clean vertices' if metrics['is_vertex_manifold'] else 'β Has non-manifold vertices'}
|
| 1125 |
+
- **Watertight**: {'β Closed surface (3D printable)' if metrics['is_watertight'] else 'β Has boundaries (needs repair for 3D printing)'}
|
| 1126 |
+
- **Surface Area**: {metrics['surface_area'] if isinstance(metrics['surface_area'], str) else f"{metrics['surface_area']:.2f}"}
|
| 1127 |
+
- **Volume**: {f"{metrics['volume']:.2f}" if metrics.get('volume') else 'N/A (not watertight)'}
|
| 1128 |
+
|
| 1129 |
+
### Explainability Metrics
|
| 1130 |
+
- **Average Uncertainty**: {metrics['avg_uncertainty']:.3f} (lower is better)
|
| 1131 |
+
- Uncertainty shows where the model is less confident
|
| 1132 |
+
- Check the red heatmap for spatial distribution of uncertainty
|
| 1133 |
+
|
| 1134 |
+
### Files Exported
|
| 1135 |
+
- Point Cloud: PLY format
|
| 1136 |
+
- Mesh: PLY, OBJ, STL formats
|
| 1137 |
+
- Quality Metrics: JSON
|
| 1138 |
|
| 1139 |
+
**Download the complete package below!**
|
| 1140 |
+
"""
|
| 1141 |
+
else:
|
| 1142 |
+
# Multiple images report
|
| 1143 |
+
total_time = sum(float(r['metrics']['total_time'].replace('s', '')) for r in results)
|
| 1144 |
+
total_points = sum(r['metrics']['final_points'] for r in results)
|
| 1145 |
+
total_vertices = sum(r['metrics']['vertices'] for r in results)
|
| 1146 |
+
|
| 1147 |
+
all_warnings = []
|
| 1148 |
+
for idx, result in enumerate(results):
|
| 1149 |
+
if result['warnings']:
|
| 1150 |
+
all_warnings.append(f"\n**Image {idx+1}:**\n" + "\n".join(result['warnings']))
|
| 1151 |
+
|
| 1152 |
+
warnings_section = ""
|
| 1153 |
+
if all_warnings:
|
| 1154 |
+
warnings_section = "### β οΈ Detected Challenging Conditions\n" + "\n".join(all_warnings) + "\n\n"
|
| 1155 |
+
|
| 1156 |
+
report = f"""
|
| 1157 |
+
## Multi-Image Reconstruction Complete!
|
| 1158 |
+
|
| 1159 |
+
Processed {num_images} images successfully.
|
| 1160 |
+
|
| 1161 |
+
{alignment_info}
|
| 1162 |
+
|
| 1163 |
+
{warnings_section}
|
| 1164 |
+
|
| 1165 |
+
### Overall Statistics
|
| 1166 |
+
- **Total Processing Time**: {total_time:.2f}s
|
| 1167 |
+
- **Total Final Points** (individual): {total_points:,}
|
| 1168 |
+
- **Total Vertices** (individual): {total_vertices:,}
|
| 1169 |
+
- **Model Used**: {model_choice}
|
| 1170 |
+
|
| 1171 |
+
### Individual Image Results
|
| 1172 |
+
|
| 1173 |
+
"""
|
| 1174 |
+
for idx, result in enumerate(results):
|
| 1175 |
+
m = result['metrics']
|
| 1176 |
+
report += f"""
|
| 1177 |
+
#### Image {idx+1}
|
| 1178 |
+
- Points: {m['final_points']:,}
|
| 1179 |
+
- Vertices: {m['vertices']:,}
|
| 1180 |
+
- Triangles: {m['triangles']:,}
|
| 1181 |
+
- Watertight: {'β' if m['is_watertight'] else 'β'}
|
| 1182 |
+
- Time: {m['total_time']}
|
| 1183 |
+
- Avg Uncertainty: {m['avg_uncertainty']:.3f}
|
| 1184 |
+
|
| 1185 |
+
"""
|
| 1186 |
+
|
| 1187 |
+
report += f"""
|
| 1188 |
+
### Files Exported
|
| 1189 |
+
- {num_images} Individual Point Clouds (PLY format)
|
| 1190 |
+
- {num_images} Individual Meshes (PLY, OBJ, STL formats)"""
|
| 1191 |
+
|
| 1192 |
+
if merged_pcd is not None:
|
| 1193 |
+
report += """
|
| 1194 |
+
- **MERGED_point_cloud.ply** - Unified aligned point cloud β
|
| 1195 |
+
- **MERGED_mesh.ply/obj/stl** - Unified aligned mesh β"""
|
| 1196 |
+
|
| 1197 |
+
report += """
|
| 1198 |
+
- Combined Metrics (JSON)
|
| 1199 |
+
|
| 1200 |
+
**Download the complete package below!**
|
| 1201 |
+
"""
|
| 1202 |
+
|
| 1203 |
+
# Create JSON output
|
| 1204 |
+
json_output = json.dumps(combined_metrics, indent=2, default=str)
|
| 1205 |
+
|
| 1206 |
+
print("SUCCESS! Returning results...")
|
| 1207 |
+
return depth_viz, plotly_fig, str(zip_path), report, json_output
|
| 1208 |
+
|
| 1209 |
+
except Exception as e:
|
| 1210 |
+
import traceback
|
| 1211 |
+
error_msg = f"Error during reconstruction:\n{str(e)}\n\nTraceback:\n{traceback.format_exc()}"
|
| 1212 |
+
print(error_msg)
|
| 1213 |
+
return None, None, None, error_msg, None
|
| 1214 |
|
| 1215 |
+
# ============================================================================
|
| 1216 |
+
# GRADIO INTERFACE
|
| 1217 |
+
# ============================================================================
|
|
|
|
|
|
|
| 1218 |
|
| 1219 |
+
with gr.Blocks(title="Advanced 3D Reconstruction", theme=gr.themes.Soft()) as demo:
|
| 1220 |
+
|
| 1221 |
+
gr.Markdown("""
|
| 1222 |
+
# ποΈ 3D Urban Reconstruction from Images
|
| 1223 |
+
|
| 1224 |
+
Transform 2D photographs into 3D spatial models with Responsible AI features
|
| 1225 |
+
|
| 1226 |
+
**NEW:** Multi-image support! Upload 1-8 images for more complete reconstructions.
|
| 1227 |
+
""")
|
| 1228 |
+
|
| 1229 |
+
# Responsible AI Warning Banner
|
| 1230 |
+
gr.Markdown("""
|
| 1231 |
+
<div style="background-color: #fff3cd; border-left: 4px solid #ffc107; padding: 15px; margin: 15px 0;">
|
| 1232 |
+
<strong>β οΈ Responsible Use Notice</strong><br>
|
| 1233 |
+
β’ Only process images you have rights to use<br>
|
| 1234 |
+
β’ Do not capture identifiable people without consent<br>
|
| 1235 |
+
β’ Be aware of model biases (trained primarily on Western indoor scenes)<br>
|
| 1236 |
+
β’ Check the "Responsible AI" tab for detailed ethical guidelines
|
| 1237 |
+
</div>
|
| 1238 |
+
""")
|
| 1239 |
+
|
| 1240 |
+
with gr.Tabs():
|
| 1241 |
+
|
| 1242 |
+
# ========== RECONSTRUCTION TAB ==========
|
| 1243 |
+
with gr.Tab("π§ Reconstruction"):
|
| 1244 |
+
with gr.Row():
|
| 1245 |
+
with gr.Column(scale=1):
|
| 1246 |
+
gr.Markdown("### πΈ Input Images")
|
| 1247 |
+
input_image = gr.File(
|
| 1248 |
+
file_count="multiple",
|
| 1249 |
+
file_types=["image"],
|
| 1250 |
+
label="Upload 1-8 Images (Single image for quick test, multiple for complete coverage)"
|
| 1251 |
+
)
|
| 1252 |
+
|
| 1253 |
+
gr.Markdown("""
|
| 1254 |
+
**Tips for multiple images:**
|
| 1255 |
+
- Capture object from different angles (360Β° coverage)
|
| 1256 |
+
- Ensure 30-50% overlap between views
|
| 1257 |
+
- Use consistent lighting across all shots
|
| 1258 |
+
- Keep camera distance similar
|
| 1259 |
+
- Automatic alignment will merge them into one model!
|
| 1260 |
+
""")
|
| 1261 |
+
|
| 1262 |
+
gr.Markdown("### βοΈ Model Settings")
|
| 1263 |
+
model_choice = gr.Radio(
|
| 1264 |
+
choices=["GLPN (Recommended)", "DPT (High Quality)"],
|
| 1265 |
+
value="GLPN (Recommended)",
|
| 1266 |
+
label="Depth Estimation Model",
|
| 1267 |
+
info="GLPN: Faster, good for indoor. DPT: Slower, better quality"
|
| 1268 |
+
)
|
| 1269 |
+
|
| 1270 |
+
visualization_type = gr.Radio(
|
| 1271 |
+
choices=["mesh", "point_cloud", "both"],
|
| 1272 |
+
value="mesh",
|
| 1273 |
+
label="3D Visualization Type",
|
| 1274 |
+
info="Mesh recommended for most users"
|
| 1275 |
+
)
|
| 1276 |
+
|
| 1277 |
+
enable_alignment = gr.Checkbox(
|
| 1278 |
+
value=True,
|
| 1279 |
+
label="Enable Automatic Alignment (for multiple images)",
|
| 1280 |
+
info="Uses ICP to automatically align and merge point clouds"
|
| 1281 |
+
)
|
| 1282 |
+
|
| 1283 |
+
reconstruct_btn = gr.Button("π Start Reconstruction", variant="primary", size="lg")
|
| 1284 |
+
|
| 1285 |
+
with gr.Column(scale=2):
|
| 1286 |
+
depth_output = gr.Image(label="Depth Maps & Uncertainty Analysis")
|
| 1287 |
+
viewer_3d = gr.Plot(label="Interactive 3D Viewer (Rotate, Zoom, Pan)")
|
| 1288 |
+
|
| 1289 |
+
with gr.Row():
|
| 1290 |
+
with gr.Column():
|
| 1291 |
+
metrics_output = gr.Markdown(label="Reconstruction Report")
|
| 1292 |
+
with gr.Column():
|
| 1293 |
+
json_output = gr.Textbox(label="Raw Metrics (JSON)", lines=10)
|
| 1294 |
+
|
| 1295 |
+
with gr.Row():
|
| 1296 |
+
download_output = gr.File(label="π¦ Download Complete Package (ZIP)")
|
| 1297 |
+
|
| 1298 |
+
# Process function needs to handle file objects from gr.File
|
| 1299 |
+
def process_uploaded_files(files, model, viz_type, align):
|
| 1300 |
+
if files is None:
|
| 1301 |
+
return None, None, None, "Please upload at least one image.", None
|
| 1302 |
+
|
| 1303 |
+
# Convert file objects to PIL Images
|
| 1304 |
+
images = []
|
| 1305 |
+
for file in files:
|
| 1306 |
+
img = Image.open(file.name)
|
| 1307 |
+
images.append(img)
|
| 1308 |
+
|
| 1309 |
+
return process_image(images, model, viz_type, align)
|
| 1310 |
+
|
| 1311 |
+
reconstruct_btn.click(
|
| 1312 |
+
fn=process_uploaded_files,
|
| 1313 |
+
inputs=[input_image, model_choice, visualization_type, enable_alignment],
|
| 1314 |
+
outputs=[depth_output, viewer_3d, download_output, metrics_output, json_output]
|
| 1315 |
+
)
|
| 1316 |
+
|
| 1317 |
+
# ========== RESPONSIBLE AI TAB ==========
|
| 1318 |
+
with gr.Tab("π‘οΈ Responsible AI & Ethics"):
|
| 1319 |
+
gr.Markdown(RESPONSIBLE_AI_TEXT)
|
| 1320 |
+
|
| 1321 |
+
gr.Markdown("""
|
| 1322 |
+
## Report Issues
|
| 1323 |
+
|
| 1324 |
+
If you observe:
|
| 1325 |
+
- Misuse of this technology
|
| 1326 |
+
- Significant bias in results
|
| 1327 |
+
- Privacy violations
|
| 1328 |
+
- Ethical concerns
|
| 1329 |
+
|
| 1330 |
+
Please contact: [Your institution's ethics board/contact]
|
| 1331 |
+
|
| 1332 |
+
## Acknowledgment of Limitations
|
| 1333 |
+
|
| 1334 |
+
This tool is provided for educational and research purposes. Users must:
|
| 1335 |
+
- Understand model limitations and biases
|
| 1336 |
+
- Use responsibly and ethically
|
| 1337 |
+
- Verify results with ground truth when critical
|
| 1338 |
+
- Not rely solely on AI for important decisions
|
| 1339 |
+
""")
|
| 1340 |
+
|
| 1341 |
+
# ========== THEORY TAB ==========
|
| 1342 |
+
with gr.Tab("π Theory & Background"):
|
| 1343 |
+
gr.Markdown(THEORY_TEXT)
|
| 1344 |
+
|
| 1345 |
+
gr.Markdown("""
|
| 1346 |
+
## Reconstruction Pipeline Details
|
| 1347 |
+
|
| 1348 |
+
This application uses an **enhanced 13-step automated pipeline** (with alignment):
|
| 1349 |
+
|
| 1350 |
+
**For Each Image:**
|
| 1351 |
+
1. **Image Preprocessing**: Resize to model requirements (divisible by 32)
|
| 1352 |
+
2. **Depth Estimation**: Neural network inference (GLPN or DPT)
|
| 1353 |
+
3. **Uncertainty Estimation**: Compute local depth variance as confidence measure
|
| 1354 |
+
4. **Failure Detection**: Identify challenging conditions (reflections, low contrast, etc.)
|
| 1355 |
+
5. **Point Cloud Generation**: Back-project using pinhole camera model
|
| 1356 |
+
6. **Outlier Removal**: Statistical filtering (20 neighbors, 2.0 std ratio)
|
| 1357 |
+
7. **Normal Estimation**: Local plane fitting for surface orientation
|
| 1358 |
+
8. **Mesh Reconstruction**: Poisson surface reconstruction (depth=10)
|
| 1359 |
+
9. **Quality Metrics**: Compute manifold properties and geometric measures
|
| 1360 |
+
|
| 1361 |
+
**For Multiple Images (Automatic Alignment):**
|
| 1362 |
+
10. **Feature Computation**: Extract FPFH descriptors from each point cloud
|
| 1363 |
+
11. **Global Registration**: RANSAC-based correspondence matching
|
| 1364 |
+
12. **ICP Refinement**: Iterative Closest Point for precise alignment
|
| 1365 |
+
13. **Merging & Export**: Combine aligned clouds, create unified mesh, export all formats
|
| 1366 |
+
|
| 1367 |
+
### Automatic Alignment Algorithm
|
| 1368 |
+
|
| 1369 |
+
**ICP (Iterative Closest Point):**
|
| 1370 |
+
- Industry-standard algorithm for point cloud registration
|
| 1371 |
+
- Iteratively minimizes distance between corresponding points
|
| 1372 |
+
- Achieves sub-millimeter accuracy in ideal conditions
|
| 1373 |
+
|
| 1374 |
+
**Process:**
|
| 1375 |
+
1. Downsample point clouds for speed (voxel size = 0.05)
|
| 1376 |
+
2. Compute FPFH features (Fast Point Feature Histograms)
|
| 1377 |
+
3. Find initial transformation with RANSAC (100,000 iterations)
|
| 1378 |
+
4. Refine with point-to-plane ICP (threshold = 0.02)
|
| 1379 |
+
5. Apply transformation and merge
|
| 1380 |
+
|
| 1381 |
+
**Quality Metrics:**
|
| 1382 |
+
- **Fitness**: Ratio of inlier correspondences (higher = better alignment)
|
| 1383 |
+
- **RMSE**: Root mean squared error of aligned points (lower = better)
|
| 1384 |
+
- Typical good values: Fitness > 0.7, RMSE < 0.05
|
| 1385 |
+
|
| 1386 |
+
## Model Comparison
|
| 1387 |
+
|
| 1388 |
+
| Feature | GLPN (Recommended) | DPT (High Quality) |
|
| 1389 |
+
|---------|-------------------|-------------------|
|
| 1390 |
+
| **Speed** | Fast (~0.3-2.5s) | Slower (~0.8-6.5s) |
|
| 1391 |
+
| **Quality** | Good | Excellent |
|
| 1392 |
+
| **Memory** | Low (~2GB) | High (~5GB) |
|
| 1393 |
+
| **Best For** | Indoor scenes, Real-time | Complex scenes, Highest quality |
|
| 1394 |
+
| **Training** | NYU Depth V2 (NYC indoors) | Multiple datasets |
|
| 1395 |
+
| **Geographic Bias** | High (Western indoor) | Moderate (more diverse) |
|
| 1396 |
+
|
| 1397 |
+
## Key References
|
| 1398 |
+
|
| 1399 |
+
1. **Kim, D., et al. (2022)**. "Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth." *CVPR 2022*
|
| 1400 |
+
2. **Ranftl, R., et al. (2021)**. "Vision Transformers for Dense Prediction." *ICCV 2021*
|
| 1401 |
+
3. **Kazhdan, M., et al. (2006)**. "Poisson Surface Reconstruction." *Eurographics Symposium on Geometry Processing*
|
| 1402 |
+
""")
|
| 1403 |
+
|
| 1404 |
+
# ========== USAGE GUIDE TAB ==========
|
| 1405 |
+
with gr.Tab("π Usage Guide"):
|
| 1406 |
+
gr.Markdown("""
|
| 1407 |
+
## How to Use This Application
|
| 1408 |
+
|
| 1409 |
+
### Step 1: Upload Image(s)
|
| 1410 |
+
|
| 1411 |
+
**Single Image Mode:**
|
| 1412 |
+
- Upload one JPG, PNG, or BMP file
|
| 1413 |
+
- Best for: Quick tests, simple objects, proof of concept
|
| 1414 |
+
- Limitation: Cannot see hidden surfaces
|
| 1415 |
+
|
| 1416 |
+
**Multiple Image Mode (NEW!):**
|
| 1417 |
+
- Upload 2-8 images of the same object/scene
|
| 1418 |
+
- Take photos from different angles (30-50% overlap recommended)
|
| 1419 |
+
- Best for: Complete 360Β° coverage, professional projects
|
| 1420 |
+
- Limitation: Requires manual alignment in external software
|
| 1421 |
+
|
| 1422 |
+
**Recommended Image Settings:**
|
| 1423 |
+
- Resolution: 512-1024px (optimal balance)
|
| 1424 |
+
- Lighting: Even, diffused (avoid harsh shadows)
|
| 1425 |
+
- Focus: Sharp, no motion blur
|
| 1426 |
+
- Scene: Textured objects with clear depth cues
|
| 1427 |
+
|
| 1428 |
+
### Step 2: Choose Model
|
| 1429 |
+
|
| 1430 |
+
**GLPN (Recommended):**
|
| 1431 |
+
- β
Fast processing (~0.3-2.5s)
|
| 1432 |
+
- β
Low memory requirements
|
| 1433 |
+
- β
Great for indoor scenes
|
| 1434 |
+
- β οΈ Trained on NYC apartments (geographic bias)
|
| 1435 |
+
- Best for: Quick iterations, indoor furniture, rooms
|
| 1436 |
+
|
| 1437 |
+
**DPT (High Quality):**
|
| 1438 |
+
- β
Superior quality
|
| 1439 |
+
- β
Better generalization
|
| 1440 |
+
- β
Handles complex scenes
|
| 1441 |
+
- β οΈ Slower processing (~0.8-6.5s)
|
| 1442 |
+
- β οΈ Higher memory usage (~5GB)
|
| 1443 |
+
- Best for: Final outputs, outdoor scenes, detailed work
|
| 1444 |
+
|
| 1445 |
+
### Step 3: Select Visualization
|
| 1446 |
+
- **Mesh**: Solid 3D surface (most intuitive)
|
| 1447 |
+
- **Point Cloud**: Individual colored 3D points (shows raw data)
|
| 1448 |
+
- **Both**: Side-by-side comparison
|
| 1449 |
+
|
| 1450 |
+
### Step 4: Review Results
|
| 1451 |
+
|
| 1452 |
+
**NEW: Uncertainty Maps**
|
| 1453 |
+
- Red areas = Model is less confident
|
| 1454 |
+
- Blue areas = Model is more confident
|
| 1455 |
+
- Use to identify problematic regions
|
| 1456 |
+
|
| 1457 |
+
**NEW: Automatic Warnings**
|
| 1458 |
+
The system now detects:
|
| 1459 |
+
- Very dark images
|
| 1460 |
+
- Low contrast/uniform textures
|
| 1461 |
+
- Potential reflective surfaces
|
| 1462 |
+
- Sharp discontinuities (transparent objects)
|
| 1463 |
+
- Low resolution inputs
|
| 1464 |
+
|
| 1465 |
+
### Step 5: Download & Use Files
|
| 1466 |
+
|
| 1467 |
+
**For Single Image:**
|
| 1468 |
+
- Download ZIP file with point cloud, mesh (PLY/OBJ/STL), and metrics
|
| 1469 |
+
|
| 1470 |
+
**For Multiple Images with Alignment:**
|
| 1471 |
+
- Download ZIP file containing:
|
| 1472 |
+
- Individual reconstructions (image_1_*, image_2_*, etc.)
|
| 1473 |
+
- **MERGED files** (automatically aligned and combined!) β
|
| 1474 |
+
- All formats: PLY, OBJ, STL
|
| 1475 |
+
- Metrics JSON with alignment quality
|
| 1476 |
+
|
| 1477 |
+
**The MERGED files are ready to use immediately - no manual alignment needed!**
|
| 1478 |
+
|
| 1479 |
+
### Understanding Alignment Results
|
| 1480 |
+
|
| 1481 |
+
**In the Report:**
|
| 1482 |
+
- **Translation Distance**: How far each image was moved to align (in arbitrary units)
|
| 1483 |
+
- **Merged Statistics**: Total points/vertices in unified model
|
| 1484 |
+
- **Watertight Status**: Whether merged mesh is 3D-printable
|
| 1485 |
+
|
| 1486 |
+
**If Alignment Fails:**
|
| 1487 |
+
- Not enough overlap between images
|
| 1488 |
+
- Very different viewpoints
|
| 1489 |
+
- Lack of distinctive features
|
| 1490 |
+
- Reflective/transparent surfaces
|
| 1491 |
+
- **Solution**: Retake photos with more overlap, or use manual alignment in CloudCompare
|
| 1492 |
+
|
| 1493 |
+
## Understanding Explainability Features
|
| 1494 |
+
|
| 1495 |
+
### Uncertainty Visualization
|
| 1496 |
+
- **What it shows**: Where the model is guessing vs confident
|
| 1497 |
+
- **How to use**: Avoid relying on high-uncertainty regions for measurements
|
| 1498 |
+
- **Threshold**: >0.7 uncertainty = very uncertain, <0.3 = confident
|
| 1499 |
+
|
| 1500 |
+
### Automatic Warning System
|
| 1501 |
+
The app now detects and warns about:
|
| 1502 |
+
|
| 1503 |
+
1. **Dark Images**: May reduce depth accuracy
|
| 1504 |
+
- Solution: Brighten image or use flash
|
| 1505 |
+
|
| 1506 |
+
2. **Low Contrast**: Uniform textures confuse depth estimation
|
| 1507 |
+
- Solution: Add textured reference objects
|
| 1508 |
+
|
| 1509 |
+
3. **Reflective Surfaces**: Mirrors, glass, polished metal
|
| 1510 |
+
- Solution: Use matte spray or avoid these materials
|
| 1511 |
+
|
| 1512 |
+
4. **Transparent Objects**: Glass, water, clear plastic
|
| 1513 |
+
- Solution: These cannot be reconstructed reliably
|
| 1514 |
+
|
| 1515 |
+
5. **Low Resolution**: <320x240 pixels
|
| 1516 |
+
- Solution: Use higher resolution camera
|
| 1517 |
+
|
| 1518 |
+
## Tips for Best Results
|
| 1519 |
+
|
| 1520 |
+
### DO:
|
| 1521 |
+
- β
Use well-lit images (natural diffused light best)
|
| 1522 |
+
- β
Include visible depth cues (corners, edges)
|
| 1523 |
+
- β
Use textured surfaces
|
| 1524 |
+
- β
Take multiple angles for complete coverage
|
| 1525 |
+
- β
Check uncertainty maps for problem areas
|
| 1526 |
+
- β
Read warnings and adjust accordingly
|
| 1527 |
+
|
| 1528 |
+
### AVOID:
|
| 1529 |
+
- β Motion blur or defocused images
|
| 1530 |
+
- β Reflective surfaces (mirrors, polished metal)
|
| 1531 |
+
- β Transparent objects (glass, clear plastic)
|
| 1532 |
+
- β Completely uniform textures (blank walls)
|
| 1533 |
+
- β Harsh shadows or backlighting
|
| 1534 |
+
- β Extreme close-ups or distant scenes
|
| 1535 |
+
|
| 1536 |
+
## Troubleshooting
|
| 1537 |
+
|
| 1538 |
+
**High uncertainty in depth map:**
|
| 1539 |
+
- Check warnings for specific issues
|
| 1540 |
+
- Try different lighting
|
| 1541 |
+
- Add textured objects for reference
|
| 1542 |
+
- Use DPT model instead of GLPN
|
| 1543 |
+
|
| 1544 |
+
**Poor alignment with multiple images:**
|
| 1545 |
+
- Ensure sufficient overlap (30-50%)
|
| 1546 |
+
- Use consistent lighting across all images
|
| 1547 |
+
- Maintain similar camera distance
|
| 1548 |
+
- Include distinctive features for matching
|
| 1549 |
+
- Avoid moving objects in scene
|
| 1550 |
+
- Try disabling alignment checkbox and use manual methods if needed
|
| 1551 |
+
|
| 1552 |
+
**Alignment takes too long:**
|
| 1553 |
+
- Normal for 4+ images (can take 2-5 minutes)
|
| 1554 |
+
- FPFH feature computation is intensive
|
| 1555 |
+
- Disable alignment if you prefer manual methods
|
| 1556 |
+
- Use fewer images for faster processing
|
| 1557 |
+
|
| 1558 |
+
**Model seems biased:**
|
| 1559 |
+
- Check "Responsible AI" tab for known limitations
|
| 1560 |
+
- GLPN works best on Western indoor scenes
|
| 1561 |
+
- Try DPT for non-Western or outdoor scenes
|
| 1562 |
+
- Document and report significant bias
|
| 1563 |
+
""")
|
| 1564 |
+
|
| 1565 |
+
# ========== CITATION TAB ==========
|
| 1566 |
+
with gr.Tab("π Citation & Credits"):
|
| 1567 |
+
gr.Markdown("""
|
| 1568 |
+
## Citation
|
| 1569 |
+
|
| 1570 |
+
If you use this tool in research, please cite:
|
| 1571 |
+
|
| 1572 |
+
### For GLPN Model:
|
| 1573 |
+
```bibtex
|
| 1574 |
+
@inproceedings{kim2022global,
|
| 1575 |
+
title={Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth},
|
| 1576 |
+
author={Kim, Doyeon and Ga, Woonghyun and Ahn, Pyungwhan and Joo, Donggyu and Chun, Sehwan and Kim, Junmo},
|
| 1577 |
+
booktitle={CVPR},
|
| 1578 |
+
year={2022}
|
| 1579 |
+
}
|
| 1580 |
+
```
|
| 1581 |
+
|
| 1582 |
+
### For DPT Model:
|
| 1583 |
+
```bibtex
|
| 1584 |
+
@inproceedings{ranftl2021vision,
|
| 1585 |
+
title={Vision Transformers for Dense Prediction},
|
| 1586 |
+
author={Ranftl, Ren{\'e} and Bochkovskiy, Alexey and Koltun, Vladlen},
|
| 1587 |
+
booktitle={ICCV},
|
| 1588 |
+
year={2021}
|
| 1589 |
+
}
|
| 1590 |
+
```
|
| 1591 |
+
|
| 1592 |
+
## Open Source Components
|
| 1593 |
+
|
| 1594 |
+
This application is built with:
|
| 1595 |
+
- **Transformers** (Hugging Face): Model inference
|
| 1596 |
+
- **Open3D**: Point cloud and mesh processing
|
| 1597 |
+
- **PyTorch**: Deep learning framework
|
| 1598 |
+
- **Plotly**: Interactive 3D visualization
|
| 1599 |
+
- **Gradio**: Web interface
|
| 1600 |
+
- **SciPy**: Uncertainty estimation
|
| 1601 |
+
- **Matplotlib**: Visualization
|
| 1602 |
+
|
| 1603 |
+
## Acknowledgments
|
| 1604 |
+
|
| 1605 |
+
- **NYU Depth V2 Dataset**: Training data for GLPN
|
| 1606 |
+
- **MIX 6 Dataset**: Training data for DPT
|
| 1607 |
+
- **Anthropic**: Responsible AI framework inspiration
|
| 1608 |
+
- **Open source community**: Essential tools and libraries
|
| 1609 |
+
|
| 1610 |
+
## Version History
|
| 1611 |
+
|
| 1612 |
+
**v2.0 (Current)** - Enhanced Responsible AI Version with Automatic Alignment
|
| 1613 |
+
- β¨ Multi-image support (1-8 images)
|
| 1614 |
+
- β¨ **Automatic alignment using ICP** (no manual work needed!)
|
| 1615 |
+
- β¨ **Automatic merging** into unified 3D model
|
| 1616 |
+
- β¨ Uncertainty estimation and visualization
|
| 1617 |
+
- β¨ Automatic failure case detection
|
| 1618 |
+
- β¨ Comprehensive warning system
|
| 1619 |
+
- β¨ Responsible AI documentation
|
| 1620 |
+
- β¨ Geographic bias disclosure
|
| 1621 |
+
- β¨ Privacy guidelines
|
| 1622 |
+
- β¨ Enhanced explainability
|
| 1623 |
+
|
| 1624 |
+
**v1.0** - Initial Release
|
| 1625 |
+
- Single image processing
|
| 1626 |
+
- GLPN and DPT models
|
| 1627 |
+
- Basic quality metrics
|
| 1628 |
+
- Multiple export formats
|
| 1629 |
+
""")
|
| 1630 |
+
|
| 1631 |
+
# ========== FOOTER ==========
|
| 1632 |
+
gr.Markdown("""
|
| 1633 |
+
---
|
| 1634 |
+
|
| 1635 |
+
## π Enhanced Features in This Version
|
| 1636 |
+
|
| 1637 |
+
**Multi-Image Support**: Process 1-8 images for comprehensive coverage
|
| 1638 |
+
|
| 1639 |
+
**Automatic Alignment**: ICP-based alignment automatically merges point clouds (no manual work!)
|
| 1640 |
+
|
| 1641 |
+
**Explainability**: Uncertainty maps show model confidence spatially
|
| 1642 |
+
|
| 1643 |
+
**Fairness**: Geographic bias documented, model limitations disclosed
|
| 1644 |
+
|
| 1645 |
+
**Privacy**: Clear guidelines, local processing, no data retention
|
| 1646 |
+
|
| 1647 |
+
**Safety**: Automatic detection of challenging conditions with warnings
|
| 1648 |
+
|
| 1649 |
+
---
|
| 1650 |
+
|
| 1651 |
+
**βοΈ Ethical Use Policy**: This tool is provided for educational and research purposes.
|
| 1652 |
+
Users are responsible for ensuring ethical and legal use of this technology.
|
| 1653 |
+
|
| 1654 |
+
**π§ Feedback**: Report issues, bias, or ethical concerns to your institution's ethics board.
|
| 1655 |
+
""")
|
| 1656 |
|
| 1657 |
+
# ============================================================================
|
| 1658 |
+
# LAUNCH
|
| 1659 |
+
# ============================================================================
|
| 1660 |
|
| 1661 |
+
if __name__ == "__main__":
|
| 1662 |
+
demo.launch(share=True)
|