Spaces:

Tohru127
/

codex-model

Running

App Files Files Community

codex-model / app.py

Tohru127

Update app.py

dc673c1 verified about 2 months ago

raw

history blame contribute delete

8.51 kB

	import os
	import io
	import tempfile
	import numpy as np
	from PIL import Image

	import gradio as gr
	import torch
	from transformers import GLPNForDepthEstimation, GLPNImageProcessor

	import open3d as o3d


	# ------------------------------
	# Model setup (loaded once)
	# ------------------------------
	# Single checkpoint id shared by the image processor and the model weights.
	_GLPN_CHECKPOINT = "vinvino02/glpn-nyu"

	# Use the GPU when torch reports one, otherwise fall back to the CPU.
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	else:
	    DEVICE = "cpu"

	# Pre-processing object that turns PIL images into model inputs.
	FE = GLPNImageProcessor.from_pretrained(_GLPN_CHECKPOINT)

	# Load the weights once at import time, move them to DEVICE, and switch to
	# evaluation mode for inference.
	MODEL = GLPNForDepthEstimation.from_pretrained(_GLPN_CHECKPOINT)
	MODEL = MODEL.to(DEVICE)
	MODEL.eval()


	# ------------------------------
	# Utilities
	# ------------------------------
	def _resize_to_mult32(img: Image.Image, max_h=480, min_side=64):
	    """Resize *img* so that both sides are non-zero multiples of 32.

	    The height is capped at ``max_h`` and rounded down to a multiple of 32.
	    The width follows the aspect ratio and is rounded to the nearest
	    multiple of 32.

	    Both sides are then clamped to at least ``min_side`` (rounded up to a
	    multiple of 32). Without the clamp, an image shorter than 32 px, or a
	    very narrow one, rounds to a zero-sized target and ``resize`` fails.
	    The default of 64 also leaves pixels over after ``predict_depth`` crops
	    16 px from every border. When ``max_h`` is smaller than the floor, the
	    floor wins.

	    Args:
	        img: Source image.
	        max_h: Upper bound for the output height before the floor applies.
	        min_side: Smallest allowed output width/height.

	    Returns:
	        The resized image (bicubic resampling).
	    """
	    # Smallest permitted side, itself a multiple of 32 and never below 32.
	    floor = max(32, ((int(min_side) + 31) // 32) * 32)

	    new_h = min(max_h, img.height)
	    new_h -= new_h % 32
	    new_h = max(new_h, floor)

	    # Width follows the (possibly clamped) height to keep the aspect ratio.
	    new_w = int(new_h * img.width / img.height)
	    diff = new_w % 32
	    new_w = new_w - diff if diff < 16 else new_w + (32 - diff)
	    new_w = max(new_w, floor)

	    return img.resize((new_w, new_h), Image.BICUBIC)


	def predict_depth(image_pil: Image.Image):
	    """Estimate depth for *image_pil* with the module-level GLPN model.

	    The image is converted to RGB, resized by ``_resize_to_mult32``, run
	    through ``FE`` and ``MODEL`` without gradient tracking, and a 16-pixel
	    border is dropped from both the prediction and the resized image.

	    Returns:
	        A ``(rgb, depth)`` tuple: the border-cropped PIL image and the
	        border-cropped prediction as a float numpy array multiplied by 1000.
	    """
	    border = 16  # pixels discarded on every side of image and prediction

	    resized = _resize_to_mult32(image_pil.convert("RGB"))
	    batch = FE(images=resized, return_tensors="pt").to(DEVICE)

	    # Inference only: no autograd bookkeeping needed.
	    with torch.no_grad():
	        prediction = MODEL(**batch).predicted_depth

	    # squeeze() drops the singleton dimensions so a 2-D map remains; the
	    # factor 1000 is only a rescale for nicer contrast.
	    depth_map = prediction.squeeze().float().cpu().numpy() * 1000.0
	    depth_map = depth_map[border:-border, border:-border]

	    crop_box = (border, border, resized.width - border, resized.height - border)
	    return resized.crop(crop_box), depth_map


	def depth_to_colormap(depth: np.ndarray):
	    """Return a PIL image that shows *depth* with the "plasma" colormap.

	    The depth values are min-max normalised to 0..1, quantised to 8 bits
	    and looked up in the colormap. The alpha channel is dropped. The input
	    array is not modified.

	    Args:
	        depth: 2-D array of depth values (any numeric dtype).

	    Returns:
	        An RGB ``PIL.Image.Image`` with the same height/width as *depth*.
	    """
	    import matplotlib

	    # ``matplotlib.cm.get_cmap`` was removed in Matplotlib 3.9; the
	    # ``matplotlib.colormaps`` registry is its replacement. Fall back to the
	    # old accessor only on releases that predate the registry.
	    try:
	        cmap = matplotlib.colormaps["plasma"]
	    except AttributeError:
	        import matplotlib.cm as cm

	        cmap = cm.get_cmap("plasma")

	    # Work on a float copy so the in-place maths below neither touches the
	    # caller's array nor fails on integer input.
	    d = np.array(depth, dtype=np.float32)
	    d -= d.min()
	    peak = d.max()
	    if peak > 0:  # a constant map stays all-zero instead of dividing by 0
	        d /= peak
	    d8 = (d * 255).astype(np.uint8)

	    # Integer input indexes the colormap's lookup table directly; the result
	    # is float RGBA in 0..1, of which only RGB is kept.
	    colored = (cmap(d8)[:, :, :3] * 255).astype(np.uint8)
	    return Image.fromarray(colored)


	def rgbd_to_pointcloud(rgb_pil: Image.Image, depth: np.ndarray):
	    """Build a coloured Open3D point cloud from an RGB image and a depth map.

	    The depth map is min-max normalised and quantised to uint8, paired with
	    the colour image as an Open3D RGBD image, and back-projected with a
	    fixed pinhole camera (focal length 500, principal point at the image
	    centre). Statistical outliers are removed and normals are estimated and
	    oriented when any points remain.

	    Returns:
	        The resulting ``o3d.geometry.PointCloud`` (possibly empty).
	    """
	    # Relative depth -> 0..1 -> 0..255 so it can be wrapped as an 8-bit image.
	    scaled = depth.copy()
	    scaled -= scaled.min()
	    if scaled.max() > 0:
	        scaled /= scaled.max()
	    depth_u8 = (scaled * 255).astype(np.uint8)

	    color_np = np.array(rgb_pil)  # H x W x 3 pixel array of the colour image
	    height, width = color_np.shape[:2]

	    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
	        o3d.geometry.Image(color_np),
	        o3d.geometry.Image(depth_u8),
	        convert_rgb_to_intensity=False,
	    )

	    # Synthetic camera: there is no calibration for an arbitrary upload.
	    camera = o3d.camera.PinholeCameraIntrinsic()
	    camera.set_intrinsics(width, height, 500.0, 500.0, width / 2.0, height / 2.0)

	    cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, camera)
	    if len(cloud.points) == 0:
	        return cloud

	    # Drop points that are statistical outliers w.r.t. their 20 neighbours.
	    _, inlier_idx = cloud.remove_statistical_outlier(nb_neighbors=20, std_ratio=2.0)
	    cloud = cloud.select_by_index(inlier_idx)
	    if len(cloud.points) == 0:
	        return cloud

	    # Normals are estimated here and made to point in a consistent direction.
	    cloud.estimate_normals()
	    cloud.orient_normals_to_align_with_direction()
	    return cloud


	def pointcloud_to_mesh(pcd: o3d.geometry.PointCloud, depth=10):
	    """Reconstruct a triangle mesh from *pcd* with Poisson reconstruction.

	    Args:
	        pcd: Point cloud to reconstruct from.
	        depth: Poisson octree depth. Coerced to ``int`` because UI widgets
	            may deliver the value as a float, which the Open3D binding may
	            not accept.

	    Returns:
	        The reconstructed ``o3d.geometry.TriangleMesh`` rotated 180 degrees
	        about the x axis with vertex normals computed, or ``None`` when the
	        point cloud is empty or the reconstruction yields no triangles.
	    """
	    if len(pcd.points) == 0:
	        return None

	    # The second return value (per-vertex densities) is not used.
	    mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
	        pcd, depth=int(depth), n_threads=1
	    )
	    if not mesh.has_triangles():
	        # Nothing usable was reconstructed; report it like the empty-input case.
	        return None

	    # Rotate 180 degrees around x for the typical camera convention.
	    rotation = mesh.get_rotation_matrix_from_xyz((np.pi, 0, 0))
	    mesh.rotate(rotation, center=(0, 0, 0))
	    mesh.compute_vertex_normals()
	    return mesh


	def save_o3d(obj, path):
	    """Write an Open3D point cloud or triangle mesh to *path*.

	    Args:
	        obj: ``o3d.geometry.PointCloud`` (``.ply`` only) or
	            ``o3d.geometry.TriangleMesh`` (``.obj`` or ``.ply``).
	        path: Destination file; the format is chosen from its extension
	            (case-insensitive).

	    Raises:
	        ValueError: If *obj* is of an unsupported type or the extension is
	            not allowed for that type.
	        OSError: If Open3D reports that the file could not be written.
	    """
	    ext = os.path.splitext(path)[1].lower()

	    if isinstance(obj, o3d.geometry.PointCloud):
	        if ext != ".ply":
	            raise ValueError("Point cloud: please save as .ply")
	        written = o3d.io.write_point_cloud(path, obj)
	    elif isinstance(obj, o3d.geometry.TriangleMesh):
	        if ext not in {".obj", ".ply"}:
	            raise ValueError("Mesh: use .obj or .ply")
	        written = o3d.io.write_triangle_mesh(path, obj)
	    else:
	        raise ValueError("Unsupported type for saving")

	    # The Open3D writers signal failure through a False return value rather
	    # than an exception; surface it instead of handing back a bad file.
	    if not written:
	        raise OSError(f"Open3D failed to write {path!r}")


	def render_mesh_image(mesh: o3d.geometry.TriangleMesh, width=640, height=480):
	    """Render a best-effort preview image of *mesh* off screen.

	    Offscreen rendering needs a working EGL/OSMesa setup. When anything in
	    the render path fails, the failure is logged and ``None`` is returned
	    so callers can fall back to the Model3D viewer and the downloads.

	    Args:
	        mesh: Mesh to draw. It is not modified; a copy is painted when the
	            mesh carries no vertex colours.
	        width: Output width in pixels.
	        height: Output height in pixels.

	    Returns:
	        A ``PIL.Image.Image`` of the render, or ``None`` on failure.
	    """
	    import logging

	    try:
	        from open3d.visualization import rendering

	        # Ensure the drawn geometry has some colour without repainting the
	        # caller's mesh.
	        target = mesh
	        if not mesh.has_vertex_colors():
	            target = o3d.geometry.TriangleMesh(mesh)
	            target.paint_uniform_color([0.8, 0.8, 0.85])

	        renderer = rendering.OffscreenRenderer(width, height)
	        mat = rendering.MaterialRecord()
	        mat.shader = "defaultLit"

	        scene = renderer.scene
	        scene.set_background([1, 1, 1, 1])
	        scene.add_geometry("mesh", target, mat)

	        # Place the eye on the +z side of the bounding-box centre, at a
	        # distance derived from the box size.
	        bbox = target.get_axis_aligned_bounding_box()
	        center = bbox.get_center()
	        radius = np.linalg.norm(bbox.get_extent()) * 0.8 + 1e-6
	        eye = center + [0, 0, radius]
	        scene.camera.look_at(center, eye, [0, 1, 0])

	        rendered = np.asarray(renderer.render_to_image())
	        return Image.fromarray(rendered)
	    except Exception:
	        # Deliberately best effort: keep the app running, but leave a trace
	        # of why there is no preview.
	        logging.getLogger(__name__).warning(
	            "Offscreen mesh render failed; continuing without a preview",
	            exc_info=True,
	        )
	        return None


	# ------------------------------
	# Gradio pipeline
	# ------------------------------
	def _save_to_temp(obj, suffix):
	    """Save *obj* via ``save_o3d`` to a new temp file and return its path."""
	    # Close the handle before Open3D opens the path itself: writing to a file
	    # that is still held open fails on Windows.
	    fd, path = tempfile.mkstemp(suffix=suffix)
	    os.close(fd)
	    try:
	        save_o3d(obj, path)
	    except Exception:
	        # Do not leave an empty placeholder behind when the write fails.
	        try:
	            os.remove(path)
	        except OSError:
	            pass
	        raise
	    return path


	def run_pipeline(image: Image.Image, poisson_depth: int = 10):
	    """Run depth estimation, point-cloud building and meshing for one image.

	    This is the function wired to the Gradio button.

	    Args:
	        image: Uploaded image, or ``None`` when nothing was provided.
	        poisson_depth: Poisson reconstruction depth; coerced to ``int``
	            because the slider may deliver a float.

	    Returns:
	        ``(depth_preview_image, mesh_preview_png, pcd_ply_path,
	        mesh_obj_path)``. Entries are ``None`` for stages that produced
	        nothing: all four without an input image, the last three for an
	        empty point cloud, and the mesh entries when no mesh was built.
	    """
	    if image is None:
	        return None, None, None, None

	    # 1) depth
	    rgb, depth = predict_depth(image)
	    depth_vis = depth_to_colormap(depth)

	    # 2) point cloud
	    pcd = rgbd_to_pointcloud(rgb, depth)
	    if len(pcd.points) == 0:
	        return depth_vis, None, None, None
	    pcd_path = _save_to_temp(pcd, ".ply")

	    # 3) mesh; without one, at least return the point cloud
	    mesh = pointcloud_to_mesh(pcd, depth=int(poisson_depth))
	    if mesh is None:
	        return depth_vis, None, pcd_path, None

	    # 4) save mesh as OBJ (works with Gradio Model3D)
	    mesh_obj_path = _save_to_temp(mesh, ".obj")

	    # 5) mesh preview (best effort, may be None)
	    preview = render_mesh_image(mesh, 768, 512)

	    return depth_vis, preview, pcd_path, mesh_obj_path


	# ------------------------------
	# Interface
	# ------------------------------
	# Page heading; rendered as a level-1 Markdown header at the top of the UI.
	TITLE = "Monocular Depth → Point Cloud → Poisson Mesh (GLPN + Open3D)"
	# Introductory blurb shown as Markdown directly under the heading. It
	# summarises the pipeline stages and the outputs the user can expect.
	DESC = """
	Upload an image. We estimate relative depth (GLPN), build a point cloud, and reconstruct
	a mesh (Poisson). Outputs: depth preview, mesh preview (if renderer available),
	and downloads for .ply (point cloud) and .obj (mesh).
	Note: monocular depth lacks absolute scale; this is for visualization/demo purposes.
	"""

	def _passthrough_mesh_path(mesh_path):
	    """Return the viewer's value unchanged so it can feed the download widget."""
	    return mesh_path


	with gr.Blocks(title="2D → 3D Reconstruction") as demo:
	    # Heading and description.
	    gr.Markdown(f"# {TITLE}")
	    gr.Markdown(DESC)

	    # Top row: inputs on the left, image previews on the right.
	    with gr.Row():
	        with gr.Column():
	            in_img = gr.Image(
	                type="pil",
	                sources=["upload", "clipboard"],
	                label="Input Image",
	                image_mode="RGB",
	            )
	            poisson_depth = gr.Slider(
	                minimum=5,
	                maximum=12,
	                value=10,
	                step=1,
	                label="Poisson depth (mesh detail)",
	            )
	            run_btn = gr.Button("Reconstruct 3D", variant="primary")

	        with gr.Column():
	            depth_out = gr.Image(label="Depth Map (colormap)")
	            mesh_preview = gr.Image(
	                label="Mesh Preview (offscreen render)",
	                visible=True,
	            )

	    # Bottom row: point-cloud download, interactive viewer, mesh download.
	    with gr.Row():
	        pcd_file = gr.File(label="Download Point Cloud (.ply)")
	        mesh_obj_view = gr.Model3D(label="Mesh Viewer (.obj)")
	        mesh_obj_file = gr.File(label="Download Mesh (.obj)")

	    # The button runs the whole pipeline; its four return values map onto
	    # these four components in order.
	    run_btn.click(
	        fn=run_pipeline,
	        inputs=[in_img, poisson_depth],
	        outputs=[depth_out, mesh_preview, pcd_file, mesh_obj_view],
	    )

	    # Mirror the viewer's file into the separate download widget whenever
	    # the viewer's value changes (same path as the viewer output).
	    mesh_obj_view.change(
	        _passthrough_mesh_path,
	        inputs=mesh_obj_view,
	        outputs=mesh_obj_file,
	    )

	if __name__ == "__main__":
	    # A share link is a public tunnel that is useful for local or Colab runs.
	    # Hugging Face Spaces already serves the app publicly and does not
	    # support share links, so only request one outside of Spaces (Spaces
	    # sets the SPACE_ID environment variable).
	    running_on_spaces = bool(os.environ.get("SPACE_ID"))
	    demo.launch(share=not running_on_spaces)