Scanspectrum1

Running

Scanspectrum1 / mapanything /utils /wai /camera.py

aknapitsch user

simpler inference and refactoring

37de32d 2 months ago

8.57 kB

	"""
	This utils script contains a port of the wai-core camera methods for MapAnything.
	"""

	from typing import Any

	import numpy as np
	import torch
	from scipy.spatial.transform import Rotation, Slerp

	from mapanything.utils.wai.ops import get_dtype_device

	# Keys used to describe camera models in frame / scene metadata dictionaries.
	# Pinhole parameters: focal lengths, principal point, image height and width.
	PINHOLE_CAM_KEYS = ["fl_x", "fl_y", "cx", "cy", "h", "w"]
	# Distortion coefficient keys.
	# NOTE(review): the original comment says this order corresponds to the OpenCV
	# convention; OpenCV documents distCoeffs as (k1, k2, p1, p2, k3, ...) -- confirm
	# which consumer relies on this exact order before reordering anything.
	DISTORTION_PARAM_KEYS = ["k1", "k2", "k3", "k4", "p1", "p2"]
	# All camera keys: pinhole parameters first, distortion parameters after.
	CAMERA_KEYS = [*PINHOLE_CAM_KEYS, *DISTORTION_PARAM_KEYS]


	def interpolate_intrinsics(
	    frame1: dict[str, Any],
	    frame2: dict[str, Any],
	    alpha: float,
	) -> dict[str, Any]:
	    """
	    Linearly blend the camera parameters shared by two frames.

	    Only keys listed in CAMERA_KEYS that are present in BOTH frames are
	    interpolated; every other key is left out of the result.

	    Args:
	        frame1: The first frame dictionary.
	        frame2: The second frame dictionary.
	        alpha: Interpolation weight. alpha = 0 yields frame1's values,
	            alpha = 1 yields frame2's values.
	    Returns:
	        frame_inter: New dictionary holding the interpolated camera parameters.
	    """
	    shared_keys = (name for name in CAMERA_KEYS if name in frame1 and name in frame2)
	    return {
	        name: (1 - alpha) * frame1[name] + alpha * frame2[name]
	        for name in shared_keys
	    }


	def interpolate_extrinsics(
	    matrix1: list | np.ndarray | torch.Tensor,
	    matrix2: list | np.ndarray | torch.Tensor,
	    alpha: float,
	) -> list | np.ndarray | torch.Tensor:
	    """
	    Interpolate between two 4x4 camera extrinsics matrices.

	    The translation (first three rows of the last column) is interpolated
	    linearly and the rotation (upper-left 3x3 block) with SLERP. The last row
	    of the result is always [0, 0, 0, 1].

	    Args:
	        matrix1: The first matrix.
	        matrix2: The second matrix.
	        alpha: Interpolation parameter. alpha = 0 for matrix1, alpha = 1 for matrix2.
	            SciPy's Slerp rejects values outside the key-time range [0, 1].
	    Returns:
	        matrix: 4x4 interpolated matrix, same container type as the inputs. Torch
	            results are moved back to the dtype/device reported for matrix1.
	    Raises:
	        ValueError: If the two inputs have different types, or the type is not
	            one of list, numpy array or torch tensor.
	    """
	    if not isinstance(matrix1, type(matrix2)):
	        raise ValueError("Both matrices should have the same type.")

	    # Recorded before any conversion so the output can be cast back at the end.
	    # NOTE(review): get_dtype_device is a project helper; presumably it also
	    # handles plain lists -- confirm.
	    dtype, device = get_dtype_device(matrix1)
	    if isinstance(matrix1, list):
	        mtype = "list"
	        matrix1 = np.array(matrix1)
	        matrix2 = np.array(matrix2)
	    elif isinstance(matrix1, np.ndarray):
	        mtype = "numpy"
	    elif isinstance(matrix1, torch.Tensor):
	        mtype = "torch"
	        # Tensor.numpy() raises for tensors that require grad or live on a
	        # non-CPU device; detach and copy to host first so such inputs work.
	        matrix1 = matrix1.detach().cpu().numpy()
	        matrix2 = matrix2.detach().cpu().numpy()
	    else:
	        raise ValueError(
	            "Only list, numpy array and torch tensors are supported as inputs."
	        )

	    # interpolate translation linearly
	    t = (1 - alpha) * matrix1[:3, 3] + alpha * matrix2[:3, 3]

	    # interpolate rotations with SLERP between the two key rotations
	    key_rotations = Rotation.from_matrix(
	        np.stack([matrix1[:3, :3], matrix2[:3, :3]])
	    )
	    R = Slerp([0, 1], key_rotations)(alpha).as_matrix()

	    # combine together
	    matrix_inter = np.eye(4)
	    matrix_inter[:3, :3] = R
	    matrix_inter[:3, 3] = t

	    if mtype == "list":
	        matrix_inter = matrix_inter.tolist()
	    elif mtype == "torch":
	        matrix_inter = torch.from_numpy(matrix_inter).to(dtype).to(device)
	    elif mtype == "numpy":
	        matrix_inter = matrix_inter.astype(dtype)

	    return matrix_inter


	def convert_camera_coeffs_to_pinhole_matrix(
	    scene_meta, frame, fmt="torch"
	) -> torch.Tensor | np.ndarray | list:
	    """
	    Build a 3x3 pinhole intrinsics matrix from NeRFStudio-format camera coefficients.

	    The camera model and the intrinsic parameters are read from ``frame`` first
	    and fall back to ``scene_meta`` when the frame does not provide them.

	    Args:
	        scene_meta: Scene metadata containing camera parameters
	        frame: Frame-specific camera parameters that take precedence over scene_meta
	        fmt: Output container. "torch" returns a torch.Tensor, "np" returns a
	            numpy array, any other value returns the nested Python list.

	    Returns:
	        torch.Tensor | np.ndarray | list: 3x3 camera intrinsics matrix
	        [[fl_x, 0, cx], [0, fl_y, cy], [0, 0, 1]]

	    Raises:
	        ValueError: If the camera model is not PINHOLE, if any distortion
	            coefficient is non-zero in frame or scene_meta, or if one of
	            fl_x, fl_y, cx, cy is missing from both dictionaries
	    """
	    # Only the pinhole model can be expressed as a plain 3x3 matrix
	    model = frame.get("camera_model", scene_meta.get("camera_model"))
	    if model != "PINHOLE":
	        raise ValueError("Only PINHOLE camera model supported")

	    # A non-zero distortion coefficient in either dictionary is rejected
	    for coeff in DISTORTION_PARAM_KEYS:
	        if frame.get(coeff, 0) != 0 or scene_meta.get(coeff, 0) != 0:
	            raise ValueError(
	                "Pinhole camera does not support radial/tangential distortion -> Undistort first"
	            )

	    def _lookup(name):
	        # Frame value wins; scene metadata is the fallback
	        value = frame.get(name, scene_meta.get(name))
	        if value is None:
	            raise ValueError(f"Missing required camera parameter: {name}")
	        return value

	    fl_x, fl_y, cx, cy = (_lookup(name) for name in ("fl_x", "fl_y", "cx", "cy"))

	    matrix = [
	        [fl_x, 0.0, cx],
	        [0.0, fl_y, cy],
	        [0.0, 0.0, 1.0],
	    ]
	    if fmt == "torch":
	        return torch.tensor(matrix)
	    if fmt == "np":
	        return np.array(matrix)
	    return matrix


	def rotate_pinhole_90degcw(
	    W: int, H: int, fx: float, fy: float, cx: float, cy: float
	) -> tuple[int, int, float, float, float, float]:
	    """
	    Rotate pinhole camera intrinsics by 90 degrees clockwise.

	    Width/height and the two focal lengths swap; the new principal point is
	    (H - 1 - cy, cx).

	    Returns:
	        Tuple (W_new, H_new, fx_new, fy_new, cx_new, cy_new).
	    """
	    # Image size and focal lengths simply exchange their axes
	    size_and_focal = (H, W, fy, fx)
	    # The old y coordinate, mirrored over the old height, becomes the new x
	    principal_point = (H - 1 - cy, cx)
	    return size_and_focal + principal_point


	def _gl_cv_cmat() -> np.ndarray:
	    """Return the 4x4 integer matrix diag(1, -1, -1, 1) that gl2cv applies to camera poses."""
	    # Negates the second and third axes, leaves the first axis and the homogeneous entry as is
	    return np.diag([1, -1, -1, 1])


	def _apply_transformation(
	c2ws: torch.Tensor \| np.ndarray, cmat: np.ndarray
	) -> torch.Tensor \| np.ndarray:
	"""
	Convert camera poses using a provided conversion matrix.

	Args:
	c2ws (torch.Tensor or np.ndarray): Camera poses (batch_size, 4, 4) or (4, 4)
	cmat (torch.Tensor or np.ndarray): Conversion matrix (4, 4)

	Returns:
	torch.Tensor or np.ndarray: Transformed camera poses (batch_size, 4, 4) or (4, 4)
	"""
	if isinstance(c2ws, torch.Tensor):
	# Clone the input tensor to avoid modifying it in-place
	c2ws_transformed = c2ws.clone()
	# Apply the conversion matrix to the rotation part of the camera poses
	if len(c2ws.shape) == 3:
	c2ws_transformed[:, :3, :3] = c2ws_transformed[
	:, :3, :3
	] @ torch.from_numpy(cmat[:3, :3]).to(c2ws).unsqueeze(0)
	else:
	c2ws_transformed[:3, :3] = c2ws_transformed[:3, :3] @ torch.from_numpy(
	cmat[:3, :3]
	).to(c2ws)

	elif isinstance(c2ws, np.ndarray):
	# Clone the input array to avoid modifying it in-place
	c2ws_transformed = c2ws.copy()
	if len(c2ws.shape) == 3: # batched
	# Apply the conversion matrix to the rotation part of the camera poses
	c2ws_transformed[:, :3, :3] = np.einsum(
	"ijk,lk->ijl", c2ws_transformed[:, :3, :3], cmat[:3, :3]
	)
	else: # single 4x4 matrix
	# Apply the conversion matrix to the rotation part of the camera pose
	c2ws_transformed[:3, :3] = np.dot(c2ws_transformed[:3, :3], cmat[:3, :3])

	else:
	raise ValueError("Input data type not supported.")

	return c2ws_transformed


	def gl2cv(
	    c2ws: torch.Tensor | np.ndarray,
	    return_cmat: bool = False,
	) -> torch.Tensor | np.ndarray | tuple[torch.Tensor | np.ndarray, np.ndarray]:
	    """
	    Convert camera poses from the OpenGL to the OpenCV coordinate system.

	    Args:
	        c2ws (torch.Tensor or np.ndarray): Camera poses (batch_size, 4, 4) or (4, 4)
	        return_cmat (bool): If True, also return the conversion matrix that was applied

	    Returns:
	        torch.Tensor or np.ndarray: Transformed camera poses (batch_size, 4, 4) or (4, 4)
	        np.ndarray (optional): Conversion matrix, only when return_cmat is True
	    """
	    conversion = _gl_cv_cmat()
	    converted = _apply_transformation(c2ws, conversion)
	    return (converted, conversion) if return_cmat else converted


	def intrinsics_to_fov(
	    fx: torch.Tensor, fy: torch.Tensor, h: torch.Tensor, w: torch.Tensor
	) -> tuple[torch.Tensor, torch.Tensor]:
	    """
	    Derive the horizontal and vertical fields of view (radians) from pinhole intrinsics.

	    Each angle is 2 * atan(half_extent / focal_length), evaluated element-wise.

	    Args:
	        fx (torch.Tensor): focal x
	        fy (torch.Tensor): focal y
	        h (torch.Tensor): Image height(s) with shape (B,).
	        w (torch.Tensor): Image width(s) with shape (B,).

	    Returns:
	        tuple[torch.Tensor, torch.Tensor]: (horizontal_fov, vertical_fov) in radians,
	        with the shape obtained by broadcasting the inputs.
	    """
	    half_width = w / 2
	    half_height = h / 2
	    fov_horizontal = 2 * torch.atan(half_width / fx)
	    fov_vertical = 2 * torch.atan(half_height / fy)
	    return fov_horizontal, fov_vertical