Spaces:
Running
Running
| """ | |
| This utils script contains PORTAGE of wai-core camera methods for MapAnything. | |
| """ | |
| from typing import Any | |
| import numpy as np | |
| import torch | |
| from scipy.spatial.transform import Rotation, Slerp | |
| from mapanything.utils.wai.ops import get_dtype_device | |
# constants regarding camera models
# Pinhole intrinsics (focal lengths, principal point) plus image size.
PINHOLE_CAM_KEYS = ["fl_x", "fl_y", "cx", "cy", "h", "w"]
DISTORTION_PARAM_KEYS = [
    "k1",
    "k2",
    "k3",
    "k4",
    "p1",
    "p2",
]  # order corresponds to the OpenCV convention
# Every recognized per-camera key: pinhole parameters first, then distortion.
CAMERA_KEYS = PINHOLE_CAM_KEYS + DISTORTION_PARAM_KEYS
def interpolate_intrinsics(
    frame1: dict[str, Any],
    frame2: dict[str, Any],
    alpha: float,
) -> dict[str, Any]:
    """
    Linearly blend the camera parameters shared by two frames.

    Args:
        frame1: The first frame dictionary.
        frame2: The second frame dictionary.
        alpha: Interpolation parameter. alpha = 0 for frame1, alpha = 1 for frame2.

    Returns:
        Dictionary with an interpolated value for every camera key present
        in both input frames; keys missing from either frame are dropped.
    """
    return {
        key: (1 - alpha) * frame1[key] + alpha * frame2[key]
        for key in CAMERA_KEYS
        if key in frame1 and key in frame2
    }
def interpolate_extrinsics(
    matrix1: list | np.ndarray | torch.Tensor,
    matrix2: list | np.ndarray | torch.Tensor,
    alpha: float,
) -> list | np.ndarray | torch.Tensor:
    """
    Interpolate camera extrinsics 4x4 matrices using SLERP.

    Rotations are blended with spherical linear interpolation, translations
    linearly. The result is returned in the same container type as the inputs
    (and, for tensors, converted back to the input dtype/device).

    Args:
        matrix1: The first 4x4 pose matrix.
        matrix2: The second 4x4 pose matrix.
        alpha: Interpolation parameter. alpha = 0 for matrix1, alpha = 1 for matrix2.

    Returns:
        matrix: 4x4 interpolated matrix, same type as the inputs.

    Raises:
        ValueError: If the inputs have different types or an unsupported type.
    """
    if not isinstance(matrix1, type(matrix2)):
        raise ValueError("Both matrices should have the same type.")
    dtype, device = get_dtype_device(matrix1)
    if isinstance(matrix1, list):
        mtype = "list"
        matrix1 = np.array(matrix1)
        matrix2 = np.array(matrix2)
    elif isinstance(matrix1, np.ndarray):
        mtype = "numpy"
    elif isinstance(matrix1, torch.Tensor):
        mtype = "torch"
        # detach + cpu so gradient-tracking and CUDA tensors convert safely;
        # plain .numpy() raises for both cases.
        matrix1 = matrix1.detach().cpu().numpy()
        matrix2 = matrix2.detach().cpu().numpy()
    else:
        raise ValueError(
            "Only list, numpy array and torch tensors are supported as inputs."
        )
    # interpolate translation linearly
    t = (1 - alpha) * matrix1[:3, 3] + alpha * matrix2[:3, 3]
    # interpolate rotations with SLERP on the stacked rotation pair
    rotations = Rotation.from_matrix(np.stack([matrix1[:3, :3], matrix2[:3, :3]]))
    R = Slerp([0, 1], rotations)(alpha).as_matrix()
    # combine together into a homogeneous 4x4 matrix
    matrix_inter = np.eye(4)
    matrix_inter[:3, :3] = R
    matrix_inter[:3, 3] = t
    if mtype == "list":
        matrix_inter = matrix_inter.tolist()
    elif mtype == "torch":
        matrix_inter = torch.from_numpy(matrix_inter).to(dtype).to(device)
    elif mtype == "numpy":
        matrix_inter = matrix_inter.astype(dtype)
    return matrix_inter
def convert_camera_coeffs_to_pinhole_matrix(
    scene_meta, frame, fmt="torch"
) -> torch.Tensor | np.ndarray | list:
    """
    Convert camera intrinsics from NeRFStudio format to a 3x3 intrinsics matrix.

    Args:
        scene_meta: Scene metadata containing camera parameters
        frame: Frame-specific camera parameters that override scene_meta
        fmt: Output container: "torch" (default) for a tensor, "np" for a
            numpy array, anything else yields nested Python lists.

    Returns:
        3x3 camera intrinsics matrix in the requested format.

    Raises:
        ValueError: If camera model is not PINHOLE, if distortion coefficients
            are present, or if a required intrinsic parameter is missing.
    """
    # Frame-level values take precedence over scene-level ones throughout.
    model = frame.get("camera_model", scene_meta.get("camera_model"))
    if model != "PINHOLE":
        raise ValueError("Only PINHOLE camera model supported")
    # Any nonzero distortion coefficient (frame or scene) is unsupported.
    for coeff in DISTORTION_PARAM_KEYS:
        if frame.get(coeff, 0) != 0 or scene_meta.get(coeff, 0) != 0:
            raise ValueError(
                "Pinhole camera does not support radial/tangential distortion -> Undistort first"
            )
    params = {}
    for name in ["fl_x", "fl_y", "cx", "cy"]:
        value = frame.get(name, scene_meta.get(name))
        if value is None:
            raise ValueError(f"Missing required camera parameter: {name}")
        params[name] = value
    matrix = [
        [params["fl_x"], 0.0, params["cx"]],
        [0.0, params["fl_y"], params["cy"]],
        [0.0, 0.0, 1.0],
    ]
    if fmt == "torch":
        return torch.tensor(matrix)
    if fmt == "np":
        return np.array(matrix)
    return matrix
def rotate_pinhole_90degcw(
    W: int, H: int, fx: float, fy: float, cx: float, cy: float
) -> tuple[int, int, float, float, float, float]:
    """
    Rotate the intrinsics of a pinhole camera model by 90 degrees clockwise.

    A pixel (x, y) of the original image lands at (H - 1 - y, x) in the
    rotated image, so width/height and the focal lengths swap axes while the
    principal point moves with the pixel mapping.

    Returns:
        Tuple (W_new, H_new, fx_new, fy_new, cx_new, cy_new).
    """
    return H, W, fy, fx, H - 1 - cy, cx
| def _gl_cv_cmat() -> np.ndarray: | |
| cmat = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) | |
| return cmat | |
| def _apply_transformation( | |
| c2ws: torch.Tensor | np.ndarray, cmat: np.ndarray | |
| ) -> torch.Tensor | np.ndarray: | |
| """ | |
| Convert camera poses using a provided conversion matrix. | |
| Args: | |
| c2ws (torch.Tensor or np.ndarray): Camera poses (batch_size, 4, 4) or (4, 4) | |
| cmat (torch.Tensor or np.ndarray): Conversion matrix (4, 4) | |
| Returns: | |
| torch.Tensor or np.ndarray: Transformed camera poses (batch_size, 4, 4) or (4, 4) | |
| """ | |
| if isinstance(c2ws, torch.Tensor): | |
| # Clone the input tensor to avoid modifying it in-place | |
| c2ws_transformed = c2ws.clone() | |
| # Apply the conversion matrix to the rotation part of the camera poses | |
| if len(c2ws.shape) == 3: | |
| c2ws_transformed[:, :3, :3] = c2ws_transformed[ | |
| :, :3, :3 | |
| ] @ torch.from_numpy(cmat[:3, :3]).to(c2ws).unsqueeze(0) | |
| else: | |
| c2ws_transformed[:3, :3] = c2ws_transformed[:3, :3] @ torch.from_numpy( | |
| cmat[:3, :3] | |
| ).to(c2ws) | |
| elif isinstance(c2ws, np.ndarray): | |
| # Clone the input array to avoid modifying it in-place | |
| c2ws_transformed = c2ws.copy() | |
| if len(c2ws.shape) == 3: # batched | |
| # Apply the conversion matrix to the rotation part of the camera poses | |
| c2ws_transformed[:, :3, :3] = np.einsum( | |
| "ijk,lk->ijl", c2ws_transformed[:, :3, :3], cmat[:3, :3] | |
| ) | |
| else: # single 4x4 matrix | |
| # Apply the conversion matrix to the rotation part of the camera pose | |
| c2ws_transformed[:3, :3] = np.dot(c2ws_transformed[:3, :3], cmat[:3, :3]) | |
| else: | |
| raise ValueError("Input data type not supported.") | |
| return c2ws_transformed | |
def gl2cv(
    c2ws: torch.Tensor | np.ndarray,
    return_cmat: bool = False,
) -> torch.Tensor | np.ndarray | tuple[torch.Tensor | np.ndarray, np.ndarray]:
    """
    Convert camera poses from OpenGL to OpenCV coordinate system.

    Args:
        c2ws (torch.Tensor or np.ndarray): Camera poses (batch_size, 4, 4) or (4, 4)
        return_cmat (bool): If True, return the conversion matrix along with the transformed poses

    Returns:
        torch.Tensor or np.ndarray: Transformed camera poses (batch_size, 4, 4) or (4, 4)
        np.ndarray (optional): Conversion matrix if return_cmat is True
    """
    cmat = _gl_cv_cmat()
    converted = _apply_transformation(c2ws, cmat)
    return (converted, cmat) if return_cmat else converted
def intrinsics_to_fov(
    fx: torch.Tensor, fy: torch.Tensor, h: torch.Tensor, w: torch.Tensor
) -> tuple[torch.Tensor, torch.Tensor]:
    """
    Compute the horizontal and vertical fields of view in radians from camera intrinsics.

    Args:
        fx (torch.Tensor): focal x
        fy (torch.Tensor): focal y
        h (torch.Tensor): Image height(s) with shape (B,).
        w (torch.Tensor): Image width(s) with shape (B,).

    Returns:
        tuple[torch.Tensor, torch.Tensor]: Horizontal and vertical fields of
        view in radians, both with shape (B,).
    """
    fov_x = 2 * torch.atan((w / 2) / fx)
    fov_y = 2 * torch.atan((h / 2) / fy)
    return fov_x, fov_y