Spaces:
Runtime error
Runtime error
| from abc import ABC, abstractmethod | |
| from dataclasses import dataclass | |
| from typing import Dict, List, Tuple | |
| import numpy as np | |
class Camera(ABC):
    """
    An object describing how a camera corresponds to pixels in an image.

    Subclasses must implement every method marked abstract below.
    `depth_directions` is optional and raises NotImplementedError by default.
    """

    @abstractmethod
    def image_coords(self) -> np.ndarray:
        """
        :return: ([self.height, self.width, 2]).reshape(self.height * self.width, 2) image coordinates
        """

    @abstractmethod
    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        For every (x, y) coordinate in a rendered image, compute the ray of the
        corresponding pixel.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 2 x 3] array of [2 x 3] (origin, direction) tuples.
                 The direction should always be unit length.
        """

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        For every (x, y) coordinate in a rendered image, get the direction that
        corresponds to "depth" in an RGBD rendering.

        This may raise an exception if there is no "D" channel in the
        corresponding ViewData.

        :param coords: an [N x 2] integer array of 2D image coordinates.
        :return: an [N x 3] array of normalized depth directions.
        :raises NotImplementedError: always, unless a subclass overrides this.
        """
        # Deliberately not abstract: cameras without depth support keep this default.
        _ = coords
        raise NotImplementedError

    @abstractmethod
    def center_crop(self) -> "Camera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with a center crop to a square of the smaller dimension.
        """

    @abstractmethod
    def resize_image(self, width: int, height: int) -> "Camera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with resized image dimensions.
        """

    @abstractmethod
    def scale_scene(self, factor: float) -> "Camera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with the scene rescaled by the given factor.
        """
@dataclass
class ProjectiveCamera(Camera):
    """
    A Camera implementation for a standard pinhole camera.

    The camera rays shoot away from the origin in the z direction, with the x
    and y directions corresponding to the positive horizontal and vertical axes
    in image space.

    This is a dataclass: the fields below are the keyword arguments of the
    generated constructor, which the methods of this class rely on when
    building derived cameras.
    """

    # Point that every ray starts from.
    origin: np.ndarray
    # Basis vectors: x and y are the image-space horizontal/vertical axes,
    # z is the viewing direction.
    x: np.ndarray
    y: np.ndarray
    z: np.ndarray
    # Image dimensions in pixels.
    width: int
    height: int
    # Full field-of-view angles; halved and passed to np.tan, so in radians.
    x_fov: float
    y_fov: float

    def image_coords(self) -> np.ndarray:
        """
        :return: a float32 [height * width, 2] array of (x, y) pixel
                 coordinates, enumerated row by row.
        """
        ind = np.arange(self.width * self.height)
        # Column index is the x coordinate, row index is the y coordinate.
        coords = np.stack([ind % self.width, ind // self.width], axis=1).astype(np.float32)
        return coords

    def camera_rays(self, coords: np.ndarray) -> np.ndarray:
        """
        Compute the ray passing through each of the given pixel coordinates.

        :param coords: an [N x 2] array of (x, y) image coordinates.
        :return: an [N x 2 x 3] array of (origin, direction) pairs with
                 unit-length directions.
        """
        # Map pixel 0 to -1 and pixel (size - 1) to +1 along each axis.
        # NOTE: a width or height of 1 makes this denominator zero.
        fracs = (coords / (np.array([self.width, self.height], dtype=np.float32) - 1)) * 2 - 1
        # Scale by the tangent of the half-angle to get image-plane offsets.
        fracs = fracs * np.tan(np.array([self.x_fov, self.y_fov]) / 2)
        directions = self.z + self.x * fracs[:, :1] + self.y * fracs[:, 1:]
        directions = directions / np.linalg.norm(directions, axis=-1, keepdims=True)
        return np.stack([np.broadcast_to(self.origin, directions.shape), directions], axis=1)

    def depth_directions(self, coords: np.ndarray) -> np.ndarray:
        """
        Return the normalized z axis once per coordinate.

        :param coords: an [N x 2] array of image coordinates; only its length
                       is used.
        :return: an [N x 3] array where every row is z / ||z||.
        """
        return np.tile((self.z / np.linalg.norm(self.z))[None], [len(coords), 1])

    def resize_image(self, width: int, height: int) -> "ProjectiveCamera":
        """
        Creates a new camera for the resized view assuming the aspect ratio does not change.

        :param width: the new image width.
        :param height: the new image height.
        :return: a camera with the same pose and fields of view but the new size.
        :raises AssertionError: if width / height differs from the current ratio.
        """
        # Cross-multiplied comparison avoids floating-point division.
        assert width * self.height == height * self.width, "The aspect ratio should not change."
        return ProjectiveCamera(
            origin=self.origin,
            x=self.x,
            y=self.y,
            z=self.z,
            width=width,
            height=height,
            x_fov=self.x_fov,
            y_fov=self.y_fov,
        )

    def center_crop(self) -> "ProjectiveCamera":
        """
        Creates a new camera for the center-cropped view

        :return: a square camera whose side is min(width, height) and whose
                 two fields of view are both min(x_fov, y_fov).
        """
        size = min(self.width, self.height)
        # NOTE(review): reusing the smaller fov presumably relies on square
        # pixels (the smaller fov belonging to the smaller dimension) - confirm.
        fov = min(self.x_fov, self.y_fov)
        return ProjectiveCamera(
            origin=self.origin,
            x=self.x,
            y=self.y,
            z=self.z,
            width=size,
            height=size,
            x_fov=fov,
            y_fov=fov,
        )

    def scale_scene(self, factor: float) -> "ProjectiveCamera":
        """
        Creates a new camera with the same intrinsics and direction as this one,
        but with the camera frame rescaled by the given factor.

        :param factor: multiplier applied to the camera origin.
        :return: a camera identical to this one except for the scaled origin.
        """
        # Only the position changes; the axes, image size and fov are kept.
        return ProjectiveCamera(
            origin=self.origin * factor,
            x=self.x,
            y=self.y,
            z=self.z,
            width=self.width,
            height=self.height,
            x_fov=self.x_fov,
            y_fov=self.y_fov,
        )
class ViewData(ABC):
    """
    A collection of rendered camera views of a scene or object.

    This is a generalization of a NeRF dataset, since NeRF datasets only encode
    RGB or RGBA data, whereas this dataset supports arbitrary channels.

    All three methods are abstract and must be implemented by subclasses.
    """

    @abstractmethod
    def num_views(self) -> int:
        """
        The number of rendered views.
        """

    @abstractmethod
    def channel_names(self) -> List[str]:
        """
        Get all of the supported channels available for the views.

        This can be arbitrary, but there are some standard names:
        "R", "G", "B", "A" (alpha), and "D" (depth).
        """

    @abstractmethod
    def load_view(self, index: int, channels: List[str]) -> Tuple["Camera", np.ndarray]:
        """
        Load the given channels from the view at the given index.

        :param index: which view to load.
        :param channels: the names of the channels to load.
        :return: a tuple (camera_view, data), where data is a float array of
                 shape [height x width x num_channels].
        """
class MemoryViewData(ViewData):
    """
    A ViewData backed by arrays and cameras that are already held in memory.
    """

    def __init__(self, channels: Dict[str, np.ndarray], cameras: List[Camera]):
        """
        :param channels: maps each channel name to an array whose first axis
                         indexes the views.
        :param cameras: one camera per view.
        """
        # Every channel array needs exactly one leading entry per camera.
        for per_view_data in channels.values():
            assert per_view_data.shape[0] == len(cameras)
        self.channels = channels
        self.cameras = cameras

    def num_views(self) -> int:
        """
        The number of stored views, i.e. the number of cameras.
        """
        return len(self.cameras)

    def channel_names(self) -> List[str]:
        """
        The names of all stored channels.
        """
        return list(self.channels)

    def load_view(self, index: int, channels: List[str]) -> Tuple[Camera, np.ndarray]:
        """
        Fetch the camera and the requested channels for one view.

        :param index: which view to load.
        :param channels: the channel names to stack, in output order.
        :return: a tuple (camera, data) with the channels stacked on the last axis.
        """
        planes = []
        for name in channels:
            planes.append(self.channels[name][index])
        camera = self.cameras[index]
        return camera, np.stack(planes, axis=-1)