import collections
import math
from typing import List

import torch
from torch import nn

from detectron2.config import configurable
from detectron2.layers import ShapeSpec, move_device_like
from detectron2.structures import Boxes, RotatedBoxes
from detectron2.utils.registry import Registry

ANCHOR_GENERATOR_REGISTRY = Registry("ANCHOR_GENERATOR")
ANCHOR_GENERATOR_REGISTRY.__doc__ = """
Registry for modules that create object detection anchors for feature maps.

The registered object will be called with `obj(cfg, input_shape)`.
"""
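
# A minimal sketch of how a downstream project could plug its own generator into the
# registry. The class name below is hypothetical and not part of detectron2; it is kept
# commented out so that importing this module has no side effects:
#
#   @ANCHOR_GENERATOR_REGISTRY.register()
#   class MyAnchorGenerator(nn.Module):
#       def __init__(self, cfg, input_shape: List[ShapeSpec]):  # called as obj(cfg, input_shape)
#           super().__init__()
#           ...  # read cfg.MODEL.ANCHOR_GENERATOR.* and build cell anchors
#
#       def forward(self, features: List[torch.Tensor]):
#           ...  # return one Boxes object per feature map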
					
						
class BufferList(nn.Module):
    """
    Similar to nn.ParameterList, but for buffers
    """

    def __init__(self, buffers):
        super().__init__()
        for i, buffer in enumerate(buffers):
            # Register as non-persistent buffers so the values are not saved in checkpoints.
            self.register_buffer(str(i), buffer, persistent=False)

    def __len__(self):
        return len(self._buffers)

    def __iter__(self):
        return iter(self._buffers.values())
					
						
def _create_grid_offsets(
    size: List[int], stride: int, offset: float, target_device_tensor: torch.Tensor
):
    grid_height, grid_width = size
    shifts_x = move_device_like(
        torch.arange(offset * stride, grid_width * stride, step=stride, dtype=torch.float32),
        target_device_tensor,
    )
    shifts_y = move_device_like(
        torch.arange(offset * stride, grid_height * stride, step=stride, dtype=torch.float32),
        target_device_tensor,
    )

    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x)
    shift_x = shift_x.reshape(-1)
    shift_y = shift_y.reshape(-1)
    return shift_x, shift_y
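
# Illustrative sketch, not used anywhere in detectron2 (the helper name and the numbers are
# made up for the example): for a 2x3 feature map with stride 4 and offset 0.5, anchor
# centers land at x in {2, 6, 10} and y in {2, 6}, enumerated row by row.
def _demo_create_grid_offsets():
    dummy = torch.empty(0)  # only used to pick the target device (CPU here)
    shift_x, shift_y = _create_grid_offsets(
        size=[2, 3], stride=4, offset=0.5, target_device_tensor=dummy
    )
    # shift_x: tensor([ 2.,  6., 10.,  2.,  6., 10.])
    # shift_y: tensor([ 2.,  2.,  2.,  6.,  6.,  6.])
    return shift_x, shift_y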
					
						
def _broadcast_params(params, num_features, name):
    """
    If one size (or aspect ratio) is specified and there are multiple feature
    maps, we "broadcast" anchors of that single size (or aspect ratio)
    over all feature maps.

    If params is list[float], or list[list[float]] with len(params) == 1, repeat
    it num_features times.

    Returns:
        list[list[float]]: param for each feature
    """
    assert isinstance(
        params, collections.abc.Sequence
    ), f"{name} in anchor generator has to be a list! Got {params}."
    assert len(params), f"{name} in anchor generator cannot be empty!"
    if not isinstance(params[0], collections.abc.Sequence):  # params is list[float]
        return [params] * num_features
    if len(params) == 1:
        return list(params) * num_features
    assert len(params) == num_features, (
        f"Got {name} of length {len(params)} in anchor generator, "
        f"but the number of input features is {num_features}!"
    )
    return params
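
# A quick sketch of the broadcast rule above. The helper name and the numbers are made up
# for the example; this function is not part of the detectron2 API.
def _demo_broadcast_params():
    # A flat list is shared by every feature map.
    assert _broadcast_params([32, 64], 3, "sizes") == [[32, 64]] * 3
    # A nested list with one entry per feature map is returned unchanged.
    assert _broadcast_params([[32], [64], [128]], 3, "sizes") == [[32], [64], [128]]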
					
						
@ANCHOR_GENERATOR_REGISTRY.register()
class DefaultAnchorGenerator(nn.Module):
    """
    Compute anchors in the standard ways described in
    "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks".
    """

    box_dim: torch.jit.Final[int] = 4
    """
    the dimension of each anchor box.
    """

    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If ``sizes`` is list[list[float]], ``sizes[i]`` is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If ``sizes`` is list[float], ``sizes`` is used for all feature maps.
                Anchor sizes are given in absolute lengths in units of the input image;
                they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
        }

    def _calculate_anchors(self, sizes, aspect_ratios):
        cell_anchors = [
            self.generate_cell_anchors(s, a).float() for s, a in zip(sizes, aspect_ratios)
        ]
        return BufferList(cell_anchors)

    @property
    @torch.jit.unused
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    @torch.jit.unused
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios and 5 sizes, the number of anchors is 15.
                (See also ANCHOR_GENERATOR.SIZES and ANCHOR_GENERATOR.ASPECT_RATIOS in config)

                In standard RPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes: List[List[int]]):
        """
        Returns:
            list[Tensor]: #featuremap tensors, each is (#locations x #cell_anchors) x 4
        """
        anchors = []
        # buffers() is not supported by torchscript; use named_buffers() instead.
        buffers: List[torch.Tensor] = [x[1] for x in self.cell_anchors.named_buffers()]
        for size, stride, base_anchors in zip(grid_sizes, self.strides, buffers):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors)
            shifts = torch.stack((shift_x, shift_y, shift_x, shift_y), dim=1)

            anchors.append((shifts.view(-1, 1, 4) + base_anchors.view(1, -1, 4)).reshape(-1, 4))

        return anchors

    def generate_cell_anchors(self, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes and aspect_ratios centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios), 4) storing anchor boxes
                in XYXY format.
        """
        anchors = []
        for size in sizes:
            area = size**2.0
            for aspect_ratio in aspect_ratios:
                # For anchor area s * s and aspect ratio a = h / w:
                #   w = sqrt(s * s / a)
                #   h = a * w
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                x0, y0, x1, y1 = -w / 2.0, -h / 2.0, w / 2.0, h / 2.0
                anchors.append([x0, y0, x1, y1])
        return torch.tensor(anchors)

    def forward(self, features: List[torch.Tensor]):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[Boxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [Boxes(x) for x in anchors_over_all_feature_maps]
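
# A minimal usage sketch (the helper name, shapes, and values are made up; not part of the
# library): two feature maps with strides 8 and 16, and 3 sizes x 1 aspect ratio = 3 anchors
# per pixel location.
def _demo_default_anchor_generator():
    anchor_gen = DefaultAnchorGenerator(
        sizes=[[32, 64, 128]], aspect_ratios=[[1.0]], strides=[8, 16], offset=0.5
    )
    features = [torch.zeros(1, 256, 32, 32), torch.zeros(1, 256, 16, 16)]
    anchors = anchor_gen(features)  # list[Boxes], one per feature map
    # len(anchors[0]) == 32 * 32 * 3 and len(anchors[1]) == 16 * 16 * 3
    return anchors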
					
						
@ANCHOR_GENERATOR_REGISTRY.register()
class RotatedAnchorGenerator(nn.Module):
    """
    Compute rotated anchors used by Rotated RPN (RRPN), described in
    "Arbitrary-Oriented Scene Text Detection via Rotation Proposals".
    """

    box_dim: int = 5
    """
    the dimension of each anchor box.
    """

    @configurable
    def __init__(self, *, sizes, aspect_ratios, strides, angles, offset=0.5):
        """
        This interface is experimental.

        Args:
            sizes (list[list[float]] or list[float]):
                If sizes is list[list[float]], sizes[i] is the list of anchor sizes
                (i.e. sqrt of anchor area) to use for the i-th feature map.
                If sizes is list[float], the sizes are used for all feature maps.
                Anchor sizes are given in absolute lengths in units of the input image;
                they do not dynamically scale if the input image size changes.
            aspect_ratios (list[list[float]] or list[float]): list of aspect ratios
                (i.e. height / width) to use for anchors. Same "broadcast" rule for `sizes` applies.
            strides (list[int]): stride of each input feature.
            angles (list[list[float]] or list[float]): list of angles (in degrees CCW)
                to use for anchors. Same "broadcast" rule for `sizes` applies.
            offset (float): Relative offset between the center of the first anchor and the top-left
                corner of the image. Value has to be in [0, 1).
                Recommend to use 0.5, which means half stride.
        """
        super().__init__()

        self.strides = strides
        self.num_features = len(self.strides)
        sizes = _broadcast_params(sizes, self.num_features, "sizes")
        aspect_ratios = _broadcast_params(aspect_ratios, self.num_features, "aspect_ratios")
        angles = _broadcast_params(angles, self.num_features, "angles")
        self.cell_anchors = self._calculate_anchors(sizes, aspect_ratios, angles)

        self.offset = offset
        assert 0.0 <= self.offset < 1.0, self.offset

    @classmethod
    def from_config(cls, cfg, input_shape: List[ShapeSpec]):
        return {
            "sizes": cfg.MODEL.ANCHOR_GENERATOR.SIZES,
            "aspect_ratios": cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS,
            "strides": [x.stride for x in input_shape],
            "offset": cfg.MODEL.ANCHOR_GENERATOR.OFFSET,
            "angles": cfg.MODEL.ANCHOR_GENERATOR.ANGLES,
        }

    def _calculate_anchors(self, sizes, aspect_ratios, angles):
        cell_anchors = [
            self.generate_cell_anchors(size, aspect_ratio, angle).float()
            for size, aspect_ratio, angle in zip(sizes, aspect_ratios, angles)
        ]
        return BufferList(cell_anchors)

    @property
    def num_cell_anchors(self):
        """
        Alias of `num_anchors`.
        """
        return self.num_anchors

    @property
    def num_anchors(self):
        """
        Returns:
            list[int]: Each int is the number of anchors at every pixel
                location, on that feature map.
                For example, if at every pixel we use anchors of 3 aspect
                ratios, 2 sizes and 5 angles, the number of anchors is 30.
                (See also ANCHOR_GENERATOR.SIZES, ANCHOR_GENERATOR.ASPECT_RATIOS
                and ANCHOR_GENERATOR.ANGLES in config)

                In standard RRPN models, `num_anchors` on every feature map is the same.
        """
        return [len(cell_anchors) for cell_anchors in self.cell_anchors]

    def _grid_anchors(self, grid_sizes):
        anchors = []
        for size, stride, base_anchors in zip(grid_sizes, self.strides, self.cell_anchors):
            shift_x, shift_y = _create_grid_offsets(size, stride, self.offset, base_anchors)
            zeros = torch.zeros_like(shift_x)
            shifts = torch.stack((shift_x, shift_y, zeros, zeros, zeros), dim=1)

            anchors.append((shifts.view(-1, 1, 5) + base_anchors.view(1, -1, 5)).reshape(-1, 5))

        return anchors

    def generate_cell_anchors(
        self,
        sizes=(32, 64, 128, 256, 512),
        aspect_ratios=(0.5, 1, 2),
        angles=(-90, -60, -30, 0, 30, 60, 90),
    ):
        """
        Generate a tensor storing canonical anchor boxes, which are all anchor
        boxes of different sizes, aspect_ratios, angles centered at (0, 0).
        We can later build the set of anchors for a full feature map by
        shifting and tiling these tensors (see `meth:_grid_anchors`).

        Args:
            sizes (tuple[float]):
            aspect_ratios (tuple[float]):
            angles (tuple[float]):

        Returns:
            Tensor of shape (len(sizes) * len(aspect_ratios) * len(angles), 5)
                storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format.
        """
        anchors = []
        for size in sizes:
            area = size**2.0
            for aspect_ratio in aspect_ratios:
                # For anchor area s * s and aspect ratio a = h / w:
                #   w = sqrt(s * s / a)
                #   h = a * w
                w = math.sqrt(area / aspect_ratio)
                h = aspect_ratio * w
                anchors.extend([0, 0, w, h, a] for a in angles)

        return torch.tensor(anchors)

    def forward(self, features):
        """
        Args:
            features (list[Tensor]): list of backbone feature maps on which to generate anchors.

        Returns:
            list[RotatedBoxes]: a list of Boxes containing all the anchors for each feature map
                (i.e. the cell anchors repeated over all locations in the feature map).
                The number of anchors of each feature map is Hi x Wi x num_cell_anchors,
                where Hi, Wi are resolution of the feature map divided by anchor stride.
        """
        grid_sizes = [feature_map.shape[-2:] for feature_map in features]
        anchors_over_all_feature_maps = self._grid_anchors(grid_sizes)
        return [RotatedBoxes(x) for x in anchors_over_all_feature_maps]
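
# A minimal usage sketch (the helper name and values are made up; not part of the library):
# with 2 sizes, 1 aspect ratio and 3 angles there are 2 * 1 * 3 = 6 rotated anchors per location.
def _demo_rotated_anchor_generator():
    anchor_gen = RotatedAnchorGenerator(
        sizes=[[32, 64]], aspect_ratios=[[1.0]], strides=[16], angles=[[-30, 0, 30]], offset=0.5
    )
    # anchor_gen.num_anchors == [6]; each anchor is (x_ctr, y_ctr, w, h, angle)
    features = [torch.zeros(1, 256, 10, 10)]
    return anchor_gen(features)  # [RotatedBoxes of length 10 * 10 * 6]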
					
						
def build_anchor_generator(cfg, input_shape):
    """
    Build an anchor generator from `cfg.MODEL.ANCHOR_GENERATOR.NAME`.
    """
    anchor_generator = cfg.MODEL.ANCHOR_GENERATOR.NAME
    return ANCHOR_GENERATOR_REGISTRY.get(anchor_generator)(cfg, input_shape)
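
# A hedged sketch of config-driven construction (the helper name is made up; not part of this
# module). It assumes the stock detectron2 config from get_cfg(), whose
# MODEL.ANCHOR_GENERATOR.NAME defaults to "DefaultAnchorGenerator".
def _demo_build_anchor_generator():
    from detectron2.config import get_cfg

    cfg = get_cfg()
    # One ShapeSpec per feature map; only .stride is read by the anchor generators here.
    input_shape = [ShapeSpec(channels=256, stride=s) for s in (8, 16, 32)]
    anchor_gen = build_anchor_generator(cfg, input_shape)
    return anchor_gen.num_anchors  # e.g. [15, 15, 15] with the default sizes and aspect ratios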