"""Vision backbone building blocks (VGG / ResNet style), adapted from torchvision."""
| import torch | |
| from torch import nn, Tensor | |
| import torch.nn.functional as F | |
| from functools import partial | |
| from typing import Callable, Optional, Sequence, Tuple, Union, Any, List, TypeVar, List | |
| from types import FunctionType | |
| from itertools import repeat | |
| import warnings | |
| import os | |
| from collections.abc import Iterable | |
| V = TypeVar("V") | |
| curr_dir = os.path.dirname(os.path.abspath(__file__)) | |
| vgg_urls = { | |
| "vgg11": "https://download.pytorch.org/models/vgg11-8a719046.pth", | |
| "vgg11_bn": "https://download.pytorch.org/models/vgg11_bn-6002323d.pth", | |
| "vgg13": "https://download.pytorch.org/models/vgg13-19584684.pth", | |
| "vgg13_bn": "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth", | |
| "vgg16": "https://download.pytorch.org/models/vgg16-397923af.pth", | |
| "vgg16_bn": "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth", | |
| "vgg19": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth", | |
| "vgg19_bn": "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth", | |
| } | |
| vgg_cfgs = { | |
| "A": [64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512], | |
| "B": [64, 64, "M", 128, 128, "M", 256, 256, "M", 512, 512, "M", 512, 512], | |
| "D": [64, 64, "M", 128, 128, "M", 256, 256, 256, "M", 512, 512, 512, "M", 512, 512, 512], | |
| "E": [64, 64, "M", 128, 128, "M", 256, 256, 256, 256, "M", 512, 512, 512, 512, "M", 512, 512, 512, 512] | |
| } | |
| def _log_api_usage_once(obj: Any) -> None: | |
| """ | |
| Logs API usage(module and name) within an organization. | |
| In a large ecosystem, it's often useful to track the PyTorch and | |
| TorchVision APIs usage. This API provides the similar functionality to the | |
| logging module in the Python stdlib. It can be used for debugging purpose | |
| to log which methods are used and by default it is inactive, unless the user | |
| manually subscribes a logger via the `SetAPIUsageLogger method <https://github.com/pytorch/pytorch/blob/eb3b9fe719b21fae13c7a7cf3253f970290a573e/c10/util/Logging.cpp#L114>`_. | |
| Please note it is triggered only once for the same API call within a process. | |
| It does not collect any data from open-source users since it is no-op by default. | |
| For more information, please refer to | |
| * PyTorch note: https://pytorch.org/docs/stable/notes/large_scale_deployments.html#api-usage-logging; | |
| * Logging policy: https://github.com/pytorch/vision/issues/5052; | |
| Args: | |
| obj (class instance or method): an object to extract info from. | |
| """ | |
| module = obj.__module__ | |
| if not module.startswith("torchvision"): | |
| module = f"torchvision.internal.{module}" | |
| name = obj.__class__.__name__ | |
| if isinstance(obj, FunctionType): | |
| name = obj.__name__ | |
| torch._C._log_api_usage_once(f"{module}.{name}") | |
| def _make_ntuple(x: Any, n: int) -> Tuple[Any, ...]: | |
| """ | |
| Make n-tuple from input x. If x is an iterable, then we just convert it to tuple. | |
| Otherwise, we will make a tuple of length n, all with value of x. | |
| reference: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/utils.py#L8 | |
| Args: | |
| x (Any): input value | |
| n (int): length of the resulting tuple | |
| """ | |
| if isinstance(x, Iterable): | |
| return tuple(x) | |
| return tuple(repeat(x, n)) | |
| class ConvNormActivation(torch.nn.Sequential): | |
| def __init__( | |
| self, | |
| in_channels: int, | |
| out_channels: int, | |
| kernel_size: Union[int, Tuple[int, ...]] = 3, | |
| stride: Union[int, Tuple[int, ...]] = 1, | |
| padding: Optional[Union[int, Tuple[int, ...], str]] = None, | |
| groups: int = 1, | |
| norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d, | |
| activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, | |
| dilation: Union[int, Tuple[int, ...]] = 1, | |
| inplace: Optional[bool] = True, | |
| bias: Optional[bool] = None, | |
| conv_layer: Callable[..., torch.nn.Module] = torch.nn.Conv2d, | |
| ) -> None: | |
| if padding is None: | |
| if isinstance(kernel_size, int) and isinstance(dilation, int): | |
| padding = (kernel_size - 1) // 2 * dilation | |
| else: | |
| _conv_dim = len(kernel_size) if isinstance(kernel_size, Sequence) else len(dilation) | |
| kernel_size = _make_ntuple(kernel_size, _conv_dim) | |
| dilation = _make_ntuple(dilation, _conv_dim) | |
| padding = tuple((kernel_size[i] - 1) // 2 * dilation[i] for i in range(_conv_dim)) | |
| if bias is None: | |
| bias = norm_layer is None | |
| layers = [ | |
| conv_layer( | |
| in_channels, | |
| out_channels, | |
| kernel_size, | |
| stride, | |
| padding, | |
| dilation=dilation, | |
| groups=groups, | |
| bias=bias, | |
| ) | |
| ] | |
| if norm_layer is not None: | |
| layers.append(norm_layer(out_channels)) | |
| if activation_layer is not None: | |
| params = {} if inplace is None else {"inplace": inplace} | |
| layers.append(activation_layer(**params)) | |
| super().__init__(*layers) | |
| _log_api_usage_once(self) | |
| self.out_channels = out_channels | |
| if self.__class__ == ConvNormActivation: | |
| warnings.warn( | |
| "Don't use ConvNormActivation directly, please use Conv2dNormActivation and Conv3dNormActivation instead." | |
| ) | |
| class Conv2dNormActivation(ConvNormActivation): | |
| """ | |
| Configurable block used for Convolution2d-Normalization-Activation blocks. | |
| Args: | |
| in_channels (int): Number of channels in the input image | |
| out_channels (int): Number of channels produced by the Convolution-Normalization-Activation block | |
| kernel_size: (int, optional): Size of the convolving kernel. Default: 3 | |
| stride (int, optional): Stride of the convolution. Default: 1 | |
| padding (int, tuple or str, optional): Padding added to all four sides of the input. Default: None, in which case it will be calculated as ``padding = (kernel_size - 1) // 2 * dilation`` | |
| groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 | |
| norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the convolution layer. If ``None`` this layer won't be used. Default: ``torch.nn.BatchNorm2d`` | |
| activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the conv layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU`` | |
| dilation (int): Spacing between kernel elements. Default: 1 | |
| inplace (bool): Parameter for the activation layer, which can optionally do the operation in-place. Default ``True`` | |
| bias (bool, optional): Whether to use bias in the convolution layer. By default, biases are included if ``norm_layer is None``. | |
| """ | |
| def __init__( | |
| self, | |
| in_channels: int, | |
| out_channels: int, | |
| kernel_size: Union[int, Tuple[int, int]] = 3, | |
| stride: Union[int, Tuple[int, int]] = 1, | |
| padding: Optional[Union[int, Tuple[int, int], str]] = None, | |
| groups: int = 1, | |
| norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d, | |
| activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, | |
| dilation: Union[int, Tuple[int, int]] = 1, | |
| inplace: Optional[bool] = True, | |
| bias: Optional[bool] = None, | |
| ) -> None: | |
| super().__init__( | |
| in_channels, | |
| out_channels, | |
| kernel_size, | |
| stride, | |
| padding, | |
| groups, | |
| norm_layer, | |
| activation_layer, | |
| dilation, | |
| inplace, | |
| bias, | |
| torch.nn.Conv2d, | |
| ) | |
| class MLP(torch.nn.Sequential): | |
| """This block implements the multi-layer perceptron (MLP) module. | |
| Args: | |
| in_channels (int): Number of channels of the input | |
| hidden_channels (List[int]): List of the hidden channel dimensions | |
| norm_layer (Callable[..., torch.nn.Module], optional): Norm layer that will be stacked on top of the linear layer. If ``None`` this layer won't be used. Default: ``None`` | |
| activation_layer (Callable[..., torch.nn.Module], optional): Activation function which will be stacked on top of the normalization layer (if not None), otherwise on top of the linear layer. If ``None`` this layer won't be used. Default: ``torch.nn.ReLU`` | |
| inplace (bool, optional): Parameter for the activation layer, which can optionally do the operation in-place. | |
| Default is ``None``, which uses the respective default values of the ``activation_layer`` and Dropout layer. | |
| bias (bool): Whether to use bias in the linear layer. Default ``True`` | |
| dropout (float): The probability for the dropout layer. Default: 0.0 | |
| """ | |
| def __init__( | |
| self, | |
| in_channels: int, | |
| hidden_channels: List[int], | |
| norm_layer: Optional[Callable[..., torch.nn.Module]] = None, | |
| activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, | |
| inplace: Optional[bool] = None, | |
| bias: bool = True, | |
| dropout: float = 0.0, | |
| ): | |
| # The addition of `norm_layer` is inspired from the implementation of TorchMultimodal: | |
| # https://github.com/facebookresearch/multimodal/blob/5dec8a/torchmultimodal/modules/layers/mlp.py | |
| params = {} if inplace is None else {"inplace": inplace} | |
| layers = [] | |
| in_dim = in_channels | |
| for hidden_dim in hidden_channels[:-1]: | |
| layers.append(torch.nn.Linear(in_dim, hidden_dim, bias=bias)) | |
| if norm_layer is not None: | |
| layers.append(norm_layer(hidden_dim)) | |
| layers.append(activation_layer(**params)) | |
| layers.append(torch.nn.Dropout(dropout, **params)) | |
| in_dim = hidden_dim | |
| layers.append(torch.nn.Linear(in_dim, hidden_channels[-1], bias=bias)) | |
| layers.append(torch.nn.Dropout(dropout, **params)) | |
| super().__init__(*layers) | |
| _log_api_usage_once(self) | |
| def conv3x3( | |
| in_channels: int, | |
| out_channels: int, | |
| stride: int = 1, | |
| groups: int = 1, | |
| dilation: int = 1, | |
| ) -> nn.Conv2d: | |
| """3x3 convolution with padding""" | |
| return nn.Conv2d( | |
| in_channels, | |
| out_channels, | |
| kernel_size=3, | |
| stride=stride, | |
| padding=dilation, | |
| groups=groups, | |
| bias=False, | |
| dilation=dilation, | |
| ) | |
| def conv1x1(in_channels: int, out_channels: int, stride: int = 1) -> nn.Conv2d: | |
| """1x1 convolution""" | |
| return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False) | |
| class BasicBlock(nn.Module): | |
| expansion: int = 1 | |
| def __init__( | |
| self, | |
| in_channels: int, | |
| out_channels: int, | |
| stride: int = 1, | |
| groups: int = 1, | |
| base_width: int = 64, | |
| dilation: int = 1, | |
| norm_layer: Optional[Callable[..., nn.Module]] = None, | |
| **kwargs: Any, | |
| ) -> None: | |
| super().__init__() | |
| if norm_layer is None: | |
| norm_layer = nn.BatchNorm2d | |
| if groups != 1 or base_width != 64: | |
| raise ValueError("BasicBlock only supports groups=1 and base_width=64") | |
| if dilation > 1: | |
| raise NotImplementedError("Dilation > 1 not supported in BasicBlock") | |
| # Both self.conv1 and self.downsample layers downsample the input when stride != 1 | |
| self.conv1 = conv3x3(in_channels, out_channels, stride) | |
| self.bn1 = norm_layer(out_channels) | |
| self.relu = nn.ReLU(inplace=True) | |
| self.conv2 = conv3x3(out_channels, out_channels) | |
| self.bn2 = norm_layer(out_channels) | |
| self.stride = stride | |
| if in_channels != out_channels: | |
| self.downsample = nn.Sequential( | |
| conv1x1(in_channels, out_channels), | |
| nn.BatchNorm2d(out_channels), | |
| ) | |
| else: | |
| self.downsample = nn.Identity() | |
| def forward(self, x: Tensor) -> Tensor: | |
| identity = x | |
| out = self.conv1(x) | |
| out = self.bn1(out) | |
| out = self.relu(out) | |
| out = self.conv2(out) | |
| out = self.bn2(out) | |
| out += self.downsample(identity) | |
| out = self.relu(out) | |
| return out | |
| class Bottleneck(nn.Module): | |
| # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) | |
| # while original implementation places the stride at the first 1x1 convolution(self.conv1) | |
| # according to "Deep residual learning for image recognition" https://arxiv.org/abs/1512.03385. | |
| # This variant is also known as ResNet V1.5 and improves accuracy according to | |
| # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. | |
| def __init__( | |
| self, | |
| in_channels: int, | |
| out_channels: int, | |
| stride: int = 1, | |
| groups: int = 1, | |
| base_width: int = 64, | |
| dilation: int = 1, | |
| expansion: int = 4, | |
| norm_layer: Optional[Callable[..., nn.Module]] = None, | |
| **kwargs: Any, | |
| ) -> None: | |
| super().__init__() | |
| if norm_layer is None: | |
| norm_layer = nn.BatchNorm2d | |
| width = int(out_channels * (base_width / 64.0)) * groups | |
| self.expansion = expansion | |
| # Both self.conv2 and self.downsample layers downsample the input when stride != 1 | |
| self.conv1 = conv1x1(in_channels, width) | |
| self.bn1 = norm_layer(width) | |
| self.conv2 = conv3x3(width, width, stride, groups, dilation) | |
| self.bn2 = norm_layer(width) | |
| self.conv3 = conv1x1(width, out_channels * self.expansion) | |
| self.bn3 = norm_layer(out_channels * self.expansion) | |
| self.relu = nn.ReLU(inplace=True) | |
| self.stride = stride | |
| if in_channels != out_channels: | |
| self.downsample = nn.Sequential( | |
| conv1x1(in_channels, out_channels), | |
| nn.BatchNorm2d(out_channels), | |
| ) | |
| else: | |
| self.downsample = nn.Identity() | |
| def forward(self, x: Tensor) -> Tensor: | |
| identity = x | |
| out = self.conv1(x) | |
| out = self.bn1(out) | |
| out = self.relu(out) | |
| out = self.conv2(out) | |
| out = self.bn2(out) | |
| out = self.relu(out) | |
| out = self.conv3(out) | |
| out = self.bn3(out) | |
| out += self.downsample(identity) | |
| out = self.relu(out) | |
| return out | |
| def _init_weights(model: nn.Module) -> None: | |
| for m in model.modules(): | |
| if isinstance(m, nn.Conv2d): | |
| nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") | |
| if m.bias is not None: | |
| nn.init.constant_(m.bias, 0.) | |
| elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): | |
| nn.init.constant_(m.weight, 1.) | |
| if m.bias is not None: | |
| nn.init.constant_(m.bias, 0.) | |
| elif isinstance(m, nn.Linear): | |
| nn.init.normal_(m.weight, std=0.01) | |
| if m.bias is not None: | |
| nn.init.constant_(m.bias, 0.) | |
| class Upsample(nn.Module): | |
| def __init__( | |
| self, | |
| size: Union[int, Tuple[int, int]] = None, | |
| scale_factor: Union[float, Tuple[float, float]] = None, | |
| mode: str = "nearest", | |
| align_corners: bool = False, | |
| antialias: bool = False, | |
| ) -> None: | |
| super().__init__() | |
| self.interpolate = partial( | |
| F.interpolate, | |
| size=size, | |
| scale_factor=scale_factor, | |
| mode=mode, | |
| align_corners=align_corners, | |
| antialias=antialias, | |
| ) | |
| def forward(self, x: Tensor) -> Tensor: | |
| return self.interpolate(x) | |
| def make_vgg_layers(cfg: List[Union[str, int]], in_channels: int = 3, batch_norm: bool = False, dilation: int = 1) -> nn.Sequential: | |
| layers = [] | |
| for v in cfg: | |
| if v == "M": | |
| layers += [nn.MaxPool2d(kernel_size=2, stride=2)] | |
| elif v == "U": | |
| layers += [Upsample(scale_factor=2, mode="bilinear")] | |
| else: | |
| conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=dilation, dilation=dilation) | |
| if batch_norm: | |
| layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] | |
| else: | |
| layers += [conv2d, nn.ReLU(inplace=True)] | |
| in_channels = v | |
| return nn.Sequential(*layers) | |
| def make_resnet_layers( | |
| block: Union[BasicBlock, Bottleneck], | |
| cfg: List[Union[int, str]], | |
| in_channels: int, | |
| dilation: int = 1, | |
| expansion: int = 1, | |
| ) -> nn.Sequential: | |
| layers = [] | |
| for v in cfg: | |
| if v == "U": | |
| layers.append(Upsample(scale_factor=2, mode="bilinear")) | |
| else: | |
| layers.append(block( | |
| in_channels=in_channels, | |
| out_channels=v, | |
| dilation=dilation, | |
| expansion=expansion, | |
| )) | |
| in_channels = v | |
| layers = nn.Sequential(*layers) | |
| layers.apply(_init_weights) | |
| return layers | |