Spaces:
Runtime error
Runtime error
| # Lint as: python2, python3 | |
| # Copyright 2019 The TensorFlow Authors All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ============================================================================== | |
| """Augment slim.conv2d with optional Weight Standardization (WS). | |
WS is a normalization method that accelerates micro-batch training. When used
with Group Normalization and trained with 1 image/GPU, WS is able to match or
outperform Batch Normalization (BN) trained with large batch sizes.
| [1] Siyuan Qiao, Huiyu Wang, Chenxi Liu, Wei Shen, Alan Yuille | |
| Weight Standardization. arXiv:1903.10520 | |
| [2] Lei Huang, Xianglong Liu, Yang Liu, Bo Lang, Dacheng Tao | |
| Centered Weight Normalization in Accelerating Training of Deep Neural | |
| Networks. ICCV 2017 | |
| """ | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import tensorflow as tf | |
| from tensorflow.contrib import framework as contrib_framework | |
| from tensorflow.contrib import layers as contrib_layers | |
| from tensorflow.contrib.layers.python.layers import layers | |
| from tensorflow.contrib.layers.python.layers import utils | |
class Conv2D(tf.keras.layers.Conv2D, tf.layers.Layer):
  """Keras 2D convolution layer with optional Weight Standardization (WS).

  The layer owns a convolution kernel that is convolved (strictly speaking,
  cross-correlated) with the input. When `use_weight_standardization` is set,
  the kernel is standardized to zero mean / unit variance before it is applied.
  A bias is added when `use_bias` is true, and `activation` (if any) is applied
  last.
  """

  def __init__(self,
               filters,
               kernel_size,
               strides=(1, 1),
               padding='valid',
               data_format='channels_last',
               dilation_rate=(1, 1),
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=tf.zeros_initializer(),
               kernel_regularizer=None,
               bias_regularizer=None,
               use_weight_standardization=False,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               **kwargs):
    """Builds the layer; all arguments but one go to the Keras base class.

    Args:
      filters: Integer, number of output filters of the convolution.
      kernel_size: Integer or tuple/list of 2 integers giving the height and
        width of the convolution window. A single integer applies to both
        spatial dimensions.
      strides: Integer or tuple/list of 2 integers giving the strides along
        height and width. A single integer applies to both spatial dimensions.
        A stride != 1 is incompatible with a `dilation_rate` != 1.
      padding: `"valid"` or `"same"` (case-insensitive).
      data_format: `channels_last` (default) for inputs shaped
        `(batch, height, width, channels)`, or `channels_first` for inputs
        shaped `(batch, channels, height, width)`.
      dilation_rate: Integer or tuple/list of 2 integers giving the dilation
        rate for dilated convolution. A single integer applies to both spatial
        dimensions. A `dilation_rate` != 1 is incompatible with a stride != 1.
      activation: Activation function, or None for a linear activation.
      use_bias: Boolean, whether a bias vector is used.
      kernel_initializer: Initializer for the convolution kernel.
      bias_initializer: Initializer for the bias vector. If None, the default
        initializer will be used.
      kernel_regularizer: Optional regularizer for the convolution kernel.
      bias_regularizer: Optional regularizer for the bias vector.
      use_weight_standardization: Boolean, whether the kernel is standardized
        before being applied. This is the only argument handled by this
        subclass itself.
      activity_regularizer: Optional regularizer function for the output.
      kernel_constraint: Optional projection function applied to the kernel
        after an `Optimizer` update (e.g. norm or value constraints). It takes
        the unprojected variable and returns a projected variable of the same
        shape. Not safe with asynchronous distributed training.
      bias_constraint: Optional projection function applied to the bias after
        an `Optimizer` update.
      trainable: Boolean, if `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
      name: A string, the name of the layer.
      **kwargs: Arbitrary keyword arguments passed to tf.keras.layers.Conv2D
    """
    super(Conv2D, self).__init__(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        trainable=trainable,
        name=name,
        **kwargs)
    # Stored only after the base initializer has run.
    self.use_weight_standardization = use_weight_standardization

  def call(self, inputs):
    """Applies the (optionally standardized) convolution, bias and activation.

    Args:
      inputs: Input tensor handed to the layer's convolution op.

    Returns:
      The convolution output, with the bias added when `use_bias` is set and
      passed through `activation` when one was given.
    """
    kernel = self.kernel
    if self.use_weight_standardization:
      # Moments are reduced over axes 0, 1 and 2 of the kernel; for the usual
      # Keras (height, width, in_channels, out_channels) layout that yields one
      # mean/variance per output filter. 1e-5 guards against division by zero.
      kernel_mean, kernel_var = tf.nn.moments(
          kernel, [0, 1, 2], keep_dims=True)
      kernel = (kernel - kernel_mean) / tf.sqrt(kernel_var + 1e-5)
    outputs = self._convolution_op(inputs, kernel)

    if self.use_bias:
      channels_first = self.data_format == 'channels_first'
      if channels_first and self.rank == 1:
        # tf.nn.bias_add does not accept a 1D input tensor.
        outputs += tf.reshape(self.bias, (1, self.filters, 1))
      else:
        outputs = tf.nn.bias_add(
            outputs,
            self.bias,
            data_format='NCHW' if channels_first else 'NHWC')

    if self.activation is None:
      return outputs
    return self.activation(outputs)
@contrib_framework.add_arg_scope
def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=tf.nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=contrib_layers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=tf.zeros_initializer(),
           biases_regularizer=None,
           use_weight_standardization=False,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.

  `convolution` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved (actually cross-correlated) with the
  `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
  provided (such as `batch_norm`), it is then applied. Otherwise, if
  `normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
  variable would be created and added the activations. Finally, if
  `activation_fn` is not `None`, it is applied to the activations as well.

  Performs atrous convolution with input stride/dilation rate equal to `rate`
  if a value > 1 for any dimension of `rate` is specified. In this case
  `stride` values != 1 are not supported.

  The function is registered with `add_arg_scope`, so its keyword arguments can
  be supplied through an enclosing `arg_scope`.

  Args:
    inputs: A rank-4 Tensor of shape `[batch_size, height, width, in_channels]`
      if data_format does not start with "NC" (default), or
      `[batch_size, in_channels, height, width]` if data_format starts with
      "NC". Inputs of any other rank are rejected.
    num_outputs: Integer, the number of output filters.
    kernel_size: A sequence of 2 positive integers specifying the spatial
      dimensions of the filters. Can be a single integer to specify the same
      value for all spatial dimensions.
    stride: A sequence of 2 positive integers specifying the stride at which to
      compute output. Can be a single integer to specify the same value for all
      spatial dimensions. Specifying any `stride` value != 1 is incompatible
      with specifying any `rate` value != 1.
    padding: One of `"VALID"` or `"SAME"`.
    data_format: A string or None. Specifies whether the channel dimension of
      the `input` and output is the last dimension (default, or if `data_format`
      does not start with "NC"), or the second dimension (if `data_format`
      starts with "NC"). The accepted strings are "NWC", "NCW", "NHWC", "NCHW",
      "NDHWC" and "NCDHW"; only the "NC" prefix is inspected.
    rate: A sequence of 2 positive integers specifying the dilation rate to use
      for atrous convolution. Can be a single integer to specify the same value
      for all spatial dimensions. Specifying any `rate` value != 1 is
      incompatible with specifying any `stride` value != 1.
    activation_fn: Activation function. The default value is a ReLU function.
      Explicitly set it to None to skip it and maintain a linear activation.
    normalizer_fn: Normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
      default set to None for no normalizer function
    normalizer_params: Normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    use_weight_standardization: Boolean, whether the layer uses weight
      standardization.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: Collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.

  Returns:
    A tensor representing the output of the operation.

  Raises:
    ValueError: If `data_format` is invalid.
    ValueError: If `inputs` is not a rank-4 tensor (including unknown rank).
    ValueError: Both 'rate' and `stride` are not uniformly 1.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  # Map the Keras variable names onto the slim-style names.
  # pylint: disable=protected-access
  layer_variable_getter = layers._build_variable_getter({
      'bias': 'biases',
      'kernel': 'weights'
  })
  # pylint: enable=protected-access

  with tf.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = tf.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if input_rank != 4:
      # `input_rank` is None for tensors of unknown rank, so it is formatted
      # with %s; %d would raise TypeError instead of the intended ValueError.
      raise ValueError('Convolution expects input with rank %d, got %s' %
                       (4, input_rank))

    data_format = ('channels_first' if data_format and
                   data_format.startswith('NC') else 'channels_last')

    # Biases are only used when no normalizer replaces them and an initializer
    # was supplied. Pass a real boolean rather than the initializer object.
    use_bias = (not normalizer_fn) and biases_initializer is not None

    layer = Conv2D(
        filters=num_outputs,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        data_format=data_format,
        dilation_rate=rate,
        activation=None,
        use_bias=use_bias,
        kernel_initializer=weights_initializer,
        bias_initializer=biases_initializer,
        kernel_regularizer=weights_regularizer,
        bias_regularizer=biases_regularizer,
        use_weight_standardization=use_weight_standardization,
        activity_regularizer=None,
        trainable=trainable,
        name=sc.name,
        dtype=inputs.dtype.base_dtype,
        _scope=sc,
        _reuse=reuse)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    # pylint: disable=protected-access
    layers._add_variable_to_collections(layer.kernel, variables_collections,
                                        'weights')
    if layer.use_bias:
      layers._add_variable_to_collections(layer.bias, variables_collections,
                                          'biases')
    # pylint: enable=protected-access

    # The layer itself is built with activation=None so that normalization can
    # be applied between the convolution and the activation.
    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
  """Strided 2-D convolution with 'SAME' padding.

  For stride 1 this is a plain `conv2d` with 'SAME' padding. For any other
  stride the input is zero-padded explicitly and `conv2d` is then run with
  'VALID' padding.

  Note that

     net = conv2d_same(inputs, num_outputs, 3, stride=stride)

  is equivalent to

     net = conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
     net = subsample(net, factor=stride)

  whereas

     net = conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')

  is different when the input's height or width is even, which is why we add the
  current function. For more details, see ResnetUtilsTest.testConv2DSameEven().

  Args:
    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
    num_outputs: An integer, the number of output filters.
    kernel_size: An int with the kernel_size of the filters.
    stride: An integer, the output stride.
    rate: An integer, rate for atrous convolution.
    scope: Scope.

  Returns:
    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
      the convolution output.
  """
  if stride == 1:
    conv_stride = 1
    conv_padding = 'SAME'
  else:
    # Extent of the kernel once the dilation gaps are taken into account.
    effective_kernel = kernel_size + (kernel_size - 1) * (rate - 1)
    total_pad = effective_kernel - 1
    pad_before = total_pad // 2
    pad_after = total_pad - pad_before
    # Pad only the two middle (height and width) dimensions.
    spatial_pad = [pad_before, pad_after]
    inputs = tf.pad(inputs, [[0, 0], spatial_pad, spatial_pad, [0, 0]])
    conv_stride = stride
    conv_padding = 'VALID'

  return conv2d(
      inputs,
      num_outputs,
      kernel_size,
      stride=conv_stride,
      rate=rate,
      padding=conv_padding,
      scope=scope)