# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import warnings
from types import SimpleNamespace

import torch
from torch import nn
from torch.nn import functional as F

from .misc import ApplyNoise
from imaginaire.third_party.upfirdn2d.upfirdn2d import Blur


class _BaseConvBlock(nn.Module):
    r"""An abstract wrapper class that wraps a torch convolution or linear
    layer with normalization and nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode,
                 weight_norm_type, weight_norm_params, activation_norm_type, activation_norm_params, nonlinearity,
                 inplace_nonlinearity, apply_noise, blur, order, input_dim, clamp, blur_kernel, output_scale,
                 init_gain):
        super().__init__()
        from .nonlinearity import get_nonlinearity_layer
        from .weight_norm import get_weight_norm_layer
        from .activation_norm import get_activation_norm_layer
        self.weight_norm_type = weight_norm_type
        self.stride = stride
        self.clamp = clamp
        self.init_gain = init_gain

        # Nonlinearity layer.
        if 'fused' in nonlinearity:
            # Fusing nonlinearity with bias.
            lr_mul = getattr(weight_norm_params, 'lr_mul', 1)
            conv_before_nonlinearity = order.find('C') < order.find('A')
            if conv_before_nonlinearity:
                assert bias is True
                bias = False
            channel = out_channels if conv_before_nonlinearity else in_channels
            nonlinearity_layer = get_nonlinearity_layer(
                nonlinearity, inplace=inplace_nonlinearity,
                num_channels=channel, lr_mul=lr_mul)
        else:
            nonlinearity_layer = get_nonlinearity_layer(
                nonlinearity, inplace=inplace_nonlinearity)

        # Noise injection layer.
        if apply_noise:
            order = order.replace('C', 'CG')
            noise_layer = ApplyNoise()
        else:
            noise_layer = None

        # Convolutional layer.
        if blur:
            assert blur_kernel is not None
            if stride == 2:
                # Blur - Conv - Noise - Activate
                p = (len(blur_kernel) - 2) + (kernel_size - 1)
                pad0, pad1 = (p + 1) // 2, p // 2
                padding = 0
                blur_layer = Blur(
                    blur_kernel, pad=(pad0, pad1), padding_mode=padding_mode
                )
                order = order.replace('C', 'BC')
            elif stride == 0.5:
                # Conv - Blur - Noise - Activate
                padding = 0
                p = (len(blur_kernel) - 2) - (kernel_size - 1)
                pad0, pad1 = (p + 1) // 2 + 1, p // 2 + 1
                blur_layer = Blur(
                    blur_kernel, pad=(pad0, pad1), padding_mode=padding_mode
                )
                order = order.replace('C', 'CB')
            elif stride == 1:
                # No blur for now.
                blur_layer = nn.Identity()
            else:
                raise NotImplementedError
        else:
            blur_layer = nn.Identity()

        if weight_norm_params is None:
            weight_norm_params = SimpleNamespace()
        weight_norm = get_weight_norm_layer(
            weight_norm_type, **vars(weight_norm_params))
        conv_layer = weight_norm(self._get_conv_layer(
            in_channels, out_channels, kernel_size, stride, padding, dilation,
            groups, bias, padding_mode, input_dim))

        # Normalization layer.
        conv_before_norm = order.find('C') < order.find('N')
        norm_channels = out_channels if conv_before_norm else in_channels
        if activation_norm_params is None:
            activation_norm_params = SimpleNamespace()
        activation_norm_layer = get_activation_norm_layer(
            norm_channels,
            activation_norm_type,
            input_dim,
            **vars(activation_norm_params))

        # Mapping from operation names to layers.
        mappings = {'C': {'conv': conv_layer},
                    'N': {'norm': activation_norm_layer},
                    'A': {'nonlinearity': nonlinearity_layer}}
        mappings.update({'B': {'blur': blur_layer}})
        mappings.update({'G': {'noise': noise_layer}})

        # All layers in order.
        self.layers = nn.ModuleDict()
        for op in order:
            if list(mappings[op].values())[0] is not None:
                self.layers.update(mappings[op])

        # Whether this block expects conditional inputs.
        self.conditional = \
            getattr(conv_layer, 'conditional', False) or \
            getattr(activation_norm_layer, 'conditional', False)

        # Scale the output by a learnable scalar parameter.
        if output_scale is not None:
            self.output_scale = nn.Parameter(torch.tensor(output_scale))
        else:
            self.register_parameter("output_scale", None)

    def forward(self, x, *cond_inputs, **kw_cond_inputs):
        r"""
        Args:
            x (tensor): Input tensor.
            cond_inputs (list of tensors) : Conditional input tensors.
            kw_cond_inputs (dict) : Keyword conditional inputs.
        """
        for key, layer in self.layers.items():
            if getattr(layer, 'conditional', False):
                # Layers that require conditional inputs.
                x = layer(x, *cond_inputs, **kw_cond_inputs)
            else:
                x = layer(x)
            if self.clamp is not None and isinstance(layer, nn.Conv2d):
                x.clamp_(max=self.clamp)
            if key == 'conv':
                if self.output_scale is not None:
                    x = x * self.output_scale
        return x

    def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                        padding, dilation, groups, bias, padding_mode,
                        input_dim):
        # Returns the convolutional layer.
        if input_dim == 0:
            layer = nn.Linear(in_channels, out_channels, bias)
        else:
            if stride < 1:  # Fractionally-strided convolution.
                padding_mode = 'zeros'
                assert padding == 0
                layer_type = getattr(nn, f'ConvTranspose{input_dim}d')
                stride = round(1 / stride)
            else:
                layer_type = getattr(nn, f'Conv{input_dim}d')
            layer = layer_type(
                in_channels, out_channels, kernel_size, stride, padding,
                dilation=dilation, groups=groups, bias=bias,
                padding_mode=padding_mode
            )
        return layer

    def __repr__(self):
        main_str = self._get_name() + '('
        child_lines = []
        for name, layer in self.layers.items():
            mod_str = repr(layer)
            if name == 'conv' and self.weight_norm_type != 'none' and \
                    self.weight_norm_type != '':
                mod_str = mod_str[:-1] + \
                    ', weight_norm={}'.format(self.weight_norm_type) + ')'
            if name == 'conv' and getattr(layer, 'base_lr_mul', 1) != 1:
                mod_str = mod_str[:-1] + \
                    ', lr_mul={}'.format(layer.base_lr_mul) + ')'
            mod_str = self._addindent(mod_str, 2)
            child_lines.append(mod_str)
        if len(child_lines) == 1:
            main_str += child_lines[0]
        else:
            main_str += '\n  ' + '\n  '.join(child_lines) + '\n'
        main_str += ')'
        return main_str

    @staticmethod
    def _addindent(s_, numSpaces):
        # Indent every line after the first by ``numSpaces`` spaces
        # (same helper as torch.nn.modules.module._addindent).
        s = s_.split('\n')
        # Don't do anything for single-line strings.
        if len(s) == 1:
            return s_
        first = s.pop(0)
        s = [(numSpaces * ' ') + line for line in s]
        s = '\n'.join(s)
        s = first + '\n' + s
        return s
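

# Example (illustration only, not part of the library): how the ``order``
# string maps onto the layer sequence. A ``Conv2dBlock`` (defined below)
# built with order='CNA' applies convolution, then normalization, then the
# nonlinearity; with apply_noise=True the 'C' entry expands to 'CG', so noise
# injection runs right after the convolution.
#
#   >>> block = Conv2dBlock(3, 64, 3, padding=1,
#   ...                     activation_norm_type='instance',
#   ...                     nonlinearity='leakyrelu', order='CNA')
#   >>> list(block.layers.keys())
#   ['conv', 'norm', 'nonlinearity']
#   >>> y = block(torch.randn(4, 3, 32, 32))   # y: (4, 64, 32, 32)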


class ModulatedConv2dBlock(_BaseConvBlock):
    r"""A wrapper class that wraps ``ModulatedConv2d`` (StyleGAN2-style weight
    modulation/demodulation) with normalization and nonlinearity. The style
    input is projected to per-input-channel scales by an internal
    ``LinearBlock`` before being passed to the convolution.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=True, blur=True, order='CNA', demodulate=True,
                 eps=1e-8, style_dim=None, clamp=None, blur_kernel=(1, 3, 3, 1), output_scale=None, init_gain=1.0):
        self.eps = eps
        self.demodulate = demodulate
        assert style_dim is not None
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise, blur,
                         order, 2, clamp, blur_kernel, output_scale, init_gain)
        self.modulation = LinearBlock(style_dim, in_channels,
                                      weight_norm_type=weight_norm_type,
                                      weight_norm_params=weight_norm_params)

    def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                        padding, dilation, groups, bias, padding_mode,
                        input_dim):
        assert input_dim == 2
        layer = ModulatedConv2d(
            in_channels, out_channels, kernel_size, stride, padding,
            dilation, groups, bias, padding_mode, self.demodulate, self.eps)
        return layer

    def forward(self, x, *cond_inputs, **kw_cond_inputs):
        for layer in self.layers.values():
            if getattr(layer, 'conditional', False):
                # Layers that require conditional inputs.
                assert len(cond_inputs) == 1
                style = cond_inputs[0]
                x = layer(
                    x, self.modulation(style), **kw_cond_inputs
                )
            else:
                x = layer(x)
            if self.clamp is not None and isinstance(layer, ModulatedConv2d):
                x.clamp_(max=self.clamp)
        return x

    def __repr__(self):
        main_str = self._get_name() + '('
        child_lines = []
        for name, layer in self.layers.items():
            mod_str = repr(layer)
            if name == 'conv' and self.weight_norm_type != 'none' and \
                    self.weight_norm_type != '':
                mod_str = mod_str[:-1] + \
                    ', weight_norm={}'.format(self.weight_norm_type) + \
                    ', demodulate={}'.format(self.demodulate) + ')'
            mod_str = self._addindent(mod_str, 2)
            child_lines.append(mod_str)
        child_lines.append(
            self._addindent('Modulation(' + repr(self.modulation) + ')', 2)
        )
        if len(child_lines) == 1:
            main_str += child_lines[0]
        else:
            main_str += '\n  ' + '\n  '.join(child_lines) + '\n'
        main_str += ')'
        return main_str


class ModulatedConv2d(nn.Module):
    r"""2D convolution with per-sample weight modulation and optional
    demodulation, as used in StyleGAN2. The style projection is assumed to
    happen outside this module; ``forward`` expects the already-projected
    per-input-channel scales.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 dilation, groups, bias, padding_mode, demodulate=True,
                 eps=1e-8):
        # Only the vanilla configuration is supported.
        assert dilation == 1 and groups == 1
        super().__init__()
        self.eps = eps
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.padding = padding
        self.stride = stride
        self.padding_mode = padding_mode
        # Explicit padding used when padding_mode != 'zeros' (mirrors what
        # nn.Conv2d stores as _reversed_padding_repeated_twice).
        self._reversed_padding_repeated_twice = [padding] * 4
        self.weight = nn.Parameter(
            torch.randn(out_channels, in_channels, kernel_size, kernel_size)
        )
        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_channels))
        else:
            # noinspection PyTypeChecker
            self.register_parameter('bias', None)
        self.demodulate = demodulate
        self.conditional = True

    def forward(self, x, style, **_kwargs):
        batch, in_channel, height, width = x.shape
        # We assume the modulation layer is outside this module, so ``style``
        # already holds one scale per input channel.
        style = style.view(batch, 1, in_channel, 1, 1)
        weight = self.weight.unsqueeze(0) * style
        if self.demodulate:
            demod = torch.rsqrt(
                weight.pow(2).sum([2, 3, 4]) + self.eps)
            weight = weight * demod.view(batch, self.out_channels, 1, 1, 1)
        weight = weight.view(
            batch * self.out_channels,
            in_channel, self.kernel_size, self.kernel_size
        )
        if self.bias is not None:
            bias = self.bias.repeat(batch)
        else:
            bias = self.bias
        x = x.view(1, batch * in_channel, height, width)
        if self.padding_mode != 'zeros':
            x = F.pad(x, self._reversed_padding_repeated_twice,
                      mode=self.padding_mode)
            padding = (0, 0)
        else:
            padding = self.padding
        if self.stride == 0.5:
            weight = weight.view(
                batch, self.out_channels, in_channel,
                self.kernel_size, self.kernel_size
            )
            weight = weight.transpose(1, 2).reshape(
                batch * in_channel, self.out_channels,
                self.kernel_size, self.kernel_size
            )
            out = F.conv_transpose2d(
                x, weight, bias, padding=padding, stride=2, groups=batch
            )
        elif self.stride == 2:
            out = F.conv2d(
                x, weight, bias, padding=padding, stride=2, groups=batch
            )
        else:
            out = F.conv2d(x, weight, bias, padding=padding, groups=batch)
        _, _, height, width = out.shape
        out = out.view(batch, self.out_channels, height, width)
        return out

    def extra_repr(self):
        s = ('{in_channels}, {out_channels}, kernel_size={kernel_size}'
             ', stride={stride}')
        if self.bias is None:
            s += ', bias=False'
        if self.padding_mode != 'zeros':
            s += ', padding_mode={padding_mode}'
        return s.format(**self.__dict__)
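

# Example (illustration only, not part of the library): per-sample modulated
# convolution. The style vector is projected to one scale per input channel
# by ``ModulatedConv2dBlock.modulation`` and consumed by ``ModulatedConv2d``.
#
#   >>> block = ModulatedConv2dBlock(32, 64, 3, padding=1, style_dim=512,
#   ...                              apply_noise=False, blur=False,
#   ...                              nonlinearity='leakyrelu')
#   >>> x = torch.randn(4, 32, 16, 16)
#   >>> w = torch.randn(4, 512)            # one style vector per sample
#   >>> y = block(x, w)                    # y: (4, 64, 16, 16)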


class LinearBlock(_BaseConvBlock):
    r"""A Wrapper class that wraps ``torch.nn.Linear`` with normalization and
    nonlinearity.

    Args:
        in_features (int): Number of channels in the input tensor.
        out_features (int): Number of channels in the output tensor.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, add
            Gaussian noise with learnable magnitude after the
            fully-connected layer.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: fully-connected,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            apply the fully-connected layer first, then normalization, then
            nonlinearity.
    """

    def __init__(self, in_features, out_features, bias=True,
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, order='CNA', clamp=None, blur_kernel=(1, 3, 3, 1), output_scale=None,
                 init_gain=1.0, **_kwargs):
        if bool(_kwargs):
            warnings.warn(f"Unused keyword arguments {_kwargs}")
        super().__init__(in_features, out_features, None, None,
                         None, None, None, bias,
                         None, weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise,
                         False, order, 0, clamp, blur_kernel, output_scale,
                         init_gain)
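

# Example (illustration only, not part of the library): a small MLP layer
# built from LinearBlock, with spectral weight normalization and a ReLU.
#
#   >>> fc = LinearBlock(512, 256, weight_norm_type='spectral',
#   ...                  nonlinearity='relu', order='CNA')
#   >>> y = fc(torch.randn(8, 512))        # y: (8, 256)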


class EmbeddingBlock(_BaseConvBlock):
    r"""A wrapper class that wraps ``torch.nn.Embedding`` with normalization
    and nonlinearity. Accepts the same keyword arguments as ``LinearBlock``.
    """

    def __init__(self, in_features, out_features, bias=True,
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, order='CNA', clamp=None, output_scale=None,
                 init_gain=1.0, **_kwargs):
        if bool(_kwargs):
            warnings.warn(f"Unused keyword arguments {_kwargs}")
        super().__init__(in_features, out_features, None, None,
                         None, None, None, bias,
                         None, weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise,
                         False, order, 0, clamp, None, output_scale,
                         init_gain)

    def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                        padding, dilation, groups, bias, padding_mode,
                        input_dim):
        assert input_dim == 0
        return nn.Embedding(in_channels, out_channels)


class Embedding2dBlock(_BaseConvBlock):
    r"""A wrapper class that wraps ``Embedding2d`` (embedding lookup on a 2D
    label map) with normalization and nonlinearity. Accepts the same keyword
    arguments as ``LinearBlock``.
    """

    def __init__(self, in_features, out_features, bias=True,
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, order='CNA', clamp=None, output_scale=None,
                 init_gain=1.0, **_kwargs):
        if bool(_kwargs):
            warnings.warn(f"Unused keyword arguments {_kwargs}")
        super().__init__(in_features, out_features, None, None,
                         None, None, None, bias,
                         None, weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise,
                         False, order, 0, clamp, None, output_scale,
                         init_gain)

    def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                        padding, dilation, groups, bias, padding_mode,
                        input_dim):
        assert input_dim == 0
        return Embedding2d(in_channels, out_channels)


class Conv1dBlock(_BaseConvBlock):
    r"""A Wrapper class that wraps ``torch.nn.Conv1d`` with normalization and
    nonlinearity.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, blur=False, order='CNA', clamp=None, output_scale=None, init_gain=1.0, **_kwargs):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise,
                         blur, order, 1, clamp, None, output_scale, init_gain)


class Conv2dBlock(_BaseConvBlock):
    r"""A Wrapper class that wraps ``torch.nn.Conv2d`` with normalization and
    nonlinearity.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, blur=False, order='CNA', clamp=None, blur_kernel=(1, 3, 3, 1),
                 output_scale=None, init_gain=1.0):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity,
                         apply_noise, blur, order, 2, clamp, blur_kernel, output_scale, init_gain)
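

# Example (illustration only, not part of the library): a typical
# discriminator-style block combining spectral weight normalization,
# instance normalization and a LeakyReLU, in 'CNA' order.
#
#   >>> conv = Conv2dBlock(64, 128, 3, stride=2, padding=1,
#   ...                    weight_norm_type='spectral',
#   ...                    activation_norm_type='instance',
#   ...                    nonlinearity='leakyrelu')
#   >>> y = conv(torch.randn(2, 64, 64, 64))   # y: (2, 128, 32, 32)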


class Conv3dBlock(_BaseConvBlock):
    r"""A Wrapper class that wraps ``torch.nn.Conv3d`` with normalization and
    nonlinearity.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, blur=False, order='CNA', clamp=None, blur_kernel=(1, 3, 3, 1), output_scale=None,
                 init_gain=1.0):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity,
                         apply_noise, blur, order, 3, clamp, blur_kernel, output_scale, init_gain)


class _BaseHyperConvBlock(_BaseConvBlock):
    r"""An abstract wrapper class that wraps a hyper convolutional layer
    with normalization and nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, groups, bias,
                 padding_mode,
                 weight_norm_type, weight_norm_params,
                 activation_norm_type, activation_norm_params,
                 nonlinearity, inplace_nonlinearity, apply_noise, blur,
                 is_hyper_conv, is_hyper_norm, order, input_dim, clamp=None, blur_kernel=(1, 3, 3, 1),
                 output_scale=None, init_gain=1.0):
        self.is_hyper_conv = is_hyper_conv
        if is_hyper_conv:
            weight_norm_type = 'none'
        if is_hyper_norm:
            activation_norm_type = 'hyper_' + activation_norm_type
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise, blur,
                         order, input_dim, clamp, blur_kernel, output_scale, init_gain)

    def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                        padding, dilation, groups, bias, padding_mode,
                        input_dim):
        if input_dim == 0:
            raise ValueError('HyperLinearBlock is not supported.')
        else:
            name = 'HyperConv' if self.is_hyper_conv else 'nn.Conv'
            layer_type = eval(name + '%dd' % input_dim)
            layer = layer_type(
                in_channels, out_channels, kernel_size, stride, padding,
                dilation, groups, bias, padding_mode)
        return layer


class HyperConv2dBlock(_BaseHyperConvBlock):
    r"""A Wrapper class that wraps ``HyperConv2d`` with normalization and
    nonlinearity.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        is_hyper_conv (bool, optional, default=False): If ``True``, use
            ``HyperConv2d``, otherwise use ``torch.nn.Conv2d``.
        is_hyper_norm (bool, optional, default=False): If ``True``, use
            hyper normalizations.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 is_hyper_conv=False, is_hyper_norm=False,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, blur=False, order='CNA', clamp=None):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise, blur,
                         is_hyper_conv, is_hyper_norm, order, 2, clamp)


class HyperConv2d(nn.Module):
    r"""Hyper Conv2d initialization.

    Args:
        in_channels (int): Dummy parameter.
        out_channels (int): Dummy parameter.
        kernel_size (int or tuple): Dummy parameter.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution. Default: 1
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        padding_mode (string, optional, default='zeros'):
            ``'zeros'``, ``'reflect'``, ``'replicate'``
            or ``'circular'``.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True): If ``True``,
            adds a learnable bias to the output.
    """

    def __init__(self, in_channels=0, out_channels=0, kernel_size=3,
                 stride=1, padding=1, dilation=1, groups=1, bias=True,
                 padding_mode='zeros'):
        super().__init__()
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.use_bias = bias
        self.padding_mode = padding_mode
        self.conditional = True

    def forward(self, x, *args, conv_weights=(None, None), **kwargs):
        r"""Hyper Conv2d forward. Convolve x using the provided weight and bias.

        Args:
            x (N x C x H x W tensor): Input tensor.
            conv_weights (N x C2 x C1 x k x k tensor or list of tensors):
                Convolution weights or [weight, bias].
        Returns:
            y (N x C2 x H x W tensor): Output tensor.
        """
        if conv_weights is None:
            conv_weight, conv_bias = None, None
        elif isinstance(conv_weights, torch.Tensor):
            conv_weight, conv_bias = conv_weights, None
        else:
            conv_weight, conv_bias = conv_weights
        if conv_weight is None:
            return x
        if conv_bias is None:
            if self.use_bias:
                raise ValueError('bias not provided but set to true during '
                                 'initialization')
            conv_bias = [None] * x.size(0)
        if self.padding_mode != 'zeros':
            x = F.pad(x, [self.padding] * 4, mode=self.padding_mode)
            padding = 0
        else:
            padding = self.padding

        y = None
        # noinspection PyArgumentList
        for i in range(x.size(0)):
            if self.stride >= 1:
                yi = F.conv2d(x[i: i + 1],
                              weight=conv_weight[i], bias=conv_bias[i],
                              stride=self.stride, padding=padding,
                              dilation=self.dilation, groups=self.groups)
            else:
                yi = F.conv_transpose2d(x[i: i + 1], weight=conv_weight[i],
                                        bias=conv_bias[i], padding=self.padding,
                                        stride=int(1 / self.stride),
                                        dilation=self.dilation,
                                        output_padding=self.padding,
                                        groups=self.groups)
            y = torch.cat([y, yi]) if y is not None else yi
        return y
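

# Example (illustration only, not part of the library): HyperConv2d applies
# per-sample weights predicted by another network. Here the weights are
# random placeholders of shape (N, C_out, C_in, k, k).
#
#   >>> hconv = HyperConv2d(padding=1)
#   >>> x = torch.randn(2, 16, 32, 32)
#   >>> w = torch.randn(2, 32, 16, 3, 3)    # per-sample conv weights
#   >>> b = torch.randn(2, 32)              # per-sample biases
#   >>> y = hconv(x, conv_weights=(w, b))   # y: (2, 32, 32, 32)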


class _BasePartialConvBlock(_BaseConvBlock):
    r"""An abstract wrapper class that wraps a partial convolutional layer
    with normalization and nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 padding, dilation, groups, bias, padding_mode,
                 weight_norm_type, weight_norm_params,
                 activation_norm_type, activation_norm_params,
                 nonlinearity, inplace_nonlinearity,
                 multi_channel, return_mask,
                 apply_noise, order, input_dim, clamp=None, blur_kernel=(1, 3, 3, 1), output_scale=None, init_gain=1.0):
        self.multi_channel = multi_channel
        self.return_mask = return_mask
        self.partial_conv = True
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity, apply_noise,
                         False, order, input_dim, clamp, blur_kernel, output_scale, init_gain)

    def _get_conv_layer(self, in_channels, out_channels, kernel_size, stride,
                        padding, dilation, groups, bias, padding_mode,
                        input_dim):
        if input_dim == 2:
            layer_type = PartialConv2d
        elif input_dim == 3:
            layer_type = PartialConv3d
        else:
            raise ValueError('Partial conv only supports 2D and 3D conv now.')
        layer = layer_type(
            in_channels, out_channels, kernel_size, stride, padding,
            dilation, groups, bias, padding_mode,
            multi_channel=self.multi_channel, return_mask=self.return_mask)
        return layer

    def forward(self, x, *cond_inputs, mask_in=None, **kw_cond_inputs):
        r"""
        Args:
            x (tensor): Input tensor.
            cond_inputs (list of tensors) : Conditional input tensors.
            mask_in (tensor, optional, default=``None``) If not ``None``,
                it masks the valid input region.
            kw_cond_inputs (dict) : Keyword conditional inputs.
        Returns:
            (tuple):
              - x (tensor): Output tensor.
              - mask_out (tensor, optional): Masks the valid output region.
        """
        mask_out = None
        for layer in self.layers.values():
            if getattr(layer, 'conditional', False):
                x = layer(x, *cond_inputs, **kw_cond_inputs)
            elif getattr(layer, 'partial_conv', False):
                x = layer(x, mask_in=mask_in, **kw_cond_inputs)
                if isinstance(x, tuple):
                    x, mask_out = x
            else:
                x = layer(x)
        if mask_out is not None:
            return x, mask_out
        return x


class PartialConv2dBlock(_BasePartialConvBlock):
    r"""A Wrapper class that wraps ``PartialConv2d`` with normalization and
    nonlinearity.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
        multi_channel (bool, optional, default=False): If ``True``, use
            different masks for different channels.
        return_mask (bool, optional, default=True): If ``True``, the
            forward call also returns a new mask.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 multi_channel=False, return_mask=True,
                 apply_noise=False, order='CNA', clamp=None):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity,
                         multi_channel, return_mask, apply_noise, order, 2,
                         clamp)
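

# Example (illustration only, not part of the library): partial convolution
# for inpainting. The mask marks valid pixels; the block returns both the
# features and the updated mask.
#
#   >>> pconv = PartialConv2dBlock(3, 32, 3, padding=1, nonlinearity='relu')
#   >>> x = torch.randn(1, 3, 64, 64)
#   >>> mask = torch.ones(1, 1, 64, 64)
#   >>> mask[:, :, 16:48, 16:48] = 0          # hole to be inpainted
#   >>> y, new_mask = pconv(x, mask_in=mask)  # y: (1, 32, 64, 64)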


class PartialConv3dBlock(_BasePartialConvBlock):
    r"""A Wrapper class that wraps ``PartialConv3d`` with normalization and
    nonlinearity.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
        multi_channel (bool, optional, default=False): If ``True``, use
            different masks for different channels.
        return_mask (bool, optional, default=True): If ``True``, the
            forward call also returns a new mask.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 multi_channel=False, return_mask=True,
                 apply_noise=False, order='CNA', clamp=None):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity,
                         multi_channel, return_mask, apply_noise, order, 3,
                         clamp)


class _MultiOutBaseConvBlock(_BaseConvBlock):
    r"""An abstract wrapper class that wraps a convolutional layer with
    normalization and nonlinearity. It can return multiple outputs, if some
    layers in the block return more than one output.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, groups, bias, padding_mode,
                 weight_norm_type, weight_norm_params, activation_norm_type, activation_norm_params, nonlinearity,
                 inplace_nonlinearity, apply_noise, blur, order, input_dim, clamp=None, blur_kernel=(1, 3, 3, 1),
                 output_scale=None, init_gain=1.0):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity,
                         apply_noise, blur, order, input_dim, clamp, blur_kernel, output_scale, init_gain)
        self.multiple_outputs = True

    def forward(self, x, *cond_inputs, **kw_cond_inputs):
        r"""
        Args:
            x (tensor): Input tensor.
            cond_inputs (list of tensors) : Conditional input tensors.
            kw_cond_inputs (dict) : Keyword conditional inputs.
        Returns:
            (tuple):
              - x (tensor): Main output tensor.
              - other_outputs (list of tensors): Other output tensors.
        """
        other_outputs = []
        for layer in self.layers.values():
            if getattr(layer, 'conditional', False):
                # Layers that require conditional inputs.
                x = layer(x, *cond_inputs, **kw_cond_inputs)
            elif getattr(layer, 'multiple_outputs', False):
                x, other_output = layer(x)
                other_outputs.append(other_output)
            else:
                x = layer(x)
        return (x, *other_outputs)


class MultiOutConv2dBlock(_MultiOutBaseConvBlock):
    r"""A Wrapper class that wraps ``torch.nn.Conv2d`` with normalization and
    nonlinearity. It can return multiple outputs, if some layers in the block
    return more than one output.

    Args:
        in_channels (int): Number of channels in the input tensor.
        out_channels (int): Number of channels in the output tensor.
        kernel_size (int or tuple): Size of the convolving kernel.
        stride (int or float or tuple, optional, default=1):
            Stride of the convolution.
        padding (int or tuple, optional, default=0):
            Zero-padding added to both sides of the input.
        dilation (int or tuple, optional, default=1):
            Spacing between kernel elements.
        groups (int, optional, default=1): Number of blocked connections
            from input channels to output channels.
        bias (bool, optional, default=True):
            If ``True``, adds a learnable bias to the output.
        padding_mode (string, optional, default='zeros'): Type of padding:
            ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``.
        weight_norm_type (str, optional, default='none'):
            Type of weight normalization.
            ``'none'``, ``'spectral'``, ``'weight'``
            or ``'weight_demod'``.
        weight_norm_params (obj, optional, default=None):
            Parameters of weight normalization.
            If not ``None``, ``weight_norm_params.__dict__`` will be used as
            keyword arguments when initializing weight normalization.
        activation_norm_type (str, optional, default='none'):
            Type of activation normalization.
            ``'none'``, ``'instance'``, ``'batch'``, ``'sync_batch'``,
            ``'layer'``, ``'layer_2d'``, ``'group'``, ``'adaptive'``,
            ``'spatially_adaptive'`` or ``'hyper_spatially_adaptive'``.
        activation_norm_params (obj, optional, default=None):
            Parameters of activation normalization.
            If not ``None``, ``activation_norm_params.__dict__`` will be used
            as keyword arguments when initializing activation normalization.
        nonlinearity (str, optional, default='none'):
            Type of nonlinear activation function.
            ``'none'``, ``'relu'``, ``'leakyrelu'``, ``'prelu'``,
            ``'tanh'``, ``'sigmoid'`` or ``'softmax'``.
        inplace_nonlinearity (bool, optional, default=False): If ``True``,
            set ``inplace=True`` when initializing the nonlinearity layer.
        apply_noise (bool, optional, default=False): If ``True``, adds
            Gaussian noise with learnable magnitude to the convolution output.
        order (str, optional, default='CNA'): Order of operations.
            ``'C'``: convolution,
            ``'N'``: normalization,
            ``'A'``: nonlinear activation.
            For example, a block initialized with ``order='CNA'`` will
            do convolution first, then normalization, then nonlinearity.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 padding_mode='zeros',
                 weight_norm_type='none', weight_norm_params=None,
                 activation_norm_type='none', activation_norm_params=None,
                 nonlinearity='none', inplace_nonlinearity=False,
                 apply_noise=False, blur=False, order='CNA', clamp=None):
        super().__init__(in_channels, out_channels, kernel_size, stride,
                         padding, dilation, groups, bias, padding_mode,
                         weight_norm_type, weight_norm_params,
                         activation_norm_type, activation_norm_params,
                         nonlinearity, inplace_nonlinearity,
                         apply_noise, blur, order, 2, clamp)
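

# Example (illustration only, not part of the library): MultiOutConv2dBlock
# always returns a tuple; the first element is the main feature map and any
# remaining elements come from layers that expose ``multiple_outputs``.
# With a plain configuration the tuple simply has length one.
#
#   >>> block = MultiOutConv2dBlock(16, 32, 3, padding=1, nonlinearity='relu')
#   >>> out = block(torch.randn(2, 16, 24, 24))
#   >>> y = out[0]                           # y: (2, 32, 24, 24)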


###############################################################################
# BSD 3-Clause License
#
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Author & Contact: Guilin Liu (guilinl@nvidia.com)
###############################################################################
class PartialConv2d(nn.Conv2d):
    r"""Partial 2D convolution in
    "Image inpainting for irregular holes using partial convolutions."
    Liu et al., ECCV 2018
    """

    def __init__(self, *args, multi_channel=False, return_mask=True, **kwargs):
        # Whether the mask is multi-channel or not.
        self.multi_channel = multi_channel
        self.return_mask = return_mask
        super(PartialConv2d, self).__init__(*args, **kwargs)
        if self.multi_channel:
            self.weight_maskUpdater = torch.ones(self.out_channels,
                                                 self.in_channels,
                                                 self.kernel_size[0],
                                                 self.kernel_size[1])
        else:
            self.weight_maskUpdater = torch.ones(1, 1, self.kernel_size[0],
                                                 self.kernel_size[1])
        shape = self.weight_maskUpdater.shape
        self.slide_winsize = shape[1] * shape[2] * shape[3]
        self.last_size = (None, None, None, None)
        self.update_mask = None
        self.mask_ratio = None
        self.partial_conv = True

    def forward(self, x, mask_in=None):
        r"""
        Args:
            x (tensor): Input tensor.
            mask_in (tensor, optional, default=``None``) If not ``None``,
                it masks the valid input region.
        """
        assert len(x.shape) == 4
        if mask_in is not None or self.last_size != tuple(x.shape):
            self.last_size = tuple(x.shape)
            with torch.no_grad():
                if self.weight_maskUpdater.type() != x.type():
                    self.weight_maskUpdater = self.weight_maskUpdater.to(x)
                if mask_in is None:
                    # If the mask is not provided, create one that marks the
                    # whole input as valid.
                    if self.multi_channel:
                        mask = torch.ones(x.data.shape[0],
                                          x.data.shape[1],
                                          x.data.shape[2],
                                          x.data.shape[3]).to(x)
                    else:
                        mask = torch.ones(1, 1, x.data.shape[2],
                                          x.data.shape[3]).to(x)
                else:
                    mask = mask_in
                self.update_mask = F.conv2d(mask, self.weight_maskUpdater,
                                            bias=None, stride=self.stride,
                                            padding=self.padding,
                                            dilation=self.dilation, groups=1)
                # For mixed precision training, change eps from 1e-8 to 1e-6.
                eps = 1e-6
                self.mask_ratio = self.slide_winsize / (self.update_mask + eps)
                self.update_mask = torch.clamp(self.update_mask, 0, 1)
                self.mask_ratio = torch.mul(self.mask_ratio, self.update_mask)
        raw_out = super(PartialConv2d, self).forward(
            torch.mul(x, mask) if mask_in is not None else x)
        if self.bias is not None:
            bias_view = self.bias.view(1, self.out_channels, 1, 1)
            output = torch.mul(raw_out - bias_view, self.mask_ratio) + bias_view
            output = torch.mul(output, self.update_mask)
        else:
            output = torch.mul(raw_out, self.mask_ratio)
        if self.return_mask:
            return output, self.update_mask
        else:
            return output


class PartialConv3d(nn.Conv3d):
    r"""Partial 3D convolution in
    "Image inpainting for irregular holes using partial convolutions."
    Liu et al., ECCV 2018
    """

    def __init__(self, *args, multi_channel=False, return_mask=True, **kwargs):
        # Whether the mask is multi-channel or not.
        self.multi_channel = multi_channel
        self.return_mask = return_mask
        super(PartialConv3d, self).__init__(*args, **kwargs)
        if self.multi_channel:
            self.weight_maskUpdater = \
                torch.ones(self.out_channels, self.in_channels,
                           self.kernel_size[0], self.kernel_size[1],
                           self.kernel_size[2])
        else:
            self.weight_maskUpdater = torch.ones(1, 1, self.kernel_size[0],
                                                 self.kernel_size[1],
                                                 self.kernel_size[2])
        self.weight_maskUpdater = self.weight_maskUpdater.to('cuda')
        shape = self.weight_maskUpdater.shape
        self.slide_winsize = shape[1] * shape[2] * shape[3] * shape[4]
        self.partial_conv = True

    def forward(self, x, mask_in=None):
        r"""
        Args:
            x (tensor): Input tensor.
            mask_in (tensor, optional, default=``None``) If not ``None``, it
                masks the valid input region.
        """
        assert len(x.shape) == 5
        # Unlike the 2D version, this implementation requires a mask.
        assert mask_in is not None, 'PartialConv3d requires mask_in.'
        with torch.no_grad():
            mask = mask_in
            update_mask = F.conv3d(mask, self.weight_maskUpdater, bias=None,
                                   stride=self.stride, padding=self.padding,
                                   dilation=self.dilation, groups=1)
            mask_ratio = self.slide_winsize / (update_mask + 1e-8)
            update_mask = torch.clamp(update_mask, 0, 1)
            mask_ratio = torch.mul(mask_ratio, update_mask)
        raw_out = super(PartialConv3d, self).forward(torch.mul(x, mask_in))
        if self.bias is not None:
            bias_view = self.bias.view(1, self.out_channels, 1, 1, 1)
            output = torch.mul(raw_out - bias_view, mask_ratio) + bias_view
            if mask_in is not None:
                output = torch.mul(output, update_mask)
        else:
            output = torch.mul(raw_out, mask_ratio)
        if self.return_mask:
            return output, update_mask
        else:
            return output


class Embedding2d(nn.Embedding):
    r"""``torch.nn.Embedding`` applied to a 2D label map: an input of shape
    (N, 1, H, W) holding integer labels is mapped to an output of shape
    (N, out_channels, H, W).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(in_channels, out_channels)

    def forward(self, x):
        return F.embedding(
            x.squeeze(1).long(), self.weight, self.padding_idx, self.max_norm,
            self.norm_type, self.scale_grad_by_freq, self.sparse).permute(0, 3, 1, 2).contiguous()
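

# Example (illustration only, not part of the library): turning an integer
# segmentation map into per-pixel embedding features with Embedding2d.
#
#   >>> embed = Embedding2d(35, 64)              # 35 classes -> 64 channels
#   >>> labels = torch.randint(0, 35, (2, 1, 128, 128))
#   >>> feats = embed(labels)                    # feats: (2, 64, 128, 128)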