# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The GCA code was heavily based on https://github.com/Yaoyi-Li/GCA-Matting
# and https://github.com/open-mmlab/mmediting

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager, param_init
from paddleseg.utils import utils

from ppmatting.models.layers import GuidedCxtAtten


@manager.MODELS.add_component
class ResNet_D(nn.Layer):
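    """ResNet-style encoder with spectral normalization on every convolution,
    ported from GCA-Matting.

    The spatial sizes in the forward-pass comments assume a 512 x 512 input.

    Args:
        input_channels (int): Channels of the network input, e.g. an RGB
            image concatenated with a trimap.
        layers (list): Number of BasicBlocks in each of the four stages.
        late_downsample (bool, optional): Whether to delay the stem's
            stride-2 downsampling to later layers. Default: False.
        pretrained (str, optional): Path of the pretrained model to load.
            Default: None.
    """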

    def __init__(self,
                 input_channels,
                 layers,
                 late_downsample=False,
                 pretrained=None):
        super().__init__()
        self.pretrained = pretrained
        self._norm_layer = nn.BatchNorm
        self.inplanes = 64
        self.late_downsample = late_downsample
        self.midplanes = 64 if late_downsample else 32
        self.start_stride = [1, 2, 1, 2] if late_downsample else [2, 1, 2, 1]
        self.conv1 = nn.utils.spectral_norm(
            nn.Conv2D(
                input_channels,
                32,
                kernel_size=3,
                stride=self.start_stride[0],
                padding=1,
                bias_attr=False))
        self.conv2 = nn.utils.spectral_norm(
            nn.Conv2D(
                32,
                self.midplanes,
                kernel_size=3,
                stride=self.start_stride[1],
                padding=1,
                bias_attr=False))
        self.conv3 = nn.utils.spectral_norm(
            nn.Conv2D(
                self.midplanes,
                self.inplanes,
                kernel_size=3,
                stride=self.start_stride[2],
                padding=1,
                bias_attr=False))
        self.bn1 = self._norm_layer(32)
        self.bn2 = self._norm_layer(self.midplanes)
        self.bn3 = self._norm_layer(self.inplanes)
        self.activation = nn.ReLU()
        self.layer1 = self._make_layer(
            BasicBlock, 64, layers[0], stride=self.start_stride[3])
        self.layer2 = self._make_layer(BasicBlock, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, layers[2], stride=2)
        self.layer_bottleneck = self._make_layer(
            BasicBlock, 512, layers[3], stride=2)

        self.init_weight()

    def _make_layer(self, block, planes, block_num, stride=1):
        if block_num == 0:
            return nn.Sequential(nn.Identity())
        norm_layer = self._norm_layer
        downsample = None
        if stride != 1:
            downsample = nn.Sequential(
                nn.AvgPool2D(2, stride),
                nn.utils.spectral_norm(
                    conv1x1(self.inplanes, planes * block.expansion)),
                norm_layer(planes * block.expansion), )
        elif self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.utils.spectral_norm(
                    conv1x1(self.inplanes, planes * block.expansion, stride)),
                norm_layer(planes * block.expansion), )

        layers = [block(self.inplanes, planes, stride, downsample, norm_layer)]
        self.inplanes = planes * block.expansion
        for _ in range(1, block_num):
            layers.append(block(self.inplanes, planes, norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.activation(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x1 = self.activation(x)  # N x 32 x 256 x 256

        x = self.conv3(x1)
        x = self.bn3(x)
        x2 = self.activation(x)  # N x 64 x 128 x 128

        x3 = self.layer1(x2)  # N x 64 x 128 x 128
        x4 = self.layer2(x3)  # N x 128 x 64 x 64
        x5 = self.layer3(x4)  # N x 256 x 32 x 32
        x = self.layer_bottleneck(x5)  # N x 512 x 16 x 16

        return x, (x1, x2, x3, x4, x5)

    def init_weight(self):
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                # Spectral norm renames the raw parameter to `weight_orig`,
                # so initialize that one when it exists.
                if hasattr(layer, "weight_orig"):
                    param = layer.weight_orig
                else:
                    param = layer.weight
                param_init.xavier_uniform(param)
            elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
                param_init.constant_init(layer.weight, value=1.0)
                param_init.constant_init(layer.bias, value=0.0)
            elif isinstance(layer, BasicBlock):
                # Zero-init the last BN of each residual branch so every
                # block starts out close to an identity mapping.
                param_init.constant_init(layer.bn2.weight, value=0.0)

        if self.pretrained is not None:
            utils.load_pretrained_model(self, self.pretrained)


@manager.MODELS.add_component
class ResShortCut_D(ResNet_D):
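    """ResNet_D encoder that also returns multi-scale shortcut features.

    Five shortcut heads run on the raw input and on the first four
    intermediate feature maps so a matting decoder can fuse them with the
    bottleneck output; the first three (RGB) channels of the input are
    passed through as well.
    """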

    def __init__(self,
                 input_channels,
                 layers,
                 late_downsample=False,
                 pretrained=None):
        super().__init__(
            input_channels,
            layers,
            late_downsample=late_downsample,
            pretrained=pretrained)
        self.shortcut_inplane = [input_channels, self.midplanes, 64, 128, 256]
        self.shortcut_plane = [32, self.midplanes, 64, 128, 256]

        self.shortcut = nn.LayerList()
        for stage, inplane in enumerate(self.shortcut_inplane):
            self.shortcut.append(
                self._make_shortcut(inplane, self.shortcut_plane[stage]))

    def _make_shortcut(self, inplane, planes):
        return nn.Sequential(
            nn.utils.spectral_norm(
                nn.Conv2D(
                    inplane, planes, kernel_size=3, padding=1,
                    bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(planes),
            nn.utils.spectral_norm(
                nn.Conv2D(
                    planes, planes, kernel_size=3, padding=1, bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(planes))

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)
        out = self.conv2(out)
        out = self.bn2(out)
        x1 = self.activation(out)  # N x 32 x 256 x 256

        out = self.conv3(x1)
        out = self.bn3(out)
        out = self.activation(out)

        x2 = self.layer1(out)  # N x 64 x 128 x 128
        x3 = self.layer2(x2)  # N x 128 x 64 x 64
        x4 = self.layer3(x3)  # N x 256 x 32 x 32
        out = self.layer_bottleneck(x4)  # N x 512 x 16 x 16

        fea1 = self.shortcut[0](x)  # input image and trimap
        fea2 = self.shortcut[1](x1)
        fea3 = self.shortcut[2](x2)
        fea4 = self.shortcut[3](x3)
        fea5 = self.shortcut[4](x4)

        return out, {
            'shortcut': (fea1, fea2, fea3, fea4, fea5),
            'image': x[:, :3, ...]
        }


@manager.MODELS.add_component
class ResGuidedCxtAtten(ResNet_D):
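    """ResNet_D encoder with Guided Contextual Attention (GCA).

    A guidance head with three stride-2 convolutions extracts 1/8-resolution
    image features, and the GCA module uses them, together with an unknown
    mask derived from the trimap, to propagate features at the third stage
    (see https://github.com/Yaoyi-Li/GCA-Matting).
    """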

    def __init__(self,
                 input_channels,
                 layers,
                 late_downsample=False,
                 pretrained=None):
        super().__init__(
            input_channels,
            layers,
            late_downsample=late_downsample,
            pretrained=pretrained)
        self.input_channels = input_channels
        self.shortcut_inplane = [input_channels, self.midplanes, 64, 128, 256]
        self.shortcut_plane = [32, self.midplanes, 64, 128, 256]

        self.shortcut = nn.LayerList()
        for stage, inplane in enumerate(self.shortcut_inplane):
            self.shortcut.append(
                self._make_shortcut(inplane, self.shortcut_plane[stage]))

        self.guidance_head = nn.Sequential(
            nn.Pad2D(
                1, mode="reflect"),
            nn.utils.spectral_norm(
                nn.Conv2D(
                    3, 16, kernel_size=3, padding=0, stride=2,
                    bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(16),
            nn.Pad2D(
                1, mode="reflect"),
            nn.utils.spectral_norm(
                nn.Conv2D(
                    16, 32, kernel_size=3, padding=0, stride=2,
                    bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(32),
            nn.Pad2D(
                1, mode="reflect"),
            nn.utils.spectral_norm(
                nn.Conv2D(
                    32,
                    128,
                    kernel_size=3,
                    padding=0,
                    stride=2,
                    bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(128))

        self.gca = GuidedCxtAtten(128, 128)

        self.init_weight()

    def init_weight(self):
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                initializer = nn.initializer.XavierUniform()
                # Spectral norm renames the raw parameter to `weight_orig`.
                if hasattr(layer, "weight_orig"):
                    param = layer.weight_orig
                else:
                    param = layer.weight
                initializer(param, param.block)
            elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
                param_init.constant_init(layer.weight, value=1.0)
                param_init.constant_init(layer.bias, value=0.0)
            elif isinstance(layer, BasicBlock):
                # Zero-init the last BN of each residual branch.
                param_init.constant_init(layer.bn2.weight, value=0.0)

        if self.pretrained is not None:
            utils.load_pretrained_model(self, self.pretrained)

    def _make_shortcut(self, inplane, planes):
        return nn.Sequential(
            nn.utils.spectral_norm(
                nn.Conv2D(
                    inplane, planes, kernel_size=3, padding=1,
                    bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(planes),
            nn.utils.spectral_norm(
                nn.Conv2D(
                    planes, planes, kernel_size=3, padding=1, bias_attr=False)),
            nn.ReLU(),
            self._norm_layer(planes))

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)
        out = self.conv2(out)
        out = self.bn2(out)
        x1 = self.activation(out)  # N x 32 x 256 x 256

        out = self.conv3(x1)
        out = self.bn3(out)
        out = self.activation(out)

        # Downsample the original image and extract guidance features.
        im_fea = self.guidance_head(x[:, :3, ...])
        if self.input_channels == 6:
            # With a 6-channel input, channel 4 serves as the unknown mask.
            unknown = F.interpolate(
                x[:, 4:5, ...], scale_factor=1 / 8, mode='nearest')
        else:
            # Otherwise, derive the unknown mask from the trimap channel
            # (entries equal to 1).
            unknown = x[:, 3:, ...].equal(paddle.to_tensor([1.]))
            unknown = paddle.cast(unknown, dtype='float32')
            unknown = F.interpolate(unknown, scale_factor=1 / 8, mode='nearest')

        x2 = self.layer1(out)  # N x 64 x 128 x 128
        x3 = self.layer2(x2)  # N x 128 x 64 x 64
        x3 = self.gca(im_fea, x3, unknown)  # contextual attention
        x4 = self.layer3(x3)  # N x 256 x 32 x 32
        out = self.layer_bottleneck(x4)  # N x 512 x 16 x 16

        fea1 = self.shortcut[0](x)  # input image and trimap
        fea2 = self.shortcut[1](x1)
        fea3 = self.shortcut[2](x2)
        fea4 = self.shortcut[3](x3)
        fea5 = self.shortcut[4](x4)

        return out, {
            'shortcut': (fea1, fea2, fea3, fea4, fea5),
            'image_fea': im_fea,
            'unknown': unknown,
        }


class BasicBlock(nn.Layer):
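    """Standard ResNet basic block with spectral normalization applied to
    both 3x3 convolutions."""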
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 norm_layer=None):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm
        # Both self.conv1 and self.downsample layers downsample the input
        # when stride != 1.
        self.conv1 = nn.utils.spectral_norm(conv3x3(inplanes, planes, stride))
        self.bn1 = norm_layer(planes)
        self.activation = nn.ReLU()
        self.conv2 = nn.utils.spectral_norm(conv3x3(planes, planes))
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.activation(out)

        return out


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias_attr=False,
        dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2D(
        in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False)
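

if __name__ == "__main__":
    # Minimal smoke test, not part of the original module. The layer spec
    # [3, 4, 4, 2] and the 512 x 512 input are assumptions chosen to match
    # the shape comments above; input_channels=4 stands for an RGB image
    # concatenated with a single-channel trimap.
    model = ResShortCut_D(input_channels=4, layers=[3, 4, 4, 2])
    model.eval()
    dummy = paddle.rand([1, 4, 512, 512])
    out, mid_fea = model(dummy)
    print(out.shape)  # expected: [1, 512, 16, 16]
    print([tuple(f.shape) for f in mid_fea['shortcut']])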