Spaces:

csxmli
/

marconetplusplus

Running on Zero

App Files Files Community

marconetplusplus / networks /psp_encoder_arch.py

csxmli

Upload

981b0ab verified 3 months ago

raw

history blame contribute delete

12 kB

	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from torch.nn import BatchNorm2d as BatchNorm

	import math
	from .prior_arch import PixelNorm, EqualLinear
	import torchvision
	from torchvision.utils import save_image

	def GroupNorm(in_channels):
	    """Build a non-affine ``torch.nn.GroupNorm`` with ``in_channels // 16`` groups.

	    Args:
	        in_channels: Number of channels of the tensor to be normalised.

	    Returns:
	        A ``torch.nn.GroupNorm`` with ``eps=1e-6`` and no learnable
	        scale/shift (``affine=False``).
	    """
	    group_count = in_channels // 16
	    return torch.nn.GroupNorm(group_count, in_channels, eps=1e-6, affine=False)


	# The alias is bound here, so ``Norm`` keeps pointing at the
	# ``in_channels // 16`` factory above even though the name ``GroupNorm`` is
	# defined a second time further down in this file.
	Norm = GroupNorm

	class BasicBlock(nn.Module):
	    """Residual unit: 1x1 conv, norm, LeakyReLU, 3x3 conv, norm, LeakyReLU, then skip-add.

	    The (optionally projected) input is added to the main branch after the
	    second activation, and a third LeakyReLU is applied to the sum.

	    Args:
	        inplanes: Channels of the incoming tensor.
	        planes: Channels produced by both convolutions.
	        stride: Stride handed to the 3x3 convolution.
	        downsample: Optional callable applied to the input to form the
	            skip branch; when ``None`` the input itself is used.
	    """

	    def __init__(self, inplanes, planes, stride=1, downsample=None):
	        super().__init__()
	        # Attribute names and registration order are kept as they were.
	        self.conv1 = conv1x1(inplanes, planes)
	        self.norm1 = Norm(planes)
	        self.relu1 = nn.LeakyReLU(0.2)
	        self.conv2 = conv3x3(planes, planes, stride)
	        self.relu2 = nn.LeakyReLU(0.2)
	        self.norm2 = Norm(planes)
	        self.downsample = downsample
	        self.stride = stride
	        self.relu3 = nn.LeakyReLU(0.2)

	    def forward(self, x):
	        """Return ``relu3(main_branch(x) + skip(x))``."""
	        shortcut = x if self.downsample is None else self.downsample(x)
	        branch = self.relu1(self.norm1(self.conv1(x)))
	        branch = self.relu2(self.norm2(self.conv2(branch)))
	        return self.relu3(branch + shortcut)


	class PSPEncoder(nn.Module):
	    """Encode fixed-width windows cut around given horizontal locations.

	    For every entry of ``locs`` a window of width ``32 * 4`` is cut from the
	    input along its last axis, padded where it touches the image border,
	    passed through a 3x3 stem and five residual stages, fused top-down
	    (stage 5 is resized onto stage 4), flattened and mapped by
	    ``feature2w``.

	    Args:
	        block: Residual block class used to build every stage.
	        layers: Number of blocks in each of the five stages.
	        strides: Per-stage ``(height, width)`` stride of the first block.
	    """

	    def __init__(self, block=BasicBlock, layers=(3, 4, 6, 6, 3),
	                 strides=((2, 2), (1, 2), (2, 2), (1, 2), (2, 2))):
	        super(PSPEncoder, self).__init__()
	        # Channel count entering the next stage; advanced by _make_layer.
	        self.inplanes = 32
	        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1,
	                               bias=False)
	        self.relu = nn.LeakyReLU(0.2)

	        feature_out_dim = 256
	        self.layer1 = self._make_layer(block, 32, layers[0], stride=strides[0])
	        self.layer2 = self._make_layer(block, 64, layers[1], stride=strides[1])
	        self.layer3 = self._make_layer(block, 128, layers[2], stride=strides[2])
	        self.layer4 = self._make_layer(block, 256, layers[3], stride=strides[3])
	        self.layer5 = self._make_layer(block, 512, layers[4], stride=strides[4])

	        # 1x1 projections used by the top-down fusion in forward().
	        self.layer512_to_outdim = nn.Sequential(
	            nn.Conv2d(512, feature_out_dim, kernel_size=1, stride=1, bias=False),
	            nn.LeakyReLU(0.2),
	        )
	        self.layer256_to_512 = nn.Sequential(
	            nn.Conv2d(256, 512, kernel_size=1, stride=1, bias=False),
	            nn.LeakyReLU(0.2),
	        )

	        # Accumulated vertical stride over all stages.
	        self.down_h = 1
	        for stride in strides:
	            self.down_h *= stride[0]
	        # NOTE(review): 32 is presumably the crop height fed to forward();
	        # with the default strides this yields size_h == 8 -- confirm.
	        self.size_h = 32 // self.down_h * 2

	        # The first linear layer consumes the flattened fused feature map.
	        # The original expression had lost its "*" operators and referenced
	        # an undefined name; the product below equals 256 * 8 * 8 for the
	        # default configuration with 32x128 crops.
	        self.feature2w = nn.Sequential(
	            PixelNorm(),
	            EqualLinear(self.size_h * self.size_h * feature_out_dim, 512, bias=True, bias_init_val=0, lr_mul=1, activation='fused_lrelu'),
	            EqualLinear(512, 512, bias=True, bias_init_val=0, lr_mul=1, activation='fused_lrelu'),
	        )

	        # Zero-mean normal init with std = sqrt(2 / (kh * kw * out_channels))
	        # for every convolution in the module tree.
	        for m in self.modules():
	            if isinstance(m, nn.Conv2d):
	                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
	                m.weight.data.normal_(0, math.sqrt(2. / n))

	    def _make_layer(self, block, planes, blocks, stride=1):
	        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

	        Only the first block receives ``stride``; it also gets a 1x1
	        conv + LeakyReLU projection on its skip branch whenever the stride
	        is not 1 or the channel count changes. Updates ``self.inplanes``.
	        """
	        downsample = None
	        if stride != 1 or self.inplanes != planes:
	            # No normalisation on the projection (GroupNorm was disabled here).
	            downsample = nn.Sequential(
	                nn.Conv2d(self.inplanes, planes,
	                          kernel_size=1, stride=stride, bias=False),
	                nn.LeakyReLU(0.2),
	            )
	        layers = [block(self.inplanes, planes, stride, downsample)]
	        self.inplanes = planes
	        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))
	        return nn.Sequential(*layers)

	    def _check_outliers(self, crop_feature, target_width):
	        """Bilinearly resize a 4-D ``crop_feature`` to ``target_width`` if its width differs."""
	        height, width = crop_feature.size(2), crop_feature.size(3)
	        if width == target_width:
	            return crop_feature
	        return F.interpolate(crop_feature, size=(height, target_width), mode='bilinear', align_corners=True)

	    def _check_outliers_pad(self, crop_feature, start, end, max_lr_width, center_loc, extend_W):
	        """Pad a border-clipped window back to ``extend_W`` columns.

	        Args:
	            crop_feature: 4-D window cut from the input.
	            start: Left column of the window (0 when clipped on the left).
	            end: Right column of the window (``max_lr_width`` when clipped
	                on the right).
	            max_lr_width: Width of the tensor the window was cut from.
	            center_loc: Column the window is centred on.
	            extend_W: Target window width.

	        Returns:
	            ``crop_feature`` unchanged when neither border was hit, otherwise
	            the window padded horizontally with its own mean value.
	        """
	        width = crop_feature.size(3)
	        hits_left = start == 0
	        hits_right = end == max_lr_width
	        if hits_left and hits_right:
	            # Input narrower than the window: keep center_loc in the middle.
	            pad_left = extend_W // 2 - center_loc
	            pad_right = extend_W - width - pad_left
	        elif hits_left:
	            pad_left, pad_right = extend_W - width, 0
	        elif hits_right:
	            pad_left, pad_right = 0, extend_W - width
	        else:
	            return crop_feature

	        # The fill value is only needed (and only computed) when padding.
	        fill_value = crop_feature.mean().item()
	        # torchvision's Pad takes [left, top, right, bottom].
	        padder = torchvision.transforms.Pad([pad_left, 0, pad_right, 0], fill=fill_value, padding_mode='constant')
	        return padder(crop_feature)

	    def forward(self, x, locs):
	        """Encode one window per location.

	        Args:
	            x: 4-D input tensor; windows are cut along its last axis.
	            locs: 2-D tensor of locations, one row per batch item. Each
	                value is divided by 4 to obtain a column index into ``x``
	                (presumably ``locs`` lives on a 4x wider grid -- confirm).

	        Returns:
	            Tensor stacking, per batch item, the ``feature2w`` output of
	            every window (leading dims ``locs.size(0)``, ``locs.size(1)``).
	        """
	        w_b = []
	        extend_W = 32 * 4  # width of the window cut around each location
	        half_W = extend_W // 2
	        max_lr_width = x.size(3)
	        for b in range(locs.size(0)):  # locs: 0~2048
	            x_for_w = []
	            for c in range(locs.size(1)):
	                # Plain Python int (truncated like .int()) so slicing and
	                # padding never receive 0-dim tensors.
	                center_loc = int(locs[b][c] / 4)
	                start_x = max(0, center_loc - half_W)
	                end_x = min(center_loc + half_W, max_lr_width)
	                crop_x = x[b:b + 1, :, :, start_x:end_x].detach()
	                crop_x = self._check_outliers_pad(crop_x, start_x, end_x, max_lr_width, center_loc, extend_W)
	                x_for_w.append(crop_x)

	            # All windows of this batch item go through the encoder together.
	            x_for_w = torch.cat(x_for_w, dim=0)

	            x_c1 = self.relu(self.conv1(x_for_w))
	            x_l1 = self.layer1(x_c1)
	            x_l2 = self.layer2(x_l1)
	            x_l3 = self.layer3(x_l2)
	            x_l4 = self.layer4(x_l3)
	            x_l5 = self.layer5(x_l4)
	            # Top-down fusion: stage 5 is resized onto the projected stage 4.
	            pyramid_x1 = _upsample_add(x_l5, self.layer256_to_512(x_l4))
	            pyramid_x = self.layer512_to_outdim(pyramid_x1)
	            w_each_b = self.feature2w(pyramid_x.view(pyramid_x.size(0), -1))

	            w_b.append(w_each_b)
	        return torch.stack(w_b, dim=0)



	# w_b = []
	# for b in range(locs.size(0)): #locs: 0~2048
	# w_c = []
	# for c in range(locs.size(1)):
	# if locs[b][c] < 2048:
	# center_loc = (locs[b][c]/4).int() # 32*512
	# start_x = center_loc - 16
	# end_x = center_loc + 16

	# crop_x0 = x[b:b+1, :, :, start_x:end_x].clone()
	# crop_x = self._check_outliers_pad(crop_x0, start_x, end_x) # 1, 512, 4, 4 or 1, 512, 8, 8

	# # save_image(crop_x[0], 'ss_{}.png'.format(c))
	# x_c1 = self.conv1(crop_x) #1
	# x_c1 = self.relu(x_c1)
	# x_l1 = self.layer1(x_c1) #2
	# x_l2 = self.layer2(x_l1) #1 [2, 64, 16, 256])
	# x_l3 = self.layer3(x_l2) #2 torch.Size([2, 128, 8, 128]
	# x_l4 = self.layer4(x_l3) #1 torch.Size([2, 256, 8, 128])
	# x_l5 = self.layer5(x_l4) #2, torch.Size([2, 512, 4, 64])
	# pyramid_x1 = _upsample_add(x_l5, self.layer256_to_512(x_l4))
	# pyramid_x = self.layer512_to_outdim(pyramid_x1)

	# w = self.feature2w(pyramid_x.view(1, -1)) # 1*512
	# w_c.append(w.squeeze(0))
	# else:
	# w_c.append(w.squeeze(0).detach()*0)
	# w_c = torch.stack(w_c, dim=0)
	# w_b.append(w_c)
	# w_b = torch.stack(w_b, dim=0)
	# print(w_b.size())
	# return w_b #, lr




	# # lr = x.clone()
	# x_c1 = self.conv1(x) #1
	# x_c1 = self.relu(x_c1)
	# x_l1 = self.layer1(x_c1) #2
	# x_l2 = self.layer2(x_l1) #1 [2, 64, 16, 256])
	# x_l3 = self.layer3(x_l2) #2 torch.Size([2, 128, 8, 128]
	# x_l4 = self.layer4(x_l3) #1 torch.Size([2, 256, 8, 128])
	# x_l5 = self.layer5(x_l4) #2, torch.Size([2, 512, 4, 64]) B, 512, 4, 64, 17M parameters

	# pyramid_x1 = _upsample_add(x_l5, self.layer256_to_512(x_l4))
	# pyramid_x = self.layer512_to_outdim(pyramid_x1)
	# # pyramid_x2 = _upsample_add(self.layer128_to_outdim(x_l3), pyramid_x1)
	# B, C, H, W = pyramid_x.size()
	# w_b = []
	# for b in range(locs.size(0)): #locs: 0~2048
	# w_c = []
	# for c in range(locs.size(1)):
	# if locs[b][c] < 2048:
	# center_loc = (locs[b][c]/4/self.down_h).int() # from 32*512 to 4*64
	# start_x = max(0, center_loc-self.size_h//2)
	# end_x = min(center_loc+self.size_h//2, 512//self.down_h)
	# # crop_feature = pyramid_x2[b:b+1, :, :, start_x:end_x].clone()

	# # if end_x - start_x != self.size_h:
	# # bgfill = torch.zeros((B, C, H, self.size_h), dtype=pyramid_x2.dtype, layout=pyramid_x2.layout, device=pyramid_x2.device)
	# # bgfill[:, :, :, self.size_h//2 - (center_loc - start_x):self.size_h//2 - (center_loc - start_x) + end_x - start_x] += pyramid_x2[b:b+1, :, :, start_x:end_x].clone()
	# # crop_feature = bgfill.clone()
	# # else:
	# # crop_feature = pyramid_x2[b:b+1, :, :, start_x:end_x].clone()

	# crop_feature = pyramid_x[b:b+1, :, :, start_x:end_x].clone()
	# crop_feature = self._check_outliers(crop_feature, self.size_h) # 1, 512, 4, 4 or 1, 512, 8, 8

	# # crop_feature = self._check_outliers(crop_feature, self.size_h, start_x, end_x) # 1, 512, 4, 4 or 1, 512, 8, 8
	# print(crop_feature.size())
	# w = self.feature2w(crop_feature.view(1, -1)) # 1*512
	# w_c.append(w.squeeze(0))

	# else:
	# w_c.append(w.squeeze(0).detach()*0)

	# # lr[b:b+1, :, :, center_loc-1:center_loc+1] = 255

	# w_c = torch.stack(w_c, dim=0)
	# w_b.append(w_c)
	# w_b = torch.stack(w_b, dim=0)

	# return w_b #, x #, lr




	def GroupNorm(in_channels):
	    """Build a non-affine ``torch.nn.GroupNorm`` with ``in_channels // 32`` groups.

	    This definition comes after the ``Norm = GroupNorm`` alias earlier in
	    the file, so it rebinds the module-level name ``GroupNorm`` but does
	    not change what ``Norm`` refers to.

	    Args:
	        in_channels: Number of channels of the tensor to be normalised.

	    Returns:
	        A ``torch.nn.GroupNorm`` with ``eps=1e-6`` and ``affine=False``.
	    """
	    group_count = in_channels // 32
	    return torch.nn.GroupNorm(group_count, in_channels, eps=1e-6, affine=False)


	def conv1x1(in_planes, out_planes, stride=1):
	    """Return a bias-free 1x1 ``nn.Conv2d`` mapping ``in_planes`` to ``out_planes`` channels."""
	    pointwise = nn.Conv2d(
	        in_planes,
	        out_planes,
	        kernel_size=1,
	        stride=stride,
	        bias=False,
	    )
	    return pointwise


	def conv3x3(in_planes, out_planes, stride=1):
	    """Return a bias-free 3x3 ``nn.Conv2d`` with a padding of 1."""
	    spatial = nn.Conv2d(
	        in_planes,
	        out_planes,
	        kernel_size=3,
	        stride=stride,
	        padding=1,
	        bias=False,
	    )
	    return spatial




	def _upsample_add(x, y):
	    """Resize ``x`` to the spatial size of ``y`` and add the two maps.

	    Args:
	        x: Top (coarser) 4-D feature map to be resized.
	        y: Lateral 4-D feature map whose height and width set the target size.

	    Returns:
	        ``x`` bilinearly resized (``align_corners=True``) to the height and
	        width of ``y``, plus ``y``.

	    Note:
	        Scaling by a fixed factor of 2 with nearest-neighbour upsampling
	        can miss the lateral size when the original input size is odd,
	        e.g. input ``[N, _, 15, 15]`` -> conv feature map ``[N, _, 8, 8]``
	        -> upsampled ``[N, _, 16, 16]``. Bilinear resizing to an explicit
	        size supports arbitrary output sizes and avoids that mismatch.
	    """
	    _, _, height, width = y.size()
	    resized = F.interpolate(x, size=(height, width), mode='bilinear', align_corners=True)
	    return resized + y