Spaces:

ohayonguy
/

PMRF

Running on Zero

PMRF / utils /create_arch.py

ohayonguy

first commit fixed

b7f3942 about 1 year ago

4.63 kB

	from arch.hourglass import image_transformer_v2 as itv2
	from arch.hourglass.image_transformer_v2 import ImageTransformerDenoiserModelV2
	from arch.swinir.swinir import SwinIR


	def create_arch(arch, condition_channels=0):
	    """Build the model registered under the architecture identifier ``arch``.

	    ``arch`` has the form ``'<name>_<size>'``: ``<name>`` is a top-level key of
	    ``arch_configs`` and ``<size>`` is a key of that family's sub-dict, e.g.
	    ``'swinir_L'`` or ``'hdit_XL2'``.

	    Args:
	        arch: Architecture identifier; it is split at its first underscore.
	        condition_channels: Number of channels added to the configured
	            ``in_channels`` before the model is built.

	    Returns:
	        The object returned by the factory that ``arch_name_to_object``
	        registers for ``<name>``, called with the selected configuration.

	    Raises:
	        ValueError: If ``arch`` contains no underscore.
	        KeyError: If the name or the size is not present in ``arch_configs``.
	    """
	    arch_name, separator, arch_size = arch.partition('_')
	    if not separator:
	        raise ValueError(
	            f"architecture identifier must look like '<name>_<size>', got {arch!r}"
	        )
	    if arch_name not in arch_configs:
	        raise KeyError(
	            f"unknown architecture name {arch_name!r}; available: {sorted(arch_configs)}"
	        )
	    size_configs = arch_configs[arch_name]
	    if arch_size not in size_configs:
	        raise KeyError(
	            f"unknown size {arch_size!r} for architecture {arch_name!r}; "
	            f"available: {sorted(size_configs)}"
	        )

	    # A shallow copy is enough here: only the integer 'in_channels' entry is
	    # rebound, so the shared template in arch_configs is left untouched.
	    arch_config = size_configs[arch_size].copy()
	    arch_config['in_channels'] += condition_channels
	    return arch_name_to_object[arch_name](**arch_config)


	# Keyword-argument templates, keyed first by architecture name and then by
	# size label. create_arch() copies the selected leaf dict, adds the condition
	# channels to 'in_channels', and forwards it to the matching factory.
	arch_configs = {
	    'hdit': {
	        # The 'widths', 'depths', 'self_attns' and 'dropout_rate' lists hold
	        # one entry per level; create_hdit_model() requires equal lengths.
	        'ImageNet256Sp4': {
	            'in_channels': 3,
	            'out_channels': 3,
	            'widths': [256, 512, 1024],
	            'depths': [2, 2, 8],
	            'patch_size': [4, 4],
	            'self_attns': [
	                {'type': 'neighborhood', 'd_head': 64, 'kernel_size': 7},
	                {'type': 'neighborhood', 'd_head': 64, 'kernel_size': 7},
	                {'type': 'global', 'd_head': 64},
	            ],
	            'mapping_depth': 2,
	            'mapping_width': 768,
	            'dropout_rate': [0, 0, 0],
	            'mapping_dropout_rate': 0.0,
	        },
	        'XL2': {
	            'in_channels': 3,
	            'out_channels': 3,
	            'widths': [384, 768],
	            'depths': [2, 11],
	            'patch_size': [4, 4],
	            'self_attns': [
	                {'type': 'neighborhood', 'd_head': 64, 'kernel_size': 7},
	                {'type': 'global', 'd_head': 64},
	            ],
	            'mapping_depth': 2,
	            'mapping_width': 768,
	            'dropout_rate': [0, 0],
	            'mapping_dropout_rate': 0.0,
	        },
	    },
	    'swinir': {
	        # 'depths' and 'num_heads' carry the same number of entries in both sizes.
	        'M': {
	            'in_channels': 3,
	            'out_channels': 3,
	            'embed_dim': 120,
	            'depths': [6, 6, 6, 6, 6],
	            'num_heads': [6, 6, 6, 6, 6],
	            'resi_connection': '1conv',
	            'sf': 8,
	        },
	        'L': {
	            'in_channels': 3,
	            'out_channels': 3,
	            'embed_dim': 180,
	            'depths': [6, 6, 6, 6, 6, 6, 6, 6],
	            'num_heads': [6, 6, 6, 6, 6, 6, 6, 6],
	            'resi_connection': '1conv',
	            'sf': 8,
	        },
	    },
	}


	def create_swinir_model(
	    in_channels,
	    out_channels,
	    embed_dim,
	    depths,
	    num_heads,
	    resi_connection,
	    sf,
	):
	    """Instantiate ``SwinIR`` from the per-size settings plus fixed defaults.

	    Args:
	        in_channels: Forwarded as SwinIR's ``in_chans``.
	        out_channels: Forwarded as SwinIR's ``num_out_ch``.
	        embed_dim: Forwarded unchanged.
	        depths: Forwarded unchanged.
	        num_heads: Forwarded unchanged.
	        resi_connection: Forwarded unchanged.
	        sf: Forwarded unchanged as SwinIR's ``sf``.

	    Returns:
	        The constructed ``SwinIR`` instance.
	    """
	    # Settings that are the same for every call. Note that 'unshuffle_scale'
	    # stays 8 regardless of the ``sf`` argument.
	    fixed_settings = {
	        'img_size': 64,
	        'patch_size': 1,
	        'window_size': 8,
	        'mlp_ratio': 2,
	        'img_range': 1.0,
	        'upsampler': "nearest+conv",
	        'unshuffle': True,
	        'unshuffle_scale': 8,
	    }
	    return SwinIR(
	        in_chans=in_channels,
	        num_out_ch=out_channels,
	        embed_dim=embed_dim,
	        depths=depths,
	        num_heads=num_heads,
	        resi_connection=resi_connection,
	        sf=sf,
	        **fixed_settings
	    )


	def _build_self_attn_spec(attn_config):
	    """Translate one ``self_attns`` config dict into the matching ``itv2`` spec."""
	    attn_type = attn_config['type']
	    if attn_type == 'global':
	        return itv2.GlobalAttentionSpec(attn_config.get('d_head', 64))
	    if attn_type == 'neighborhood':
	        return itv2.NeighborhoodAttentionSpec(
	            attn_config.get('d_head', 64),
	            attn_config.get('kernel_size', 7),
	        )
	    if attn_type == 'shifted-window':
	        # 'window_size' has no default: a missing key raises KeyError.
	        return itv2.ShiftedWindowAttentionSpec(
	            attn_config.get('d_head', 64),
	            attn_config['window_size'],
	        )
	    if attn_type == 'none':
	        return itv2.NoAttentionSpec()
	    raise ValueError(f'unsupported self attention type {attn_type}')


	def create_hdit_model(widths,
	                      depths,
	                      self_attns,
	                      dropout_rate,
	                      mapping_depth,
	                      mapping_width,
	                      mapping_dropout_rate,
	                      in_channels,
	                      out_channels,
	                      patch_size
	                      ):
	    """Build an ``ImageTransformerDenoiserModelV2`` from per-level settings.

	    ``widths``, ``depths``, ``self_attns`` and ``dropout_rate`` are parallel
	    sequences with one entry per level. Each level is turned into an
	    ``itv2.LevelSpec`` whose third argument is three times the level width;
	    the mapping spec likewise receives ``mapping_width * 3``.

	    Args:
	        widths: Per-level widths.
	        depths: Per-level depths.
	        self_attns: Per-level dicts with a ``'type'`` key equal to
	            ``'global'``, ``'neighborhood'``, ``'shifted-window'`` or
	            ``'none'``. ``'d_head'`` defaults to 64 and ``'kernel_size'`` to
	            7; ``'window_size'`` is required for ``'shifted-window'``.
	        dropout_rate: Per-level dropout values.
	        mapping_depth: First argument of ``itv2.MappingSpec``.
	        mapping_width: Second argument of ``itv2.MappingSpec``.
	        mapping_dropout_rate: Fourth argument of ``itv2.MappingSpec``.
	        in_channels: Forwarded to the model unchanged.
	        out_channels: Forwarded to the model unchanged.
	        patch_size: Forwarded to the model unchanged.

	    Returns:
	        The constructed model, created with ``num_classes=0`` and
	        ``mapping_cond_dim=0``.

	    Raises:
	        ValueError: If the per-level sequences differ in length, or if a
	            self-attention type is not one of the supported values.
	        KeyError: If a ``'shifted-window'`` entry lacks ``'window_size'``.
	    """
	    # Validate explicitly instead of with ``assert``: assertions are removed
	    # under ``python -O`` and ``zip`` would then silently drop extra levels.
	    num_levels = len(widths)
	    per_level_args = (
	        ('depths', depths),
	        ('self_attns', self_attns),
	        ('dropout_rate', dropout_rate),
	    )
	    for arg_name, values in per_level_args:
	        if len(values) != num_levels:
	            raise ValueError(
	                f'{arg_name} has {len(values)} entries but widths has {num_levels}'
	            )

	    levels = []
	    for depth, width, attn_config, dropout in zip(depths, widths, self_attns, dropout_rate):
	        # Resolve the attention spec first so a bad type fails before the
	        # level spec is constructed.
	        attn_spec = _build_self_attn_spec(attn_config)
	        levels.append(itv2.LevelSpec(depth, width, width * 3, attn_spec, dropout))

	    mapping = itv2.MappingSpec(
	        mapping_depth,
	        mapping_width,
	        mapping_width * 3,
	        mapping_dropout_rate,
	    )
	    return ImageTransformerDenoiserModelV2(
	        levels=levels,
	        mapping=mapping,
	        in_channels=in_channels,
	        out_channels=out_channels,
	        patch_size=patch_size,
	        num_classes=0,
	        mapping_cond_dim=0,
	    )


	# Registry used by create_arch(): maps the architecture name (the part of the
	# identifier before the underscore) to the factory that builds the model.
	arch_name_to_object = {
	    'hdit': create_hdit_model,
	    'swinir': create_swinir_model,
	}