# Copyright 2022 Christian J. Steinmetz
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# TCN implementation adapted from:
# https://github.com/csteinmetz1/micro-tcn/blob/main/microtcn/tcn.py

import torch
from argparse import ArgumentParser

from deepafx_st.utils import center_crop, causal_crop


class FiLM(torch.nn.Module):
    def __init__(self, num_features, cond_dim):
        super().__init__()
        self.num_features = num_features
        self.bn = torch.nn.BatchNorm1d(num_features, affine=False)
        self.adaptor = torch.nn.Linear(cond_dim, num_features * 2)

    def forward(self, x, cond):
        # project conditioning to 2 x num. conv channels
        cond = self.adaptor(cond)

        # split the projection into gain and bias
        g, b = torch.chunk(cond, 2, dim=-1)

        # add virtual channel dim if needed
        if g.ndim == 2:
            g = g.unsqueeze(1)
            b = b.unsqueeze(1)

        # reshape for application
        g = g.permute(0, 2, 1)
        b = b.permute(0, 2, 1)

        x = self.bn(x)  # apply BatchNorm without affine
        x = (x * g) + b  # then apply conditional affine

        return x
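

# Example (sketch, not part of the original module): FiLM modulates each
# channel of a feature map with a gain and bias predicted from a conditioning
# vector. Shapes below are illustrative.
#
#   film = FiLM(num_features=32, cond_dim=24)
#   x = torch.randn(4, 32, 1024)  # (batch, channels, time)
#   p = torch.randn(4, 24)        # (batch, cond_dim)
#   y = film(x, p)                # (4, 32, 1024)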


class ConditionalTCNBlock(torch.nn.Module):
    def __init__(
        self, in_ch, out_ch, cond_dim, kernel_size=3, dilation=1, causal=False, **kwargs
    ):
        super().__init__()

        self.in_ch = in_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.causal = causal

        self.conv1 = torch.nn.Conv1d(
            in_ch,
            out_ch,
            kernel_size=kernel_size,
            padding=0,  # no padding; the residual branch is cropped to match
            dilation=dilation,
            bias=True,
        )
        self.film = FiLM(out_ch, cond_dim)
        self.relu = torch.nn.PReLU(out_ch)
        self.res = torch.nn.Conv1d(
            in_ch, out_ch, kernel_size=1, groups=in_ch, bias=False
        )

    def forward(self, x, p):
        x_in = x

        x = self.conv1(x)
        x = self.film(x, p)  # apply FiLM conditioning
        x = self.relu(x)
        x_res = self.res(x_in)

        # crop the residual to align with the un-padded convolution output
        if self.causal:
            x = x + causal_crop(x_res, x.shape[-1])
        else:
            x = x + center_crop(x_res, x.shape[-1])

        return x
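

# Example (sketch): since no padding is applied, one block shortens the time
# axis by (kernel_size - 1) * dilation samples.
#
#   block = ConditionalTCNBlock(1, 32, cond_dim=24, kernel_size=15, dilation=2)
#   x = torch.randn(2, 1, 4096)  # (batch, channels, time)
#   p = torch.randn(2, 24)       # (batch, cond_dim)
#   y = block(x, p)              # (2, 32, 4096 - 14 * 2) = (2, 32, 4068)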


class ConditionalTCN(torch.nn.Module):
    """Temporal convolutional network with conditioning module.

    Args:
        sample_rate (float): Audio sample rate.
        num_control_params (int, optional): Dimensionality of the conditioning signal. Default: 24
        ninputs (int, optional): Number of input channels (mono = 1, stereo = 2). Default: 1
        noutputs (int, optional): Number of output channels (mono = 1, stereo = 2). Default: 1
        nblocks (int, optional): Number of total TCN blocks. Default: 10
        kernel_size (int, optional): Width of the convolutional kernels. Default: 15
        dilation_growth (int, optional): Compute the dilation factor at each block as dilation_growth ** (n % stack_size). Default: 2
        channel_growth (int, optional): Compute the output channels at each block as in_ch * channel_growth. Default: 1
        channel_width (int, optional): When channel_growth = 1 all blocks use convolutions with this many channels. Default: 64
        stack_size (int, optional): Number of blocks that constitute a single stack of blocks. Default: 10
        causal (bool, optional): Causal TCN configuration does not consider future input values. Default: False
        skip_connections (bool, optional): Sum the outputs of all blocks before the final 1x1 convolution. Default: False
    """

    def __init__(
        self,
        sample_rate,
        num_control_params=24,
        ninputs=1,
        noutputs=1,
        nblocks=10,
        kernel_size=15,
        dilation_growth=2,
        channel_growth=1,
        channel_width=64,
        stack_size=10,
        causal=False,
        skip_connections=False,
        **kwargs,
    ):
        super().__init__()
        self.num_control_params = num_control_params
        self.ninputs = ninputs
        self.noutputs = noutputs
        self.nblocks = nblocks
        self.kernel_size = kernel_size
        self.dilation_growth = dilation_growth
        self.channel_growth = channel_growth
        self.channel_width = channel_width
        self.stack_size = stack_size
        self.causal = causal
        self.skip_connections = skip_connections
        self.sample_rate = sample_rate

        self.blocks = torch.nn.ModuleList()
        for n in range(nblocks):
            in_ch = out_ch if n > 0 else ninputs

            if self.channel_growth > 1:
                out_ch = in_ch * self.channel_growth
            else:
                out_ch = self.channel_width

            # dilation pattern repeats every stack_size blocks
            # (with the defaults: 1, 2, 4, ..., 512)
            dilation = self.dilation_growth ** (n % self.stack_size)

            self.blocks.append(
                ConditionalTCNBlock(
                    in_ch,
                    out_ch,
                    self.num_control_params,
                    kernel_size=self.kernel_size,
                    dilation=dilation,
                    # note: padding is absorbed by the block's **kwargs;
                    # temporal alignment is handled by cropping instead
                    padding="same" if self.causal else "valid",
                    causal=self.causal,
                )
            )

        self.output = torch.nn.Conv1d(out_ch, noutputs, kernel_size=1)
        self.receptive_field = self.compute_receptive_field()
        # print(
        #     f"TCN receptive field: {self.receptive_field} samples",
        #     f" or {(self.receptive_field/self.sample_rate)*1e3:0.3f} ms",
        # )

    def forward(self, x, p, **kwargs):
        # causally pad input signal
        x = torch.nn.functional.pad(x, (self.receptive_field - 1, 0))

        # iterate over blocks passing conditioning
        for idx, block in enumerate(self.blocks):
            x = block(x, p)
            if self.skip_connections:
                if idx == 0:
                    skips = x
                else:
                    skips = center_crop(skips, x.shape[-1]) + x
            else:
                skips = 0

        # final 1x1 convolution to collapse channels
        out = self.output(x + skips)

        return out

    def compute_receptive_field(self):
        """Compute the receptive field in samples."""
        rf = self.kernel_size
        for n in range(1, self.nblocks):
            dilation = self.dilation_growth ** (n % self.stack_size)
            rf = rf + ((self.kernel_size - 1) * dilation)
        return rf
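

# Minimal smoke test (a sketch, not part of the original module). With the
# defaults (kernel_size=15, dilation_growth=2, nblocks=stack_size=10) the
# receptive field is 15 + 14 * (2 + 4 + ... + 512) = 14323 samples, and the
# causal padding in forward() keeps the output the same length as the input.
if __name__ == "__main__":
    model = ConditionalTCN(sample_rate=24000)
    x = torch.randn(1, 1, 24000)                  # one second of mono audio
    p = torch.randn(1, model.num_control_params)  # random conditioning vector
    with torch.no_grad():
        y = model(x, p)
    print(y.shape)  # torch.Size([1, 1, 24000])
    print(f"receptive field: {model.receptive_field} samples")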