Spaces:
Running
on
Zero
Running
on
Zero
| from functools import lru_cache | |
| from scipy import signal | |
| import numpy as np | |
| import librosa | |
def mel_basis(hp):
    """Build the mel filterbank described by the hyper-parameters ``hp``.

    Reads ``hp.sample_rate``, ``hp.n_fft``, ``hp.num_mels``, ``hp.fmin`` and
    ``hp.fmax`` and forwards them unchanged to ``librosa.filters.mel``.

    Returns:
        Whatever ``librosa.filters.mel`` returns; the original code annotates
        it as a matrix of shape (nmel, nfreq).

    Raises:
        AssertionError: if ``hp.fmax`` is above ``hp.sample_rate // 2``.
    """
    # The upper mel edge must not exceed half the sample rate.
    half_rate = hp.sample_rate // 2
    assert hp.fmax <= half_rate

    # NOTE(review): nothing is cached here, so the filterbank is rebuilt on
    # every call; the file imports lru_cache but no use is visible -- confirm
    # whether a decorator was intended.
    filterbank_args = {
        "sr": hp.sample_rate,
        "n_fft": hp.n_fft,
        "n_mels": hp.num_mels,
        "fmin": hp.fmin,
        "fmax": hp.fmax,
    }
    return librosa.filters.mel(**filterbank_args)  # -> (nmel, nfreq)
def preemphasis(wav, hp):
    """Apply a first-order pre-emphasis filter to ``wav`` and clip the result.

    The filter is ``y[n] = x[n] - hp.preemphasis * x[n - 1]``, implemented with
    ``scipy.signal.lfilter`` using numerator ``[1, -hp.preemphasis]`` and
    denominator ``[1]``. The filtered signal is then clipped to [-1, 1].

    Args:
        wav: the signal to filter (passed straight to ``lfilter``).
        hp: hyper-parameter object; only ``hp.preemphasis`` is read.

    Returns:
        The filtered, clipped signal as returned by ``np.clip``.

    Raises:
        AssertionError: if ``hp.preemphasis`` equals 0.
    """
    coeff = hp.preemphasis
    assert coeff != 0

    numerator = [1, -coeff]
    denominator = [1]
    emphasized = signal.lfilter(numerator, denominator, wav)

    # Filtering can push samples outside [-1, 1]; bring them back in range.
    return np.clip(emphasized, -1, 1)
def melspectrogram(wav, hp, pad=True):
    """Compute a mel spectrogram of ``wav`` according to ``hp``.

    Steps, each controlled by ``hp``:
      1. If ``hp.preemphasis > 0``, run ``preemphasis`` on the waveform.
      2. Take the STFT via ``_stft`` and keep its magnitudes.
      3. If ``hp.mel_power != 1.0``, raise the magnitudes to that power.
      4. Project onto the mel filterbank from ``mel_basis``.
      5. If ``hp.mel_type == "db"``, convert to decibels with ``_amp_to_db``.
      6. If ``hp.normalized_mels``, rescale with ``_normalize`` and cast to
         float32.

    Args:
        wav: input waveform (assumed 1-D; ``len(wav)`` is used in the sanity
            check).
        hp: hyper-parameter object.
        pad: forwarded to ``_stft`` as its ``pad`` argument; also enables the
            frame-count sanity check at the end.

    Returns:
        The mel spectrogram, annotated (M, T) in the original code.

    Raises:
        AssertionError: if a sanity check fails.
    """
    # Optional pre-emphasis.
    if hp.preemphasis > 0:
        wav = preemphasis(wav, hp)
        # NOTE(review): the source listing lost its indentation; this check is
        # placed inside the pre-emphasis branch (where the clipped signal makes
        # it meaningful) -- confirm against the upstream file.
        assert np.abs(wav).max() - 1 < 1e-07

    # STFT -> magnitude spectrogram.
    magnitudes = np.abs(_stft(wav, hp, pad=pad))

    # In-place power so the magnitude array keeps its dtype.
    if hp.mel_power != 1.0:
        magnitudes **= hp.mel_power

    # Project onto the mel filterbank, then optionally go to dB.
    filterbank = mel_basis(hp)
    mel = np.dot(filterbank, magnitudes)
    if hp.mel_type == "db":
        mel = _amp_to_db(mel, hp)

    # Optional rescaling of the dB values.
    if hp.normalized_mels:
        mel = _normalize(mel, hp).astype(np.float32)

    # Sanity check: with padding, the frame count is 1 + len(wav) // hop.
    if pad:
        assert mel.shape[1] == 1 + len(wav) // hp.hop_size
    return mel  # (M, T)
def _stft(y, hp, pad=True):
    """Run ``librosa.stft`` on ``y`` with the framing parameters from ``hp``.

    Args:
        y: input signal, passed to ``librosa.stft`` as-is.
        hp: hyper-parameter object; ``hp.n_fft``, ``hp.hop_size`` and
            ``hp.win_size`` are read.
        pad: passed to ``librosa.stft`` as ``center``.

    Returns:
        The result of ``librosa.stft``.
    """
    # NOTE: after librosa 0.8 the default pad mode is "constant"; "reflect" is
    # set explicitly for historical consistency and consistency with the
    # streaming version.
    stft_options = {
        "n_fft": hp.n_fft,
        "hop_length": hp.hop_size,
        "win_length": hp.win_size,
        "center": pad,
        "pad_mode": "reflect",
    }
    return librosa.stft(y, **stft_options)
def _amp_to_db(x, hp):
    """Convert amplitudes to decibels, flooring at ``hp.stft_magnitude_min``.

    Computes ``20 * log10(max(hp.stft_magnitude_min, x))`` element-wise, so
    values below the floor map to the floor's dB level instead of -inf.
    """
    floored = np.maximum(hp.stft_magnitude_min, x)
    log_amplitude = np.log10(floored)
    return 20 * log_amplitude
def _db_to_amp(x):
    """Convert decibels back to amplitude: ``10 ** (x * 0.05)``."""
    exponent = x * 0.05  # multiply by 0.05, i.e. divide the dB value by 20
    return np.power(10.0, exponent)
def _normalize(s, hp, headroom_db=15):
    """Linearly rescale dB values using the magnitude floor and a headroom.

    The floor level ``20 * log10(hp.stft_magnitude_min)`` maps to 0 and
    ``headroom_db`` maps to 1. Values outside that range are not clipped.

    Args:
        s: dB values to rescale.
        hp: hyper-parameter object; only ``hp.stft_magnitude_min`` is read.
        headroom_db: dB level that maps to 1 (default 15).

    Returns:
        The rescaled values.
    """
    floor_db = 20 * np.log10(hp.stft_magnitude_min)
    span_db = headroom_db - floor_db
    return (s - floor_db) / span_db