Spaces:

keras-io
/

MelGAN-spectrogram-inversion

Runtime error

App Files Files Community

vumichien committed on Jun 14, 2022

Commit

0446ab5

1 Parent(s): 83a8a3e

Create app.py

Browse files

Files changed (1) hide show

app.py +119 -0

app.py ADDED Viewed

	@@ -0,0 +1,119 @@

+from huggingface_hub import from_pretrained_keras
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras import layers
+import tensorflow_io as tfio
+import gradio as gr
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+class MelSpec(layers.Layer):
+    def __init__(
+        self,
+        frame_length=1024,
+        frame_step=256,
+        fft_length=None,
+        sampling_rate=22050,
+        num_mel_channels=80,
+        freq_min=125,
+        freq_max=7600,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        self.frame_length = frame_length
+        self.frame_step = frame_step
+        self.fft_length = fft_length
+        self.sampling_rate = sampling_rate
+        self.num_mel_channels = num_mel_channels
+        self.freq_min = freq_min
+        self.freq_max = freq_max
+        self.mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
+            num_mel_bins=self.num_mel_channels,
+            num_spectrogram_bins=self.frame_length // 2 + 1,
+            sample_rate=self.sampling_rate,
+            lower_edge_hertz=self.freq_min,
+            upper_edge_hertz=self.freq_max,
+        )
+    def call(self, audio):
+        stft = tf.signal.stft(
+            tf.squeeze(audio, -1),
+            self.frame_length,
+            self.frame_step,
+            self.fft_length,
+            pad_end=True,
+        )
+        # Taking the magnitude of the STFT output
+        magnitude = tf.abs(stft)
+        # Multiplying the Mel-filterbank with the magnitude and scaling it using the db scale
+        mel = tf.matmul(tf.square(magnitude), self.mel_filterbank)
+        log_mel_spec = tfio.audio.dbscale(mel, top_db=80)
+        return log_mel_spec
+    def get_config(self):
+        config = super(MelSpec, self).get_config()
+        config.update(
+            {
+                "frame_length": self.frame_length,
+                "frame_step": self.frame_step,
+                "fft_length": self.fft_length,
+                "sampling_rate": self.sampling_rate,
+                "num_mel_channels": self.num_mel_channels,
+                "freq_min": self.freq_min,
+                "freq_max": self.freq_max,
+            }
+        )
+        return config
+# Fetch the pretrained model from the Hugging Face Hub repo once, at import
+# time; predict() hands this module-level object to inference().
+model = from_pretrained_keras("keras-io/MelGAN-spectrogram-inversion")
+def inference(audio, model):
+    input, sr = librosa.load(audio)
+    # input, sr = audio
+    x = tf.expand_dims(input, axis=-1)
+    mel = MelSpec()(x)
+    audio_sample = tf.expand_dims(mel, axis=0)
+    pred = model.predict(audio_sample, batch_size=1, verbose=0)
+    return input, pred.squeeze(), sr
+def predict(audio, micro):
+    input = audio if audio is not None else micro
+    x, x_pred, sr = inference(audio, model)
+    fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(10, 8), dpi=120)
+    D = librosa.amplitude_to_db(np.abs(librosa.stft(x)), ref=np.max)
+    img = librosa.display.specshow(D, y_axis='linear', x_axis='time',
+                                  sr=sr, ax=ax[0])
+    ax[0].set(title='Spectrogram of Original  sample audio')
+    ax[0].label_outer()
+    D = librosa.amplitude_to_db(np.abs(librosa.stft(x_pred)), ref=np.max)
+    img = librosa.display.specshow(D, y_axis='linear', x_axis='time',
+                                  sr=sr, ax=ax[1])
+    ax[1].set(title='Spectrogram of synthesis  sample audio ')
+    ax[1].label_outer()
+    return plt.gcf()
+inputs = [
+         gr.Audio(source = "upload", label='Upload audio file', type="filepath"),
+         gr.Audio(source = "microphone", label='Record audio from microphone', type="filepath")
+]
+examples = [
+]
+gr.Interface(
+    fn=predict,
+    title="MelGAN-based spectrogram inversion",
+    description = "Inversion of audio from mel-spectrograms using the MelGAN architecture and feature matching",
+    inputs=inputs,
+    examples=examples,
+    outputs=gr.Plot(),
+    article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>. Based on the keras example from <a href=\"https://keras.io/examples/audio/melgan_spectrogram_inversion/\">Darshan Deshpande</a>",
+).launch(debug=False, enable_queue=True)