import gradio as gr
import random

import torch
from diffusers import MidiProcessor, SpectrogramDiffusionPipeline
# Load the pretrained spectrogram-diffusion pipeline in fp16 on the GPU.
pipe = SpectrogramDiffusionPipeline.from_pretrained(
    "google/music-spectrogram-diffusion", torch_dtype=torch.float16
).to("cuda")
pipe.enable_xformers_memory_efficient_attention()

# MidiProcessor tokenises a MIDI file into the note sequences the pipeline consumes.
processor = MidiProcessor()
# Colour pairs used to randomise the gradient of the rendered waveform bars.
COLORS = [
    ["#ff0000", "#00ff00"],
    ["#00ff00", "#0000ff"],
    ["#0000ff", "#ff0000"],
]
def predict(midi_file):
    with torch.inference_mode():
        # Synthesise only the first five segments of the MIDI file to keep
        # inference time reasonable on shared hardware.
        output = pipe(processor(midi_file.name)[:5])
        audio = output.audios[0]
    # The pipeline outputs 16 kHz mono audio; gr.make_waveform renders it
    # as a bar-style waveform video.
    return gr.make_waveform(
        (16000, audio.ravel()),
        bars_color=random.choice(COLORS),
        bar_count=75,
    )
| title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion" | |
| description = """ | |
| <p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings. | |
| <br/> | |
| <a href="https://huggingface.co/spaces/reach-vb/music-spectrogram-diffusion?duplicate=true"> | |
| <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> | |
| </p> | |
| In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in realtime. | |
| This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments. | |
| They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter. | |
| """ | |
examples = ["examples/beethoven_mond_2.mid", "examples/beethoven_hammerklavier_2.mid"]
| article = """ | |
| <div style='margin:20px auto;'> | |
| <p>References: <a href="https://arxiv.org/abs/2206.05408">Music Spectrogram Diffusion paper</a> | | |
| <a href="https://github.com/magenta/music-spectrogram-diffusion">original GitHub</a> | | |
| <a href="https://github.com/magenta/music-spectrogram-diffusion#pretrained-models">original weights</a></p> | |
| <pre> | |
| @article{hawthorne2022multi, | |
| title={Multi-instrument music synthesis with spectrogram diffusion}, | |
| author={Hawthorne, Curtis and Simon, Ian and Roberts, Adam and Zeghidour, Neil and Gardner, Josh and Manilow, Ethan and Engel, Jesse}, | |
| journal={arXiv preprint arXiv:2206.05408}, | |
| year={2022} | |
| } | |
| </pre> | |
| </div> | |
| """ | |
gr.Interface(
    fn=predict,
    inputs=[
        gr.File(label="Upload MIDI", file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Video(label="Synthesised Music"),
    ],
    title=title,
    description=description,
    theme="gradio/monochrome",
    examples=examples,
    article=article,
).launch(debug=True)
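# A quick local smoke test without the UI could look like the following
# (a sketch; Gradio passes `predict` a tempfile-like object whose `.name`
# attribute holds the uploaded file's path, so we mimic that contract with
# a hypothetical stand-in class):
#
#     class _Upload:
#         name = "examples/beethoven_mond_2.mid"
#     video_path = predict(_Upload())  # path to the rendered waveform video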