import gradio as gr
import random

import torch
from diffusers import MidiProcessor, SpectrogramDiffusionPipeline
# Load the pretrained spectrogram-diffusion pipeline in fp16 on the GPU.
pipe = SpectrogramDiffusionPipeline.from_pretrained(
    "google/music-spectrogram-diffusion", torch_dtype=torch.float16
).to("cuda")
pipe.enable_xformers_memory_efficient_attention()

# MidiProcessor tokenises a MIDI file into the note sequences the pipeline consumes.
processor = MidiProcessor()
# Colour pairs used to randomise the gradient of the rendered waveform bars.
COLORS = [
    ["#ff0000", "#00ff00"],
    ["#00ff00", "#0000ff"],
    ["#0000ff", "#ff0000"],
]
def predict(midi_file):
    with torch.inference_mode():
        # Synthesise only the first five segments of the MIDI file to keep
        # inference time reasonable on shared hardware.
        output = pipe(processor(midi_file.name)[:5])
        audio = output.audios[0]
    # The pipeline outputs 16 kHz mono audio; gr.make_waveform renders it
    # as a bar-style waveform video.
    return gr.make_waveform(
        (16000, audio.ravel()),
        bars_color=random.choice(COLORS),
        bar_count=75,
    )
| title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion" | |
| description = """ | |
| <p>For faster inference without waiting in the queue, you should duplicate this space and upgrade to GPU via the settings. | |
| <br/> | |
| <a href="https://huggingface.co/spaces/reach-vb/music-spectrogram-diffusion?duplicate=true"> | |
| <img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> | |
| </p> | |
| In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in realtime. | |
| This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments. | |
| They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter. | |
| """ | |
examples = ["examples/beethoven_mond_2.mid", "examples/beethoven_hammerklavier_2.mid"]
| article = """ | |
| <div style='margin:20px auto;'> | |
| <p>References: <a href="https://arxiv.org/abs/2206.05408">Music Spectrogram Diffusion paper</a> | | |
| <a href="https://github.com/magenta/music-spectrogram-diffusion">original GitHub</a> | | |
| <a href="https://github.com/magenta/music-spectrogram-diffusion#pretrained-models">original weights</a></p> | |
| <pre> | |
| @article{hawthorne2022multi, | |
| title={Multi-instrument music synthesis with spectrogram diffusion}, | |
| author={Hawthorne, Curtis and Simon, Ian and Roberts, Adam and Zeghidour, Neil and Gardner, Josh and Manilow, Ethan and Engel, Jesse}, | |
| journal={arXiv preprint arXiv:2206.05408}, | |
| year={2022} | |
| } | |
| </pre> | |
| </div> | |
| """ | |
gr.Interface(
    fn=predict,
    inputs=[
        gr.File(label="Upload MIDI", file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Video(label="Synthesised Music"),
    ],
    title=title,
    description=description,
    theme="gradio/monochrome",
    examples=examples,
    article=article,
).launch(debug=True)
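# A quick local smoke test without the UI could look like the following
# (a sketch; Gradio passes `predict` a tempfile-like object whose `.name`
# attribute holds the uploaded file's path, so we mimic that contract with
# a hypothetical stand-in class):
#
#     class _Upload:
#         name = "examples/beethoven_mond_2.mid"
#     video_path = predict(_Upload())  # path to the rendered waveform video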