Spaces: Running on Zero

Commit: update

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
- app.py +12 -6
- audiotools/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/audio_signal.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/display.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/dsp.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/effects.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/ffmpeg.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/loudness.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/playback.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/util.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/whisper.cpython-310.pyc +0 -0
- audiotools/core/templates/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/datasets.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/preprocess.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/transforms.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/distance.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/quality.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/spectral.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/accelerator.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/decorators.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/experiment.cpython-310.pyc +0 -0
- audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/ml/layers/__pycache__/base.cpython-310.pyc +0 -0
- audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc +0 -0
- src/__pycache__/inference.cpython-310.pyc +0 -0
- src/models/__pycache__/blocks.cpython-310.pyc +0 -0
- src/models/__pycache__/conditioners.cpython-310.pyc +0 -0
- src/models/__pycache__/udit.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/attention.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/modules.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/rotary.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/span_mask.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/timm.cpython-310.pyc +0 -0
- src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc +0 -0
- src/modules/dac/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/base.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/dac.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/layers.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/loss.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc +0 -0
- src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc +0 -0
app.py
CHANGED
@@ -1,10 +1,11 @@
 import os
 import torch
 import random
-import spaces
+# import spaces
 import numpy as np
 import gradio as gr
 import soundfile as sf
+from accelerate import Accelerator
 from transformers import T5Tokenizer, T5EncoderModel
 from diffusers import DDIMScheduler
 from src.models.conditioners import MaskDiT
@@ -33,9 +34,12 @@ def load_models(config_name, ckpt_path, vae_path, device):
     unet.load_state_dict(torch.load(ckpt_path)['model'])
     unet.eval()

+    accelerator = Accelerator(mixed_precision="fp16")
+    unet = accelerator.prepare(unet)
+
     # Load noise scheduler
     noise_scheduler = DDIMScheduler(**params['diff'])
-
+
     latents = torch.randn((1, 128, 128), device=device)
     noise = torch.randn_like(latents)
     timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
@@ -43,6 +47,7 @@ def load_models(config_name, ckpt_path, vae_path, device):

     return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params

+
 MAX_SEED = np.iinfo(np.int32).max

 # Model and config paths
@@ -57,6 +62,7 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
 autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
                                                                                    device)

+
 @spaces.GPU
 def generate_audio(text, length,
                    guidance_scale, guidance_rescale, ddim_steps, eta,
@@ -102,7 +108,7 @@ css = """
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("""
-        # EzAudio Text-to-Audio Generator
+        # EzAudio: High-quality Text-to-Audio Generator
         Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
         """)

@@ -125,10 +131,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         with gr.Accordion("Advanced Settings", open=False):
             guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
             guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
-            ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=
+            ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=50, label="DDIM Steps")
             eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
             seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
-            randomize_seed = gr.Checkbox(label="Randomize Seed", value=
+            randomize_seed = gr.Checkbox(label="Randomize Seed (Disable Seed)", value=True)

         # Examples block
         gr.Examples(
@@ -147,4 +153,4 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     )

 # Launch the Gradio demo
-demo.launch()
+demo.launch()
- audiotools/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 432 Bytes)
- audiotools/core/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 283 Bytes)
- audiotools/core/__pycache__/audio_signal.cpython-310.pyc    ADDED (binary, 45.4 kB)
- audiotools/core/__pycache__/display.cpython-310.pyc    ADDED (binary, 6.36 kB)
- audiotools/core/__pycache__/dsp.cpython-310.pyc    ADDED (binary, 11.6 kB)
- audiotools/core/__pycache__/effects.cpython-310.pyc    ADDED (binary, 17.5 kB)
- audiotools/core/__pycache__/ffmpeg.cpython-310.pyc    ADDED (binary, 5.59 kB)
- audiotools/core/__pycache__/loudness.cpython-310.pyc    ADDED (binary, 8.44 kB)
- audiotools/core/__pycache__/playback.cpython-310.pyc    ADDED (binary, 6.87 kB)
- audiotools/core/__pycache__/util.cpython-310.pyc    ADDED (binary, 18.6 kB)
- audiotools/core/__pycache__/whisper.cpython-310.pyc    ADDED (binary, 2.93 kB)
- audiotools/core/templates/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 145 Bytes)
- audiotools/data/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 234 Bytes)
- audiotools/data/__pycache__/datasets.cpython-310.pyc    ADDED (binary, 17 kB)
- audiotools/data/__pycache__/preprocess.cpython-310.pyc    ADDED (binary, 2.83 kB)
- audiotools/data/__pycache__/transforms.cpython-310.pyc    ADDED (binary, 55.5 kB)
- audiotools/metrics/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 310 Bytes)
- audiotools/metrics/__pycache__/distance.cpython-310.pyc    ADDED (binary, 3.82 kB)
- audiotools/metrics/__pycache__/quality.cpython-310.pyc    ADDED (binary, 4.45 kB)
- audiotools/metrics/__pycache__/spectral.cpython-310.pyc    ADDED (binary, 7.43 kB)
- audiotools/ml/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 321 Bytes)
- audiotools/ml/__pycache__/accelerator.cpython-310.pyc    ADDED (binary, 6.65 kB)
- audiotools/ml/__pycache__/decorators.cpython-310.pyc    ADDED (binary, 14.2 kB)
- audiotools/ml/__pycache__/experiment.cpython-310.pyc    ADDED (binary, 3.32 kB)
- audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 228 Bytes)
- audiotools/ml/layers/__pycache__/base.cpython-310.pyc    ADDED (binary, 9.27 kB)
- audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc    ADDED (binary, 3.87 kB)
- src/__pycache__/inference.cpython-310.pyc    ADDED (binary, 4.24 kB)
- src/models/__pycache__/blocks.cpython-310.pyc    ADDED (binary, 7.27 kB)
- src/models/__pycache__/conditioners.cpython-310.pyc    ADDED (binary, 5.59 kB)
- src/models/__pycache__/udit.cpython-310.pyc    ADDED (binary, 7.86 kB)
- src/models/utils/__pycache__/__init__.cpython-310.pyc    CHANGED (binary files differ)
- src/models/utils/__pycache__/attention.cpython-310.pyc    CHANGED (binary files differ)
- src/models/utils/__pycache__/modules.cpython-310.pyc    CHANGED (binary files differ)
- src/models/utils/__pycache__/rotary.cpython-310.pyc    CHANGED (binary files differ)
- src/models/utils/__pycache__/span_mask.cpython-310.pyc    CHANGED (binary files differ)
- src/models/utils/__pycache__/timm.cpython-310.pyc    CHANGED (binary files differ)
- src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc    ADDED (binary, 2.34 kB)
- src/modules/dac/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 451 Bytes)
- src/modules/dac/model/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 292 Bytes)
- src/modules/dac/model/__pycache__/base.cpython-310.pyc    ADDED (binary, 7.19 kB)
- src/modules/dac/model/__pycache__/dac.cpython-310.pyc    ADDED (binary, 10.6 kB)
- src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc    ADDED (binary, 7.99 kB)
- src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 227 Bytes)
- src/modules/dac/nn/__pycache__/layers.cpython-310.pyc    ADDED (binary, 1.45 kB)
- src/modules/dac/nn/__pycache__/loss.cpython-310.pyc    ADDED (binary, 11.6 kB)
- src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc    ADDED (binary, 8.66 kB)
- src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 2.84 kB)
- src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc    ADDED (binary, 1.2 kB)