Spaces:
Runtime error
Runtime error
make it multiplayer (#3)
Browse files
- make it multiplayer (9f88b4455c91e04313c6be362fd8ac3713b4d5bf)
- Update README.md (a2fa3a420dbb1f91b30a5b8f7b4398333f2c6d89)
Co-authored-by: Apolinário from multimodal AI art <multimodalart@users.noreply.huggingface.co>
README.md
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
emoji: 💻
|
| 4 |
colorFrom: pink
|
| 5 |
colorTo: red
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Stable Audio Live Multiplayer
|
| 3 |
emoji: 💻
|
| 4 |
colorFrom: pink
|
| 5 |
colorTo: red
|
app.py
CHANGED
|
@@ -10,6 +10,13 @@ import uuid
|
|
| 10 |
from stable_audio_tools import get_pretrained_model
|
| 11 |
from stable_audio_tools.inference.generation import generate_diffusion_cond
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
# Load the model outside of the GPU-decorated function
|
| 14 |
def load_model():
|
| 15 |
|
|
@@ -19,7 +26,7 @@ def load_model():
|
|
| 19 |
|
| 20 |
# Function to set up, generate, and process the audio
|
| 21 |
@spaces.GPU(duration=120) # Allocate GPU only when this function is called
|
| 22 |
-
def generate_audio(prompt, sampler_type_dropdown, seconds_total=30, steps=100, cfg_scale=7,sigma_min_slider=0.3,sigma_max_slider=500):
|
| 23 |
print(f"Prompt received: {prompt}")
|
| 24 |
print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
|
| 25 |
|
|
@@ -76,34 +83,54 @@ def generate_audio(prompt, sampler_type_dropdown, seconds_total=30, steps=100, c
|
|
| 76 |
print(f"Audio trimmed to {seconds_total} seconds.")
|
| 77 |
|
| 78 |
# Generate a unique filename for the output
|
| 79 |
-
|
|
|
|
|
|
|
| 80 |
print(f"Saving audio to file: {unique_filename}")
|
| 81 |
|
| 82 |
# Save to file
|
| 83 |
torchaudio.save(unique_filename, output, sample_rate)
|
| 84 |
print(f"Audio saved: {unique_filename}")
|
| 85 |
|
|
|
|
|
|
|
|
|
|
| 86 |
# Return the path to the generated audio file
|
| 87 |
return unique_filename
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
[
|
| 108 |
"Create a serene soundscape of a quiet beach at sunset.", # Text prompt
|
| 109 |
"dpmpp-2m-sde", # Sampler type
|
|
@@ -157,12 +184,70 @@ interface = gr.Interface(
|
|
| 157 |
0.3, # Sigma min
|
| 158 |
500 # Sigma max
|
| 159 |
]
|
| 160 |
-
|
| 161 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
-
# Pre-load the model to avoid multiprocessing issues
|
| 164 |
model, model_config = load_model()
|
| 165 |
|
| 166 |
-
|
| 167 |
-
interface.queue(max_size=10).launch()
|
| 168 |
-
|
|
|
|
| 10 |
from stable_audio_tools import get_pretrained_model
|
| 11 |
from stable_audio_tools.inference.generation import generate_diffusion_cond
|
| 12 |
|
| 13 |
+
PAGE_SIZE = 10
|
| 14 |
+
FILE_DIR_PATH = "/data"
|
| 15 |
+
|
| 16 |
+
theme = gr.themes.Base(
|
| 17 |
+
font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
# Load the model outside of the GPU-decorated function
|
| 21 |
def load_model():
|
| 22 |
|
|
|
|
| 26 |
|
| 27 |
# Function to set up, generate, and process the audio
|
| 28 |
@spaces.GPU(duration=120) # Allocate GPU only when this function is called
|
| 29 |
+
def generate_audio(prompt, sampler_type_dropdown, seconds_total=30, steps=100, cfg_scale=7,sigma_min_slider=0.3,sigma_max_slider=500, progress=gr.Progress(track_tqdm=True)):
|
| 30 |
print(f"Prompt received: {prompt}")
|
| 31 |
print(f"Settings: Duration={seconds_total}s, Steps={steps}, CFG Scale={cfg_scale}")
|
| 32 |
|
|
|
|
| 83 |
print(f"Audio trimmed to {seconds_total} seconds.")
|
| 84 |
|
| 85 |
# Generate a unique filename for the output
|
| 86 |
+
random_uuid = uuid.uuid4().hex
|
| 87 |
+
unique_filename = f"/data/output_{random_uuid}.wav"
|
| 88 |
+
unique_textfile = f"/data/output_{random_uuid}.txt"
|
| 89 |
print(f"Saving audio to file: {unique_filename}")
|
| 90 |
|
| 91 |
# Save to file
|
| 92 |
torchaudio.save(unique_filename, output, sample_rate)
|
| 93 |
print(f"Audio saved: {unique_filename}")
|
| 94 |
|
| 95 |
+
with open(unique_textfile, "w") as file:
|
| 96 |
+
file.write(prompt)
|
| 97 |
+
|
| 98 |
# Return the path to the generated audio file
|
| 99 |
return unique_filename
|
| 100 |
|
| 101 |
+
def list_all_outputs(generation_history):
    """Prepend newly generated ``.wav`` files to the shared history string.

    Args:
        generation_history: Comma-separated audio paths that are already
            listed, or an empty value when nothing has been listed yet.

    Returns:
        A tuple of the updated comma-separated history (unseen files first,
        newest first, followed by the previous history in its existing
        order) and a ``gr.update(visible=True)`` for the second output.
    """
    directory_path = FILE_DIR_PATH
    history_list = generation_history.split(',') if generation_history else []
    already_listed = set(history_list)

    try:
        files_in_directory = os.listdir(directory_path)
    except FileNotFoundError:
        # Nothing has been generated yet; report an unchanged history
        # instead of failing the whole refresh.
        files_in_directory = []

    fresh = []
    for name in files_in_directory:
        if not name.endswith('.wav'):
            continue
        path = os.path.join(directory_path, name)
        if path in already_listed:
            continue
        # The prompt text file is written after the audio file, so an audio
        # file without its companion is still being produced; leave it for a
        # later refresh rather than listing an entry that cannot be rendered.
        if not os.path.exists(os.path.splitext(path)[0] + '.txt'):
            continue
        try:
            modified = os.path.getmtime(path)
        except OSError:
            # The file disappeared between listing and stat-ing it.
            continue
        fresh.append((modified, path))

    # Newest first; sort on the timestamp only so ties keep listing order.
    fresh.sort(key=lambda entry: entry[0], reverse=True)
    updated_history = [path for _, path in fresh] + history_list
    return ','.join(updated_history), gr.update(visible=True)
|
| 110 |
+
|
| 111 |
+
def increase_list_size(list_size):
    """Return ``list_size`` grown by one page (``PAGE_SIZE`` entries)."""
    next_size = list_size + PAGE_SIZE
    return next_size
|
| 113 |
+
|
| 114 |
+
css = '''
|
| 115 |
+
#live_gen:before {
|
| 116 |
+
content: '';
|
| 117 |
+
animation: svelte-z7cif2-pulseStart 1s cubic-bezier(.4,0,.6,1), svelte-z7cif2-pulse 2s cubic-bezier(.4,0,.6,1) 1s infinite;
|
| 118 |
+
border: 2px solid var(--color-accent);
|
| 119 |
+
background: transparent;
|
| 120 |
+
z-index: var(--layer-1);
|
| 121 |
+
pointer-events: none;
|
| 122 |
+
position: absolute;
|
| 123 |
+
height: 100%;
|
| 124 |
+
width: 100%;
|
| 125 |
+
border-radius: 7px;
|
| 126 |
+
}
|
| 127 |
+
#live_gen_items{
|
| 128 |
+
max-height: 570px;
|
| 129 |
+
overflow-y: scroll;
|
| 130 |
+
}
|
| 131 |
+
'''
|
| 132 |
+
|
| 133 |
+
examples = [
|
| 134 |
[
|
| 135 |
"Create a serene soundscape of a quiet beach at sunset.", # Text prompt
|
| 136 |
"dpmpp-2m-sde", # Sampler type
|
|
|
|
| 184 |
0.3, # Sigma min
|
| 185 |
500 # Sigma max
|
| 186 |
]
|
| 187 |
+
]
|
| 188 |
+
with gr.Blocks(theme=theme, css=css) as demo:
|
| 189 |
+
gr.Markdown("# Stable Audio Multiplayer Live")
|
| 190 |
+
gr.Markdown("Generate audio with text, share and learn from others how to best prompt this new model")
|
| 191 |
+
generation_history = gr.Textbox(visible=False)
|
| 192 |
+
list_size = gr.Number(value=PAGE_SIZE, visible=False)
|
| 193 |
+
with gr.Row():
|
| 194 |
+
with gr.Column():
|
| 195 |
+
prompt = gr.Textbox(label="Prompt", placeholder="Enter your text prompt here")
|
| 196 |
+
btn_run = gr.Button("Generate")
|
| 197 |
+
with gr.Accordion("Parameters", open=True):
|
| 198 |
+
with gr.Row():
|
| 199 |
+
duration = gr.Slider(0, 47, value=20, step=1, label="Duration in Seconds")
|
| 200 |
+
|
| 201 |
+
with gr.Accordion("Advanced parameters", open=False):
|
| 202 |
+
steps = gr.Slider(10, 150, value=80, step=10, label="Number of Diffusion Steps")
|
| 203 |
+
sampler_type = gr.Dropdown(["dpmpp-2m-sde", "dpmpp-3m-sde", "k-heun", "k-lms",
|
| 204 |
+
"k-dpmpp-2s-ancestral", "k-dpm-2", "k-dpm-fast"],
|
| 205 |
+
label="Sampler type", value="dpmpp-3m-sde")
|
| 206 |
+
with gr.Row():
|
| 207 |
+
cfg_scale = gr.Slider(1, 15, value=7, step=0.1, label="CFG Scale")
|
| 208 |
+
sigma_min = gr.Slider(0.0, 5.0, step=0.01, value=0.3, label="Sigma min")
|
| 209 |
+
sigma_max = gr.Slider(0.0, 1000.0, step=0.1, value=500, label="Sigma max")
|
| 210 |
+
with gr.Column() as output_list:
|
| 211 |
+
output = gr.Audio(type="filepath", label="Generated Audio")
|
| 212 |
+
with gr.Column(elem_id="live_gen") as community_list:
|
| 213 |
+
gr.Markdown("# Community generations")
|
| 214 |
+
with gr.Column(elem_id="live_gen_items"):
|
| 215 |
+
@gr.render(inputs=[generation_history, list_size])
def show_output_list(generation_history, list_size):
    """Render a prompt heading and an audio player for each listed file.

    Args:
        generation_history: Comma-separated audio paths, or an empty value
            when there is nothing to show.
        list_size: Maximum number of entries to render, taken from the
            start of the history.
    """
    history_list = generation_history.split(',') if generation_history else []
    # The limit comes from a gr.Number component; slice bounds must be ints,
    # so normalise it here (None keeps the "no limit" slice semantics).
    limit = int(list_size) if list_size is not None else None
    history_list_latest = history_list[:limit]
    for generation in history_list_latest:
        # Each audio file has a sibling text file holding its prompt.
        generation_prompt_file = os.path.splitext(generation)[0] + '.txt'
        try:
            with open(generation_prompt_file, 'r') as file:
                generation_prompt = file.read()
        except OSError:
            # A missing or unreadable prompt file should hide that one
            # entry, not abort rendering of the whole list.
            continue
        with gr.Group():
            gr.Markdown(value=f"### {generation_prompt}")
            gr.Audio(value=generation)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
load_more = gr.Button("Load more")
|
| 229 |
+
load_more.click(fn=increase_list_size, inputs=list_size, outputs=list_size)
|
| 230 |
+
|
| 231 |
+
gr.Examples(
|
| 232 |
+
fn=generate_audio,
|
| 233 |
+
examples=examples,
|
| 234 |
+
inputs=[prompt, sampler_type, duration, steps, cfg_scale, sigma_min, sigma_max],
|
| 235 |
+
outputs=output,
|
| 236 |
+
cache_examples="lazy"
|
| 237 |
+
)
|
| 238 |
+
# Run generation when the button is clicked or the prompt is submitted.
# Both triggers are registered exactly once here: adding a second
# btn_run.click(generate_audio, ...) listener would start two generations
# (and save two files) for every click.
gr.on(
    triggers=[btn_run.click, prompt.submit],
    fn=generate_audio,
    inputs=[prompt, sampler_type, duration, steps, cfg_scale, sigma_min, sigma_max],
    outputs=output,
)
|
| 249 |
+
demo.load(fn=list_all_outputs, inputs=generation_history, outputs=[generation_history, community_list], every=2)
|
| 250 |
|
|
|
|
| 251 |
model, model_config = load_model()
|
| 252 |
|
| 253 |
+
demo.launch()
|
|
|
|
|
|