Commit 1892f86
Parent(s): bbc001b
add new prompts
app.py
CHANGED
@@ -1,19 +1,27 @@
 import numpy as np
 import gradio as gr
 from bark import SAMPLE_RATE, generate_audio, preload_models
+from bark.generation import SUPPORTED_LANGS
 
 DEBUG_MODE = False
 
 if not DEBUG_MODE:
     _ = preload_models()
 
+AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
+PROMPT_LOOKUP = {}
+for _, lang in SUPPORTED_LANGS:
+    for n in range(10):
+        label = f"Speaker {n} ({lang})"
+        AVAILABLE_PROMPTS.append(label)
+        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
+PROMPT_LOOKUP["Unconditional"] = None
+PROMPT_LOOKUP["Announcer"] = "announcer"
+
 default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
 
 def gen_tts(text, history_prompt, temp_semantic, temp_waveform):
-    if history_prompt == "Unconditional":
-        history_prompt = None
-    else:
-        history_prompt = history_prompt.lower().replace(" ", "_").replace("speaker", "speech")
+    history_prompt = PROMPT_LOOKUP[history_prompt]
     if DEBUG_MODE:
         audio_arr = np.zeros(SAMPLE_RATE)
     else:
@@ -26,10 +34,7 @@ iface = gr.Interface(
     fn=gen_tts,
     inputs=[
         gr.Textbox(label="Input Text", lines=3, value=default_text),
-        gr.Dropdown(
-            ["Unconditional"] + [f"Speaker {n}" for n in range(8)] + [f"Music {n}" for n in range(6)],
-            value="None", label="Acoustic Prompt", info="This choice primes the model on how to condition the generated audio."
-        ),
+        gr.Dropdown(AVAILABLE_PROMPTS, value="None", label="Acoustic Prompt", info="This choice primes the model on how to condition the generated audio."),
         gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 1", info="Gen. temperature of semantic tokens. (lower is more conservative, higher is more diverse)"),
         gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 2", info="Gen. temperature of waveform tokens. (lower is more conservative, higher is more diverse)"),
     ],
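
For context, a minimal usage sketch of how the new label-to-prompt mapping is meant to be consumed: the dropdown label is translated into one of Bark's built-in history prompts and handed to generate_audio. The unchanged middle of gen_tts is not shown in this diff, so the generate_audio() call below is an assumption based on bark's public API (text_temp / waveform_temp), not a quote of the Space's code.

# Sketch only, not part of the commit.
from bark import SAMPLE_RATE, generate_audio, preload_models
from bark.generation import SUPPORTED_LANGS

preload_models()  # loads the text, coarse, and fine models

# Rebuild the lookup the commit adds, e.g. "Speaker 3 (en)" -> "en_speaker_3".
PROMPT_LOOKUP = {"Unconditional": None, "Announcer": "announcer"}
for _, lang in SUPPORTED_LANGS:  # second element is the short language code used in prompt names
    for n in range(10):
        PROMPT_LOOKUP[f"Speaker {n} ({lang})"] = f"{lang}_speaker_{n}"

label = "Speaker 3 (en)"  # what the Gradio dropdown would pass into gen_tts
audio_arr = generate_audio(
    "Hello, my name is Suno.",
    history_prompt=PROMPT_LOOKUP[label],  # None means unconditional generation
    text_temp=0.7,       # "Temp 1" in the UI: semantic-token temperature
    waveform_temp=0.7,   # "Temp 2" in the UI: waveform-token temperature
)
# audio_arr is a float NumPy array sampled at SAMPLE_RATE, ready for gr.Audio playback.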