Update app.py
Browse files
app.py
CHANGED
|
@@ -79,9 +79,14 @@ def gradio_generate(prompt, steps, guidance):
|
|
| 79 |
|
| 80 |
return output_filename
|
| 81 |
|
| 82 |
-
description_text =
|
| 83 |
-
TANGO is
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
# Gradio input and output components
|
| 87 |
input_text = gr.inputs.Textbox(lines=2, label="Prompt")
|
|
@@ -95,7 +100,7 @@ gr_interface = gr.Interface(
|
|
| 95 |
inputs=[input_text, denoising_steps, guidance_scale],
|
| 96 |
outputs=[output_audio],
|
| 97 |
title="TANGO: Text to Audio using Instruction-Guided Diffusion",
|
| 98 |
-
description=
|
| 99 |
allow_flagging=False,
|
| 100 |
examples=[
|
| 101 |
["An audience cheering and clapping"],
|
|
@@ -104,7 +109,9 @@ gr_interface = gr.Interface(
|
|
| 104 |
["A car engine revving"],
|
| 105 |
["A dog barking"],
|
| 106 |
["A cat meowing"],
|
|
|
|
| 107 |
["Emergency sirens wailing"],
|
|
|
|
| 108 |
["Whistling with birds chirping"],
|
| 109 |
["A person snoring"],
|
| 110 |
["Motor vehicles are driving with loud engines and a person whistles"],
|
|
|
|
| 79 |
|
| 80 |
return output_filename
|
| 81 |
|
| 82 |
+
description_text = "Generate audio using TANGO by providing a text prompt. \
|
| 83 |
+
\n\nLimitations: TANGO is trained on the small AudioCaps dataset so it may not generate good audio \
|
| 84 |
+
samples related to concepts that it has not seen in training (e.g. singing). For the same reason, TANGO \
|
| 85 |
+
is not always able to finely control its generations over textual control prompts. For example, \
|
| 86 |
+
the generations from TANGO for prompts Chopping tomatoes on a wooden table and Chopping potatoes \
|
| 87 |
+
on a metal table are very similar. \
|
| 88 |
+
\n\nWe are currently training another version of TANGO on larger datasets to enhance its generalization, \
|
| 89 |
+
compositional and controllable generation ability."
|
| 90 |
|
| 91 |
# Gradio input and output components
|
| 92 |
input_text = gr.inputs.Textbox(lines=2, label="Prompt")
|
|
|
|
| 100 |
inputs=[input_text, denoising_steps, guidance_scale],
|
| 101 |
outputs=[output_audio],
|
| 102 |
title="TANGO: Text to Audio using Instruction-Guided Diffusion",
|
| 103 |
+
description=description_text,
|
| 104 |
allow_flagging=False,
|
| 105 |
examples=[
|
| 106 |
["An audience cheering and clapping"],
|
|
|
|
| 109 |
["A car engine revving"],
|
| 110 |
["A dog barking"],
|
| 111 |
["A cat meowing"],
|
| 112 |
+
["Wooden table tapping sound while water pouring"],
|
| 113 |
["Emergency sirens wailing"],
|
| 114 |
+
["two gunshots followed by birds flying away while chirping"],
|
| 115 |
["Whistling with birds chirping"],
|
| 116 |
["A person snoring"],
|
| 117 |
["Motor vehicles are driving with loud engines and a person whistles"],
|