Update demos/musicgen_app.py
Browse files- demos/musicgen_app.py +73 -3
demos/musicgen_app.py
CHANGED
|
@@ -332,7 +332,7 @@ def ui_full(launch_kwargs):
|
|
| 332 |
inputs=[text, melody, model, decoder],
|
| 333 |
outputs=[output]
|
| 334 |
)
|
| 335 |
-
|
| 336 |
"""
|
| 337 |
### More details
|
| 338 |
|
|
@@ -358,7 +358,77 @@ def ui_full(launch_kwargs):
|
|
| 358 |
|
| 359 |
We present 10 model variations:
|
| 360 |
1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
|
| 361 |
-
on text and melody inputs.
|
| 362 |
2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
|
| 363 |
3. facebook/musicgen-medium -- a 1.5B transformer decoder conditioned on text only.
|
| 364 |
-
4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
inputs=[text, melody, model, decoder],
|
| 333 |
outputs=[output]
|
| 334 |
)
|
| 335 |
+
gr.Markdown(
|
| 336 |
"""
|
| 337 |
### More details
|
| 338 |
|
|
|
|
| 358 |
|
| 359 |
We present 10 model variations:
|
| 360 |
1. facebook/musicgen-melody -- a music generation model capable of generating music conditioned
|
| 361 |
+
on text and melody inputs. **Note**, you can also use text only.
|
| 362 |
2. facebook/musicgen-small -- a 300M transformer decoder conditioned on text only.
|
| 363 |
3. facebook/musicgen-medium -- a 1.5B transformer decoder conditioned on text only.
|
| 364 |
+
4. facebook/musicgen-large -- a 3.3B transformer decoder conditioned on text only.
|
| 365 |
+
5. facebook/musicgen-melody-large -- a 3.3B transformer decoder conditioned on text and melody.
|
| 366 |
+
6. facebook/musicgen-stereo-small -- a 300M transformer decoder conditioned on text only, fine tuned for stereo output.
|
| 367 |
+
7. facebook/musicgen-stereo-medium -- a 1.5B transformer decoder conditioned on text only, fine tuned for stereo output.
|
| 368 |
+
8. facebook/musicgen-stereo-melody -- a 1.5B transformer decoder conditioned on text and melody, fine tuned for stereo output.
|
| 369 |
+
9. facebook/musicgen-stereo-large -- a 3.3B transformer decoder conditioned on text only, fine tuned for stereo output.
|
| 370 |
+
10. facebook/musicgen-stereo-melody-large -- a 3.3B transformer decoder conditioned on text and melody, fine tuned for stereo output.
|
| 371 |
+
|
| 372 |
+
We also present two ways of decoding the audio tokens:
|
| 373 |
+
1. Use the default GAN based compression model. It can suffer from artifacts especially
|
| 374 |
+
for crashes, snares etc.
|
| 375 |
+
2. Use [MultiBand Diffusion](https://arxiv.org/abs/2308.02560). Should improve the audio quality,
|
| 376 |
+
at an extra computational cost. When this is selected, we provide both the GAN based decoded
|
| 377 |
+
audio, and the one obtained with MBD.
|
| 378 |
+
|
| 379 |
+
See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft/blob/main/docs/MUSICGEN.md)
|
| 380 |
+
for more details.
|
| 381 |
+
"""
|
| 382 |
+
)
|
| 383 |
+
|
| 384 |
+
interface.queue().launch(**launch_kwargs)
|
| 385 |
+
|
| 386 |
+
# --- Main Entry Point ---
|
| 387 |
+
if __name__ == '__main__':
|
| 388 |
+
parser = argparse.ArgumentParser()
|
| 389 |
+
parser.add_argument(
|
| 390 |
+
'--listen',
|
| 391 |
+
type=str,
|
| 392 |
+
default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
|
| 393 |
+
help='IP to listen on for connections to Gradio',
|
| 394 |
+
)
|
| 395 |
+
parser.add_argument(
|
| 396 |
+
'--username', type=str, default='', help='Username for authentication'
|
| 397 |
+
)
|
| 398 |
+
parser.add_argument(
|
| 399 |
+
'--password', type=str, default='', help='Password for authentication'
|
| 400 |
+
)
|
| 401 |
+
parser.add_argument(
|
| 402 |
+
'--server_port',
|
| 403 |
+
type=int,
|
| 404 |
+
default=0,
|
| 405 |
+
help='Port to run the server listener on',
|
| 406 |
+
)
|
| 407 |
+
parser.add_argument(
|
| 408 |
+
'--inbrowser', action='store_true', help='Open in browser'
|
| 409 |
+
)
|
| 410 |
+
parser.add_argument(
|
| 411 |
+
'--share', action='store_true', help='Share the gradio UI'
|
| 412 |
+
)
|
| 413 |
+
|
| 414 |
+
args = parser.parse_args()
|
| 415 |
+
|
| 416 |
+
launch_kwargs = {}
|
| 417 |
+
launch_kwargs['server_name'] = args.listen
|
| 418 |
+
|
| 419 |
+
if args.username and args.password:
|
| 420 |
+
launch_kwargs['auth'] = (args.username, args.password)
|
| 421 |
+
if args.server_port:
|
| 422 |
+
launch_kwargs['server_port'] = args.server_port
|
| 423 |
+
if args.inbrowser:
|
| 424 |
+
launch_kwargs['inbrowser'] = args.inbrowser
|
| 425 |
+
if args.share:
|
| 426 |
+
launch_kwargs['share'] = args.share
|
| 427 |
+
|
| 428 |
+
logging.basicConfig(level=logging.INFO, stream=sys.stderr)
|
| 429 |
+
# Added predictor shutdown
|
| 430 |
+
try:
|
| 431 |
+
ui_full(launch_kwargs)
|
| 432 |
+
finally:
|
| 433 |
+
if _predictor is not None:
|
| 434 |
+
_predictor.shutdown()
|