Spaces:
Paused
Paused
| """ | |
| This module contains functions to generate song covers using RVC-based voice models. | |
| """ | |
| from typing import Any | |
| from typings.extra import F0Method, InputAudioExt, InputType, OutputAudioExt | |
| import gc | |
| import glob | |
| import os | |
| import shlex | |
| import shutil | |
| import subprocess | |
| from contextlib import suppress | |
| from logging import WARNING | |
| from pathlib import Path, PurePath | |
| from urllib.parse import parse_qs, urlparse | |
| import yt_dlp | |
| import gradio as gr | |
| import soundfile as sf | |
| import sox | |
| from audio_separator.separator import Separator | |
| from pedalboard import Compressor, HighpassFilter, Reverb | |
| from pedalboard._pedalboard import Pedalboard | |
| from pedalboard.io import AudioFile | |
| from pydub import AudioSegment | |
| from pydub import utils as pydub_utils | |
| from vc.rvc import Config, get_vc, load_hubert, rvc_infer | |
| from backend.common import ( | |
| INTERMEDIATE_AUDIO_DIR, | |
| OUTPUT_AUDIO_DIR, | |
| display_progress, | |
| get_file_hash, | |
| get_hash, | |
| get_path_stem, | |
| get_rvc_model, | |
| json_dump, | |
| json_load, | |
| ) | |
| from backend.exceptions import InputMissingError, InvalidPathError, PathNotFoundError | |
| from common import RVC_MODELS_DIR, SEPARATOR_MODELS_DIR | |
# Module-wide separator instance shared by every separation step below.
# Models are looked up in SEPARATOR_MODELS_DIR and separated stems are written
# to INTERMEDIATE_AUDIO_DIR.
SEPARATOR = Separator(
    log_level=WARNING,
    model_file_dir=SEPARATOR_MODELS_DIR,
    output_dir=INTERMEDIATE_AUDIO_DIR,
    mdx_params=dict(
        hop_length=1024,
        segment_size=256,
        overlap=0.001,
        batch_size=1,
        enable_denoise=False,
    ),
    mdxc_params=dict(segment_size=256, batch_size=1, overlap=2),
)
| def _get_youtube_video_id(url: str, ignore_playlist: bool = True) -> str | None: | |
| """ | |
| Get video id from a YouTube URL. | |
| Parameters | |
| ---------- | |
| url : str | |
| The YouTube URL. | |
| ignore_playlist : bool, default=True | |
| Whether to get id of first video in playlist or the playlist id itself. | |
| Returns | |
| ------- | |
| str | |
| The video id. | |
| """ | |
| query = urlparse(url) | |
| if query.hostname == "youtu.be": | |
| if query.path[1:] == "watch": | |
| return query.query[2:] | |
| return query.path[1:] | |
| if query.hostname in {"www.youtube.com", "youtube.com", "music.youtube.com"}: | |
| if not ignore_playlist: | |
| # use case: get playlist id not current video in playlist | |
| with suppress(KeyError): | |
| return parse_qs(query.query)["list"][0] | |
| if query.path == "/watch": | |
| return parse_qs(query.query)["v"][0] | |
| if query.path[:7] == "/watch/": | |
| return query.path.split("/")[1] | |
| if query.path[:7] == "/embed/": | |
| return query.path.split("/")[2] | |
| if query.path[:3] == "/v/": | |
| return query.path.split("/")[2] | |
| return None | |
def _yt_download(link: str, song_dir: str) -> str:
    """
    Download the audio track of a YouTube video as a WAV file.

    Parameters
    ----------
    link : str
        The YouTube link.
    song_dir : str
        The directory to save the downloaded audio to.

    Returns
    -------
    str
        The path to the downloaded audio file.

    Raises
    ------
    PathNotFoundError
        If no information could be extracted for the provided link.
    """
    template = os.path.join(song_dir, "0_%(title)s_Original")
    wav_extractor = {
        "key": "FFmpegExtractAudio",
        "preferredcodec": "wav",
        "preferredquality": 0,
    }
    options = {
        "quiet": True,
        "no_warnings": True,
        "format": "bestaudio",
        "outtmpl": template,
        "ignoreerrors": True,
        "nocheckcertificate": True,
        "postprocessors": [wav_extractor],
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(link, download=True)
        if not info:
            raise PathNotFoundError("No audio found in the provided YouTube link!")
        # Resolve the final file name with the ".wav" extension appended to
        # the output template.
        return downloader.prepare_filename(info, outtmpl=f"{template}.wav")
def _get_input_audio_paths() -> list[str]:
    """
    Collect the paths of all cached input audio files.

    Returns
    -------
    list[str]
        The paths of all files matching ``0_*_Original*`` located one
        directory level below the intermediate audio directory.
    """
    # TODO if we later add .json file for input then we need to exclude those here
    pattern = os.path.join(INTERMEDIATE_AUDIO_DIR, "*", "0_*_Original*")
    return glob.glob(pattern)
| def _get_input_audio_path(song_dir: str) -> str | None: | |
| """ | |
| Get the path of the cached input audio file in a given song directory. | |
| Parameters | |
| ---------- | |
| song_dir : str | |
| The path to a song directory. | |
| Returns | |
| ------- | |
| str | |
| The path of the cached input audio file, if it exists. | |
| """ | |
| # NOTE orig_song_paths should never contain more than one element | |
| return next(iter(glob.glob(os.path.join(song_dir, "0_*_Original*"))), None) | |
def _pitch_shift(audio_path: str, output_path: str, n_semi_tones: int) -> None:
    """
    Pitch-shift an audio file and write the result to a new file.

    Parameters
    ----------
    audio_path : str
        The path of the audio file to pitch-shift.
    output_path : str
        The path to save the pitch-shifted audio file to.
    n_semi_tones : int
        The number of semi-tones to pitch-shift the audio by.
    """
    samples, sample_rate = sf.read(audio_path)
    transformer = sox.Transformer()
    transformer.pitch(n_semi_tones)
    shifted = transformer.build_array(input_array=samples, sample_rate_in=sample_rate)
    # The output keeps the sample rate of the input file.
    sf.write(output_path, shifted, sample_rate)
| # TODO consider increasing hash_size to 16 | |
| # otherwise we might have problems with hash collisions | |
| # when using app as CLI | |
def _get_unique_base_path(
    song_dir: str,
    prefix: str,
    arg_dict: dict[str, Any],
    progress_bar: gr.Progress | None = None,
    percentage: float = 0.0,
    hash_size: int = 5,
) -> str:
    """
    Derive a unique base path for an audio file in a song directory
    from a hash of the arguments used to generate the audio.

    Parameters
    ----------
    song_dir : str
        The path to a song directory.
    prefix : str
        The prefix to use for the base path.
    arg_dict : dict
        The dictionary of arguments used to generate the audio in the given file.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentage : float, default=0.0
        Percentage to display in the progress bar.
    hash_size : int, default=5
        The size (in bytes) of the hash to use for the base path.

    Returns
    -------
    str
        The unique base path for the audio file.
    """
    current_hash = get_hash(arg_dict, size=hash_size)
    while True:
        candidate = os.path.join(song_dir, f"{prefix}_{current_hash}")
        metadata_path = f"{candidate}.json"
        # No metadata file yet: the candidate path is free to use.
        if not os.path.exists(metadata_path):
            return candidate
        # Metadata exists and records the same arguments: reuse the path.
        if json_load(metadata_path) == arg_dict:
            return candidate
        # Same hash but different arguments: hash the hash and try again.
        display_progress("[~] Rehashing...", percentage, progress_bar)
        current_hash = get_hash(current_hash, size=hash_size)
def _convert_voice(
    voice_model: str,
    voice_path: str,
    output_path: str,
    pitch_change: int,
    f0_method: F0Method,
    index_rate: float,
    filter_radius: int,
    rms_mix_rate: float,
    protect: float,
    crepe_hop_length: int,
    output_sr: int,
) -> None:
    """
    Run RVC inference on a voice track with the given voice model.

    Parameters
    ----------
    voice_model : str
        The name of the voice model to use.
    voice_path : str
        The path to the voice track to convert.
    output_path : str
        The path to save the converted voice to.
    pitch_change : int
        The number of semi-tones to pitch-shift the converted voice by.
    f0_method : F0Method
        The method to use for pitch extraction.
    index_rate : float
        The influence of index file on voice conversion.
    filter_radius : int
        The filter radius to use for the voice conversion.
    rms_mix_rate : float
        The blending rate of the volume envelope of converted voice.
    protect : float
        The protection rate for consonants and breathing sounds.
    crepe_hop_length : int
        The hop length to use for Crepe pitch extraction method.
    output_sr : int
        The sample rate to use for the output audio.
    """
    model_path, index_path = get_rvc_model(voice_model)

    # NOTE(review): the device is hard-coded to the first CUDA device.
    device = "cuda:0"
    config = Config(device, True)
    hubert_path = os.path.join(RVC_MODELS_DIR, "hubert_base.pt")
    hubert = load_hubert(device, config.is_half, hubert_path)
    checkpoint, version, net_g, tgt_sr, vc = get_vc(
        device, config.is_half, config, model_path
    )

    # convert main vocals
    rvc_infer(
        index_path,
        index_rate,
        voice_path,
        output_path,
        pitch_change,
        f0_method,
        checkpoint,
        version,
        net_g,
        filter_radius,
        tgt_sr,
        rms_mix_rate,
        protect,
        crepe_hop_length,
        vc,
        hubert,
        output_sr,
    )

    # Drop the local references to the loaded models and force a collection.
    del hubert, checkpoint
    gc.collect()
def _add_audio_effects(
    audio_path: str,
    output_path: str,
    reverb_rm_size: float,
    reverb_wet: float,
    reverb_dry: float,
    reverb_damping: float,
) -> None:
    """
    Apply a high-pass filter, a compressor and a reverb to an audio file.

    Parameters
    ----------
    audio_path : str
        The path of the audio file to add effects to.
    output_path : str
        The path to save the effected audio file to.
    reverb_rm_size : float
        The room size of the reverb effect.
    reverb_wet : float
        The wet level of the reverb effect.
    reverb_dry : float
        The dry level of the reverb effect.
    reverb_damping : float
        The damping of the reverb effect.
    """
    reverb = Reverb(
        room_size=reverb_rm_size,
        dry_level=reverb_dry,
        wet_level=reverb_wet,
        damping=reverb_damping,
    )
    effects = Pedalboard(
        [HighpassFilter(), Compressor(ratio=4, threshold_db=-15), reverb]
    )

    # The output file uses the sample rate and channel count of the input.
    with AudioFile(audio_path) as source, AudioFile(
        output_path, "w", source.samplerate, source.num_channels
    ) as sink:
        # Process one second of audio per iteration until the input is
        # exhausted. reset=False is passed on every chunk, presumably so
        # effect state carries over between chunks (see pedalboard docs).
        while source.tell() < source.frames:
            block_of_audio = source.read(int(source.samplerate))
            sink.write(effects(block_of_audio, source.samplerate, reset=False))
def _map_audio_ext(input_audio_ext: InputAudioExt) -> OutputAudioExt:
    """
    Translate an input audio extension into an output audio extension.

    Parameters
    ----------
    input_audio_ext : InputAudioExt
        The input audio extension.

    Returns
    -------
    OutputAudioExt
        ``"ipod"`` for ``"m4a"``, ``"adts"`` for ``"aac"``, and the input
        extension unchanged for everything else.
    """
    if input_audio_ext == "m4a":
        return "ipod"
    if input_audio_ext == "aac":
        return "adts"
    return input_audio_ext
def _mix_audio(
    main_vocal_path: str,
    backup_vocal_path: str,
    instrumental_path: str,
    main_gain: int,
    backup_gain: int,
    inst_gain: int,
    output_format: InputAudioExt,
    output_sr: int,
    output_path: str,
) -> None:
    """
    Overlay main vocals, backup vocals and instrumentals into one audio file.

    Parameters
    ----------
    main_vocal_path : str
        The path of an audio file containing main vocals.
    backup_vocal_path : str
        The path of an audio file containing backup vocals.
    instrumental_path : str
        The path of an audio file containing instrumentals.
    main_gain : int
        The gain to apply to the main vocals.
    backup_gain : int
        The gain to apply to the backup vocals.
    inst_gain : int
        The gain to apply to the instrumental.
    output_format : InputAudioExt
        The format to save the mixed audio file in.
    output_sr : int
        The sample rate to use for the mixed audio file.
    output_path : str
        The path to save the mixed audio file to.
    """
    # Load each WAV track and apply its gain, in the order main vocals,
    # backup vocals, instrumentals.
    main_track, backup_track, inst_track = (
        AudioSegment.from_wav(track_path) + gain
        for track_path, gain in (
            (main_vocal_path, main_gain),
            (backup_vocal_path, backup_gain),
            (instrumental_path, inst_gain),
        )
    )
    mix = main_track.overlay(backup_track).overlay(inst_track)
    resampled_mix = mix.set_frame_rate(output_sr)
    # The requested extension is mapped to the format name used for export.
    resampled_mix.export(output_path, format=_map_audio_ext(output_format))
def get_named_song_dirs() -> list[tuple[str, str]]:
    """
    List the names and paths of all song directories.

    Returns
    -------
    list[tuple[str, str]]
        A list of tuples containing the name and path of each song directory,
        sorted by name.
    """

    def name_and_dir(input_path: str) -> tuple[str, str]:
        # The song name is the input file stem without the "0_" prefix and
        # the "_Original" suffix.
        directory, basename = os.path.split(input_path)
        stem, _ = os.path.splitext(basename)
        return stem.removeprefix("0_").removesuffix("_Original"), directory

    pairs = [name_and_dir(input_path) for input_path in _get_input_audio_paths()]
    pairs.sort(key=lambda pair: pair[0])
    return pairs
def convert_to_stereo(
    song_path: str,
    song_dir: str,
    progress_bar: gr.Progress | None = None,
    percentage: float = 0.0,
) -> str:
    """
    Converts an audio file to stereo.

    If the audio file is not mono, it is returned unchanged. Otherwise a
    stereo WAV copy is created in the song directory (or reused if it was
    already created for the same input).

    Parameters
    ----------
    song_path : str
        The path to the audio file to convert.
    song_dir : str
        The path to the directory where the stereo audio file will be saved.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentage : float, default=0.0
        Percentage to display in the progress bar.

    Returns
    -------
    str
        The path to the stereo audio file.

    Raises
    ------
    InputMissingError
        If no audio file or song directory path is provided.
    PathNotFoundError
        If the provided audio file or song directory path does not point
        to an existing file or directory.
    subprocess.CalledProcessError
        If ffmpeg exits with a non-zero status during the conversion.
    """
    if not song_path:
        raise InputMissingError("Input song missing!")
    if not os.path.isfile(song_path):
        raise PathNotFoundError("Input song does not exist!")
    if not song_dir:
        raise InputMissingError("Song directory missing!")
    if not os.path.isdir(song_dir):
        raise PathNotFoundError("Song directory does not exist!")

    song_info = pydub_utils.mediainfo(song_path)
    if song_info["channels"] != "1":
        # Not mono: nothing to convert.
        return song_path

    arg_dict = {
        "input-files": [
            {"name": os.path.basename(song_path), "hash": get_file_hash(song_path)}
        ],
    }
    stereo_path_base = _get_unique_base_path(
        song_dir, "0_Stereo", arg_dict, progress_bar, percentage
    )
    stereo_path = f"{stereo_path_base}.wav"
    stereo_json_path = f"{stereo_path_base}.json"

    if not (os.path.exists(stereo_path) and os.path.exists(stereo_json_path)):
        display_progress("[~] Converting song to stereo...", percentage, progress_bar)
        # Pass the arguments as a list so that paths containing quotes,
        # backslashes or other shell-special characters reach ffmpeg intact.
        command = [
            "ffmpeg",
            "-y",
            "-loglevel",
            "error",
            "-i",
            song_path,
            "-ac",
            "2",
            "-f",
            "wav",
            stereo_path,
        ]
        # check=True: do not record metadata for, or return the path of, a
        # file that ffmpeg failed to produce.
        subprocess.run(command, check=True)
        json_dump(arg_dict, stereo_json_path)

    return stereo_path
def _make_song_dir(
    song_input: str, progress_bar: gr.Progress | None = None, percentage: float = 0.0
) -> tuple[str, InputType]:
    """
    Create a song directory for a given song input.

    * If the song input is a YouTube URL,
      the song directory will be named after the video id.
    * If the song input is a local audio file,
      the song directory will be named after the file hash.
    * If the song input is a song directory,
      the song directory will be used as is.

    Parameters
    ----------
    song_input : str
        The song input to create a directory for.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentage : float, default=0.0
        Percentage to display in the progress bar.

    Returns
    -------
    song_dir : str
        The path to the created song directory.
    input_type : InputType
        The type of input provided.

    Raises
    ------
    InvalidPathError
        If the provided YouTube URL is invalid or if the provided song directory
        is not located in the root of the intermediate audio directory.
    PathNotFoundError
        If the provided song input is neither a valid HTTPS-based URL
        nor the path of an existing song directory or audio file.
    """
    # if song directory
    if os.path.isdir(song_input):
        if PurePath(song_input).parent != PurePath(INTERMEDIATE_AUDIO_DIR):
            raise InvalidPathError(
                "Song directory not located in the root of the intermediate audio"
                " directory."
            )
        display_progress(
            "[~] Using existing song directory...", percentage, progress_bar
        )
        return song_input, "local"

    display_progress("[~] Creating song directory...", percentage, progress_bar)

    # if youtube url
    if urlparse(song_input).scheme == "https":
        input_type = "yt"
        song_id = _get_youtube_video_id(song_input)
        # Reject an empty id as well as None: joining an empty id onto the
        # intermediate audio directory would yield that directory itself.
        if not song_id:
            raise InvalidPathError("Invalid YouTube url!")
    # if local audio file
    elif os.path.isfile(song_input):
        input_type = "local"
        song_id = get_file_hash(song_input)
    else:
        raise PathNotFoundError(f"Song input {song_input} does not exist.")

    song_dir = os.path.join(INTERMEDIATE_AUDIO_DIR, song_id)
    Path(song_dir).mkdir(parents=True, exist_ok=True)
    return song_dir, input_type
def retrieve_song(
    song_input: str,
    progress_bar: gr.Progress | None = None,
    percentages: tuple[float, float, float] = (0, 0.33, 0.67),
) -> tuple[str, str]:
    """
    Fetch a song from a YouTube URL, local audio file or a song directory.

    Parameters
    ----------
    song_input : str
        A Youtube URL, the path of a local audio file
        or the path of a song directory.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentages : tuple[float,float,float], default=(0, 0.33, 0.67)
        Percentages to display in the progress bar.

    Returns
    -------
    song_path : str
        The path to the retrieved audio file
    song_dir : str
        The path to the song directory containing it.

    Raises
    ------
    InputMissingError
        If no song input is provided.
    InvalidPathError
        If the provided Youtube URL is invalid or if the provided song directory
        is not located in the root of the intermediate audio directory.
    PathNotFoundError
        If the provided song input is neither a valid HTTPS-based URL
        nor the path of an existing song directory or audio file.
    """
    if not song_input:
        raise InputMissingError(
            "Song input missing! Please provide a valid YouTube url, local audio file"
            " path or cached song directory path."
        )

    song_dir, input_type = _make_song_dir(song_input, progress_bar, percentages[0])

    cached_path = _get_input_audio_path(song_dir)
    if cached_path:
        # The input audio is already present in the song directory.
        source_path = cached_path
    elif input_type == "yt":
        display_progress("[~] Downloading song...", percentages[1], progress_bar)
        # Only the part of the URL before the first "&" is downloaded.
        source_path = _yt_download(song_input.split("&")[0], song_dir)
    else:
        display_progress("[~] Copying song...", percentages[1], progress_bar)
        name, ext = os.path.splitext(os.path.basename(song_input))
        source_path = os.path.join(song_dir, f"0_{name}_Original" + ext)
        shutil.copyfile(song_input, source_path)

    return (
        convert_to_stereo(source_path, song_dir, progress_bar, percentages[2]),
        song_dir,
    )
def separate_vocals(
    song_path: str,
    song_dir: str,
    stereofy: bool = True,
    progress_bar: gr.Progress | None = None,
    percentages: tuple[float, float] = (0.0, 0.5),
) -> tuple[str, str]:
    """
    Split a song into a vocals track and an instrumentals track.

    Parameters
    ----------
    song_path : str
        The path to the song to separate.
    song_dir : str
        The path to the song directory where the
        separated vocals and instrumentals will be saved.
    stereofy : bool, default=True
        Whether to convert the song to stereo
        before separating its vocals and instrumentals.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentages : tuple[float,float], default=(0.0, 0.5)
        Percentages to display in the progress bar.

    Returns
    -------
    vocals_path : str
        The path to the separated vocals.
    instrumentals_path : str
        The path to the separated instrumentals.

    Raises
    ------
    InputMissingError
        If no song path or song directory path is provided.
    PathNotFoundError
        If the provided song path or song directory path does not point
        to an existing file or directory.
    """
    # Validate the song path first, then the song directory.
    for value, exists, missing_msg, not_found_msg in (
        (
            song_path,
            os.path.isfile,
            "Input song missing!",
            "Input song does not exist!",
        ),
        (
            song_dir,
            os.path.isdir,
            "Song directory missing!",
            "Song directory does not exist!",
        ),
    ):
        if not value:
            raise InputMissingError(missing_msg)
        if not exists(value):
            raise PathNotFoundError(not_found_msg)

    if stereofy:
        song_path = convert_to_stereo(song_path, song_dir, progress_bar, percentages[0])

    # The output paths are keyed on the name and hash of the input file.
    arg_dict = {
        "input-files": [
            {"name": os.path.basename(song_path), "hash": get_file_hash(song_path)}
        ],
    }
    vocals_base, instrumentals_base = (
        _get_unique_base_path(song_dir, prefix, arg_dict, progress_bar, percentages[1])
        for prefix in ("1_Vocals", "1_Instrumental")
    )
    vocals_path, vocals_json_path = f"{vocals_base}.wav", f"{vocals_base}.json"
    instrumentals_path = f"{instrumentals_base}.wav"
    instrumentals_json_path = f"{instrumentals_base}.json"

    expected_files = (
        vocals_path,
        vocals_json_path,
        instrumentals_path,
        instrumentals_json_path,
    )
    # Separate only if any of the audio or metadata files is missing.
    if not all(os.path.exists(file_path) for file_path in expected_files):
        display_progress(
            "[~] Separating vocals from instrumentals...", percentages[1], progress_bar
        )
        SEPARATOR.arch_specific_params["MDX"]["segment_size"] = 512
        SEPARATOR.load_model("UVR-MDX-NET-Voc_FT.onnx")
        # NOTE(review): relies on the separator returning the instrumentals
        # file name first and the vocals file name second; confirm.
        temp_instrumentals_name, temp_vocals_name = SEPARATOR.separate(song_path)
        for temp_name, destination in (
            (temp_instrumentals_name, instrumentals_path),
            (temp_vocals_name, vocals_path),
        ):
            shutil.move(os.path.join(INTERMEDIATE_AUDIO_DIR, temp_name), destination)
        for json_path in (vocals_json_path, instrumentals_json_path):
            json_dump(arg_dict, json_path)

    return vocals_path, instrumentals_path
def separate_main_vocals(
    vocals_path: str,
    song_dir: str,
    stereofy: bool = True,
    progress_bar: gr.Progress | None = None,
    percentages: tuple[float, float] = (0.0, 0.5),
) -> tuple[str, str]:
    """
    Split a vocals track into a main vocals track and a backup vocals track.

    Parameters
    ----------
    vocals_path : str
        The path to the vocals track to separate.
    song_dir : str
        The path to the directory where the separated main vocals
        and backup vocals will be saved.
    stereofy : bool, default=True
        Whether to convert the vocals track to stereo
        before separating its main vocals and backup vocals.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentages : tuple[float,float], default=(0.0, 0.5)
        Percentages to display in the progress bar.

    Returns
    -------
    main_vocals_path : str
        The path to the separated main vocals.
    backup_vocals_path : str
        The path to the separated backup vocals.

    Raises
    ------
    InputMissingError
        If no vocals track path or song directory path is provided.
    PathNotFoundError
        If the provided vocals path or song directory path does not point
        to an existing file or directory.
    """
    # Validate the vocals path first, then the song directory.
    for value, exists, missing_msg, not_found_msg in (
        (vocals_path, os.path.isfile, "Vocals missing!", "Vocals do not exist!"),
        (
            song_dir,
            os.path.isdir,
            "Song directory missing!",
            "song directory does not exist!",
        ),
    ):
        if not value:
            raise InputMissingError(missing_msg)
        if not exists(value):
            raise PathNotFoundError(not_found_msg)

    if stereofy:
        vocals_path = convert_to_stereo(
            vocals_path, song_dir, progress_bar, percentages[0]
        )

    # The output paths are keyed on the name and hash of the input file.
    arg_dict = {
        "input-files": [
            {"name": os.path.basename(vocals_path), "hash": get_file_hash(vocals_path)}
        ],
    }
    main_base, backup_base = (
        _get_unique_base_path(song_dir, prefix, arg_dict, progress_bar, percentages[1])
        for prefix in ("2_Vocals_Main", "2_Vocals_Backup")
    )
    main_vocals_path, main_vocals_json_path = f"{main_base}.wav", f"{main_base}.json"
    backup_vocals_path = f"{backup_base}.wav"
    backup_vocals_json_path = f"{backup_base}.json"

    expected_files = (
        main_vocals_path,
        main_vocals_json_path,
        backup_vocals_path,
        backup_vocals_json_path,
    )
    # Separate only if any of the audio or metadata files is missing.
    if not all(os.path.exists(file_path) for file_path in expected_files):
        display_progress(
            "[~] Separating main vocals from backup vocals...",
            percentages[1],
            progress_bar,
        )
        SEPARATOR.arch_specific_params["MDX"]["segment_size"] = 512
        SEPARATOR.load_model("UVR_MDXNET_KARA_2.onnx")
        # NOTE(review): relies on the separator returning the main vocals
        # file name first and the backup vocals file name second; confirm.
        temp_main_vocals_name, temp_backup_vocals_name = SEPARATOR.separate(vocals_path)
        for temp_name, destination in (
            (temp_main_vocals_name, main_vocals_path),
            (temp_backup_vocals_name, backup_vocals_path),
        ):
            shutil.move(os.path.join(INTERMEDIATE_AUDIO_DIR, temp_name), destination)
        for json_path in (main_vocals_json_path, backup_vocals_json_path):
            json_dump(arg_dict, json_path)

    return main_vocals_path, backup_vocals_path
def dereverb_vocals(
    vocals_path: str,
    song_dir: str,
    stereofy: bool = True,
    progress_bar: gr.Progress | None = None,
    percentages: tuple[float, float] = (0.0, 0.5),
) -> tuple[str, str]:
    """
    Remove reverb from a vocals track.

    Parameters
    ----------
    vocals_path : str
        The path to the vocals track to de-reverb.
    song_dir : str
        The path to the directory where the de-reverbed vocals will be saved.
    stereofy : bool, default=True
        Whether to convert the vocals track to stereo before de-reverbing it.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentages : tuple[float,float], default=(0.0, 0.5)
        Percentages to display in the progress bar.

    Returns
    -------
    vocals_dereverb_path : str
        The path to the de-reverbed vocals.
    vocals_reverb_path : str
        The path to the reverb of the vocals.

    Raises
    ------
    InputMissingError
        If no vocals track path or song directory path is provided.
    PathNotFoundError
        If the provided vocals path or song directory path does not point
        to an existing file or directory.
    """
    # Validate the vocals path first, then the song directory.
    for value, exists, missing_msg, not_found_msg in (
        (vocals_path, os.path.isfile, "Vocals missing!", "Vocals do not exist!"),
        (
            song_dir,
            os.path.isdir,
            "Song directory missing!",
            "song directory does not exist!",
        ),
    ):
        if not value:
            raise InputMissingError(missing_msg)
        if not exists(value):
            raise PathNotFoundError(not_found_msg)

    if stereofy:
        vocals_path = convert_to_stereo(
            vocals_path, song_dir, progress_bar, percentages[0]
        )

    # The output paths are keyed on the name and hash of the input file.
    arg_dict = {
        "input-files": [
            {"name": os.path.basename(vocals_path), "hash": get_file_hash(vocals_path)}
        ],
    }
    dereverb_base, reverb_base = (
        _get_unique_base_path(song_dir, prefix, arg_dict, progress_bar, percentages[1])
        for prefix in ("3_Vocals_DeReverb", "3_Vocals_Reverb")
    )
    vocals_dereverb_path = f"{dereverb_base}.wav"
    vocals_dereverb_json_path = f"{dereverb_base}.json"
    vocals_reverb_path = f"{reverb_base}.wav"
    vocals_reverb_json_path = f"{reverb_base}.json"

    expected_files = (
        vocals_dereverb_path,
        vocals_dereverb_json_path,
        vocals_reverb_path,
        vocals_reverb_json_path,
    )
    # De-reverb only if any of the audio or metadata files is missing.
    if not all(os.path.exists(file_path) for file_path in expected_files):
        display_progress("[~] De-reverbing vocals...", percentages[1], progress_bar)
        SEPARATOR.arch_specific_params["MDX"]["segment_size"] = 256
        SEPARATOR.load_model("Reverb_HQ_By_FoxJoy.onnx")
        # NOTE(review): relies on the separator returning the de-reverbed
        # file name first and the reverb file name second; confirm.
        temp_vocals_dereverb_name, temp_vocals_reverb_name = SEPARATOR.separate(
            vocals_path
        )
        for temp_name, destination in (
            (temp_vocals_dereverb_name, vocals_dereverb_path),
            (temp_vocals_reverb_name, vocals_reverb_path),
        ):
            shutil.move(os.path.join(INTERMEDIATE_AUDIO_DIR, temp_name), destination)
        for json_path in (vocals_dereverb_json_path, vocals_reverb_json_path):
            json_dump(arg_dict, json_path)

    return vocals_dereverb_path, vocals_reverb_path
def convert_vocals(
    vocals_path: str,
    song_dir: str,
    voice_model: str,
    pitch_change_octaves: int = 0,
    pitch_change_semi_tones: int = 0,
    index_rate: float = 0.5,
    filter_radius: int = 3,
    rms_mix_rate: float = 0.25,
    protect: float = 0.33,
    f0_method: F0Method = "rmvpe",
    crepe_hop_length: int = 128,
    progress_bar: gr.Progress | None = None,
    percentage: float = 0.0,
) -> str:
    """
    Convert a vocals track to another voice using an RVC voice model.

    The converted track is written to the song directory together with a
    JSON file recording the settings used. If both files already exist at
    the base path obtained for these settings, the conversion is skipped
    and the existing track is returned.

    Parameters
    ----------
    vocals_path : str
        The path to the vocals track to convert.
    song_dir : str
        The path to the directory where the converted vocals will be saved.
    voice_model : str
        The name of the voice model to use.
    pitch_change_octaves : int, default=0
        The number of octaves to pitch-shift the converted vocals by.
    pitch_change_semi_tones : int, default=0
        The number of semi-tones to pitch-shift the converted vocals by.
    index_rate : float, default=0.5
        The influence of the index file on the vocal conversion.
    filter_radius : int, default=3
        The filter radius to use for the vocal conversion.
    rms_mix_rate : float, default=0.25
        The blending rate of the volume envelope of the converted vocals.
    protect : float, default=0.33
        The protection rate for consonants and breathing sounds.
    f0_method : F0Method, default="rmvpe"
        The method to use for pitch extraction.
    crepe_hop_length : int, default=128
        The hop length to use for crepe-based pitch extraction.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentage : float, default=0.0
        Percentage to display in the progress bar.

    Returns
    -------
    str
        The path to the converted vocals.

    Raises
    ------
    InputMissingError
        If no vocals track path, song directory path or voice model name
        is provided.
    PathNotFoundError
        If the provided vocals path, song directory path or voice model name
        does not point to an existing file or directory.
    """
    if not vocals_path:
        raise InputMissingError("Vocals missing!")
    if not os.path.isfile(vocals_path):
        raise PathNotFoundError("Vocals do not exist!")
    if not song_dir:
        raise InputMissingError("Song directory missing!")
    if not os.path.isdir(song_dir):
        raise PathNotFoundError("song directory does not exist!")
    if not voice_model:
        raise InputMissingError("Voice model missing!")
    model_dir = os.path.join(RVC_MODELS_DIR, voice_model)
    if not os.path.isdir(model_dir):
        raise PathNotFoundError("Voice model does not exist!")

    # Octaves and semi-tones are folded into one semi-tone offset.
    total_semi_tones = pitch_change_octaves * 12 + pitch_change_semi_tones

    # The hop length is only recorded alongside the method name when the
    # mangio-crepe method is selected.
    if f0_method == "mangio-crepe":
        f0_label = f"{f0_method}_{crepe_hop_length}"
    else:
        f0_label = f"{f0_method}"

    source_entry = {
        "name": os.path.basename(vocals_path),
        "hash": get_file_hash(vocals_path),
    }
    settings = {
        "input-files": [source_entry],
        "voice-model": voice_model,
        "pitch-shift": total_semi_tones,
        "index-rate": index_rate,
        "filter-radius": filter_radius,
        "rms-mix-rate": rms_mix_rate,
        "protect": protect,
        "f0-method": f0_label,
    }

    base_path = _get_unique_base_path(
        song_dir, "4_Vocals_Converted", settings, progress_bar, percentage
    )
    output_path = f"{base_path}.wav"
    metadata_path = f"{base_path}.json"

    # Reuse a previous result only when both the audio and its metadata exist.
    if not os.path.exists(output_path) or not os.path.exists(metadata_path):
        display_progress("[~] Converting vocals using RVC...", percentage, progress_bar)
        _convert_voice(
            voice_model,
            vocals_path,
            output_path,
            total_semi_tones,
            f0_method,
            index_rate,
            filter_radius,
            rms_mix_rate,
            protect,
            crepe_hop_length,
            44100,  # presumably the output sample rate -- confirm in _convert_voice
        )
        json_dump(settings, metadata_path)

    return output_path
def postprocess_vocals(
    vocals_path: str,
    song_dir: str,
    reverb_rm_size: float = 0.15,
    reverb_wet: float = 0.2,
    reverb_dry: float = 0.8,
    reverb_damping: float = 0.7,
    progress_bar: gr.Progress | None = None,
    percentage: float = 0.0,
) -> str:
    """
    Apply high-pass filter, compressor and reverb effects to a vocals track.

    The effected track is written to the song directory together with a
    JSON file recording the settings used. If both files already exist at
    the base path obtained for these settings, the effects are not
    re-applied and the existing track is returned.

    Parameters
    ----------
    vocals_path : str
        The path to the vocals track to add effects to.
    song_dir : str
        The path to the directory where the effected vocals will be saved.
    reverb_rm_size : float, default=0.15
        The room size of the reverb effect.
    reverb_wet : float, default=0.2
        The wet level of the reverb effect.
    reverb_dry : float, default=0.8
        The dry level of the reverb effect.
    reverb_damping : float, default=0.7
        The damping of the reverb effect.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentage : float, default=0.0
        Percentage to display in the progress bar.

    Returns
    -------
    str
        The path to the effected vocals.

    Raises
    ------
    InputMissingError
        If no vocals track path or song directory path is provided.
    PathNotFoundError
        If the provided vocals path or song directory path does not point
        to an existing file or directory.
    """
    if not vocals_path:
        raise InputMissingError("Vocals missing!")
    if not os.path.isfile(vocals_path):
        raise PathNotFoundError("Vocals do not exist!")
    if not song_dir:
        raise InputMissingError("Song directory missing!")
    if not os.path.isdir(song_dir):
        raise PathNotFoundError("song directory does not exist!")

    source_entry = {
        "name": os.path.basename(vocals_path),
        "hash": get_file_hash(vocals_path),
    }
    settings = {
        "input-files": [source_entry],
        "reverb-room-size": reverb_rm_size,
        "reverb-wet": reverb_wet,
        "reverb-dry": reverb_dry,
        "reverb-damping": reverb_damping,
    }

    base_path = _get_unique_base_path(
        song_dir, "5_Vocals_Postprocessed", settings, progress_bar, percentage
    )
    output_path = f"{base_path}.wav"
    metadata_path = f"{base_path}.json"

    # Reuse a previous result only when both the audio and its metadata exist.
    if not os.path.exists(output_path) or not os.path.exists(metadata_path):
        display_progress(
            "[~] Applying audio effects to vocals...", percentage, progress_bar
        )
        _add_audio_effects(
            vocals_path,
            output_path,
            reverb_rm_size,
            reverb_wet,
            reverb_dry,
            reverb_damping,
        )
        json_dump(settings, metadata_path)

    return output_path
def pitch_shift_background(
    instrumentals_path: str,
    backup_vocals_path: str,
    song_dir: str,
    pitch_change: int = 0,
    progress_bar: gr.Progress | None = None,
    percentages: tuple[float, float] = (0.0, 0.5),
) -> tuple[str, str]:
    """
    Pitch shift instrumentals and backup vocals by a given number of semi-tones.

    When `pitch_change` is zero nothing is generated and the input paths are
    returned unchanged. Otherwise each track is shifted in turn
    (instrumentals first, then backup vocals) and written to the song
    directory with a JSON file recording the settings used; a track whose
    audio and JSON files both already exist is not re-generated.

    Parameters
    ----------
    instrumentals_path : str
        The path to the instrumentals to pitch shift.
    backup_vocals_path : str
        The path to the backup vocals to pitch shift.
    song_dir : str
        The path to the directory where the pitch-shifted instrumentals
        and backup vocals will be saved.
    pitch_change : int, default=0
        The number of semi-tones to pitch-shift the instrumentals
        and backup vocals by.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentages : tuple[float,float], default=(0.0, 0.5)
        Percentages to display in the progress bar. The first is used for
        the instrumentals and the second for the backup vocals.

    Returns
    -------
    instrumentals_shifted_path : str
        The path to the pitch-shifted instrumentals.
    backup_vocals_shifted_path : str
        The path to the pitch-shifted backup vocals.

    Raises
    ------
    InputMissingError
        If no instrumentals path, backup vocals path or song directory path
        is provided.
    PathNotFoundError
        If the provided instrumentals path, backup vocals path or song
        directory path does not point to an existing file or directory.
    """
    if not instrumentals_path:
        raise InputMissingError("Instrumentals missing!")
    if not os.path.isfile(instrumentals_path):
        raise PathNotFoundError("Instrumentals do not exist!")
    if not backup_vocals_path:
        raise InputMissingError("Backup vocals missing!")
    if not os.path.isfile(backup_vocals_path):
        raise PathNotFoundError("Backup vocals do not exist!")
    if not song_dir:
        raise InputMissingError("Song directory missing!")
    if not os.path.isdir(song_dir):
        raise PathNotFoundError("song directory does not exist!")

    # No shift requested: hand the inputs straight back.
    if pitch_change == 0:
        return instrumentals_path, backup_vocals_path

    # (source track, output file prefix, progress message), processed in order.
    jobs = (
        (
            instrumentals_path,
            "6_Instrumental_Shifted",
            "[~] Applying pitch shift to instrumentals",
        ),
        (
            backup_vocals_path,
            "6_Vocals_Backup_Shifted",
            "[~] Applying pitch shift to backup vocals",
        ),
    )

    shifted_paths = []
    for position, (source_path, prefix, message) in enumerate(jobs):
        settings = {
            "input-files": [
                {
                    "name": os.path.basename(source_path),
                    "hash": get_file_hash(source_path),
                }
            ],
            "pitch-shift": pitch_change,
        }
        base_path = _get_unique_base_path(
            song_dir,
            prefix,
            settings,
            progress_bar,
            percentages[position],
        )
        shifted_path = f"{base_path}.wav"
        metadata_path = f"{base_path}.json"

        # Reuse a previous result only when both audio and metadata exist.
        if not os.path.exists(shifted_path) or not os.path.exists(metadata_path):
            display_progress(message, percentages[position], progress_bar)
            _pitch_shift(source_path, shifted_path, pitch_change)
            json_dump(settings, metadata_path)

        shifted_paths.append(shifted_path)

    return shifted_paths[0], shifted_paths[1]
| def _get_voice_model( | |
| mixed_vocals_path: str | None = None, song_dir: str | None = None | |
| ) -> str: | |
| """ | |
| Infer the voice model used for vocal conversion from a | |
| mixed vocals file in a given song directory. | |
| If the voice model cannot be inferred, "Unknown" is returned. | |
| Parameters | |
| ---------- | |
| mixed_vocals_path : str, optional | |
| The path to a mixed vocals file. | |
| song_dir : str, optional | |
| The path to a song directory. | |
| Returns | |
| ------- | |
| str | |
| The voice model used for vocal conversion. | |
| """ | |
| voice_model = "Unknown" | |
| if not (mixed_vocals_path and song_dir): | |
| return voice_model | |
| mixed_vocals_stem = get_path_stem(mixed_vocals_path) | |
| mixed_vocals_json_path = os.path.join(song_dir, f"{mixed_vocals_stem}.json") | |
| if not os.path.isfile(mixed_vocals_json_path): | |
| return voice_model | |
| mixed_vocals_json_dict = json_load(mixed_vocals_json_path) | |
| input_files = mixed_vocals_json_dict.get("input-files") | |
| input_path = input_files[0].get("name") if input_files else None | |
| if not input_path: | |
| return voice_model | |
| input_stem = get_path_stem(input_path) | |
| converted_vocals_json_path = os.path.join(song_dir, f"{input_stem}.json") | |
| if not os.path.isfile(converted_vocals_json_path): | |
| return voice_model | |
| converted_vocals_dict = json_load(converted_vocals_json_path) | |
| return converted_vocals_dict.get("voice-model", voice_model) | |
def get_song_cover_name(
    mixed_vocals_path: str | None = None,
    song_dir: str | None = None,
    voice_model: str | None = None,
    progress_bar: gr.Progress | None = None,
    percentage: float = 0.0,
) -> str:
    """
    Generate a suitable name for a cover of a song based on that song's
    original name and the voice model used for vocal conversion.

    If the path of an existing song directory is provided, the original song
    name is inferred from that directory. If a voice model is not provided but
    the path of an existing song directory and the path of a mixed
    vocals file in that directory are provided, then the voice model is
    inferred from the mixed vocals file. Anything that cannot be inferred
    is replaced by "Unknown".

    Parameters
    ----------
    mixed_vocals_path : str, optional
        The path to a mixed vocals file.
    song_dir : str, optional
        The path to a song directory.
    voice_model : str, optional
        A voice model name.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentage : float, default=0.0
        Percentage to display in the progress bar.

    Returns
    -------
    str
        The song cover name, formatted as "<song name> (<voice model> Ver)".
    """
    display_progress("[~] Getting song cover name...", percentage, progress_bar)

    song_name = "Unknown"
    if song_dir:
        source_path = _get_input_audio_path(song_dir)
        if source_path:
            # Strip the "0_" prefix and "_Original" suffix from the file stem
            # to recover the bare song name.
            source_stem = get_path_stem(source_path)
            song_name = source_stem.removeprefix("0_").removesuffix("_Original")

    if not voice_model:
        voice_model = _get_voice_model(mixed_vocals_path, song_dir)

    return f"{song_name} ({voice_model} Ver)"
def mix_song_cover(
    main_vocals_path: str,
    instrumentals_path: str,
    backup_vocals_path: str,
    song_dir: str,
    main_gain: int = 0,
    inst_gain: int = 0,
    backup_gain: int = 0,
    output_sr: int = 44100,
    output_format: InputAudioExt = "mp3",
    output_name: str | None = None,
    progress_bar: gr.Progress | None = None,
    percentages: tuple[float, float] = (0.0, 0.5),
) -> str:
    """
    Mix main vocals, instrumentals, and backup vocals to create a song cover.

    The mixdown is written to the song directory together with a JSON file
    recording the settings used; if both files already exist the mixing step
    is skipped. The mixdown is then copied into the output audio directory
    under the song cover name.

    Parameters
    ----------
    main_vocals_path : str
        The path to the main vocals to mix.
    instrumentals_path : str
        The path to the instrumentals to mix.
    backup_vocals_path : str
        The path to the backup vocals to mix.
    song_dir : str
        The path to the song directory where the song cover will be saved.
    main_gain : int, default=0
        The gain to apply to the main vocals.
    inst_gain : int, default=0
        The gain to apply to the instrumentals.
    backup_gain : int, default=0
        The gain to apply to the backup vocals.
    output_sr : int, default=44100
        The sample rate of the song cover.
    output_format : InputAudioExt, default="mp3"
        The audio format of the song cover.
    output_name : str, optional
        The name of the song cover. If not provided, a name is generated
        with `get_song_cover_name`. Path separators in the name are replaced
        by underscores so that the cover is always written directly inside
        the output audio directory.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.
    percentages : tuple[float,float], default=(0.0, 0.5)
        Percentages to display in the progress bar. The first is used for
        the mixing step and the second for the cover name lookup.

    Returns
    -------
    str
        The path to the song cover.

    Raises
    ------
    InputMissingError
        If no main vocals, instrumentals, backup vocals or song directory
        path is provided.
    PathNotFoundError
        If the provided main vocals, instrumentals, backup vocals or song
        directory path does not point to an existing file or directory.
    """
    if not main_vocals_path:
        raise InputMissingError("Main vocals missing!")
    if not os.path.isfile(main_vocals_path):
        raise PathNotFoundError("Main vocals do not exist!")
    if not instrumentals_path:
        raise InputMissingError("Instrumentals missing!")
    if not os.path.isfile(instrumentals_path):
        raise PathNotFoundError("Instrumentals do not exist!")
    if not backup_vocals_path:
        raise InputMissingError("Backup vocals missing!")
    if not os.path.isfile(backup_vocals_path):
        raise PathNotFoundError("Backup vocals do not exist!")
    if not song_dir:
        raise InputMissingError("Song directory missing!")
    if not os.path.isdir(song_dir):
        raise PathNotFoundError("song directory does not exist!")

    arg_dict = {
        "input-files": [
            {
                "name": os.path.basename(main_vocals_path),
                "hash": get_file_hash(main_vocals_path),
            },
            {
                "name": os.path.basename(instrumentals_path),
                "hash": get_file_hash(instrumentals_path),
            },
            {
                "name": os.path.basename(backup_vocals_path),
                "hash": get_file_hash(backup_vocals_path),
            },
        ],
        "main-gain": main_gain,
        "instrument-gain": inst_gain,
        "backup-gain": backup_gain,
        "sample-rate": output_sr,
    }

    mixdown_path_base = _get_unique_base_path(
        song_dir, "7_Mixdown", arg_dict, progress_bar, percentages[0]
    )
    mixdown_path = f"{mixdown_path_base}.{output_format}"
    mixdown_json_path = f"{mixdown_path_base}.json"

    # Reuse a previous mixdown only when both audio and metadata exist.
    if not (os.path.exists(mixdown_path) and os.path.exists(mixdown_json_path)):
        display_progress(
            "[~] Mixing main vocals, instrumentals, and backup vocals...",
            percentages[0],
            progress_bar,
        )
        # NOTE: _mix_audio takes the backup vocals before the instrumentals,
        # for both the paths and the gains.
        _mix_audio(
            main_vocals_path,
            backup_vocals_path,
            instrumentals_path,
            main_gain,
            backup_gain,
            inst_gain,
            output_format,
            output_sr,
            mixdown_path,
        )
        json_dump(arg_dict, mixdown_json_path)

    output_name = output_name or get_song_cover_name(
        main_vocals_path, song_dir, None, progress_bar, percentages[1]
    )

    # A name containing a path separator would either target a sub-directory
    # that does not exist (the copy below would fail after all the mixing work
    # is done) or escape OUTPUT_AUDIO_DIR entirely, so flatten it into a plain
    # file name. os.altsep is None on platforms with a single separator.
    for separator in filter(None, (os.sep, os.altsep)):
        output_name = output_name.replace(separator, "_")

    song_cover_path = os.path.join(OUTPUT_AUDIO_DIR, f"{output_name}.{output_format}")
    os.makedirs(OUTPUT_AUDIO_DIR, exist_ok=True)
    shutil.copyfile(mixdown_path, song_cover_path)

    return song_cover_path
def run_pipeline(
    song_input: str,
    voice_model: str,
    pitch_change_vocals: int = 0,
    pitch_change_all: int = 0,
    index_rate: float = 0.5,
    filter_radius: int = 3,
    rms_mix_rate: float = 0.25,
    protect: float = 0.33,
    f0_method: F0Method = "rmvpe",
    crepe_hop_length: int = 128,
    reverb_rm_size: float = 0.15,
    reverb_wet: float = 0.2,
    reverb_dry: float = 0.8,
    reverb_damping: float = 0.7,
    main_gain: int = 0,
    inst_gain: int = 0,
    backup_gain: int = 0,
    output_sr: int = 44100,
    output_format: InputAudioExt = "mp3",
    output_name: str | None = None,
    return_files: bool = False,
    progress_bar: gr.Progress | None = None,
) -> str | tuple[str, ...]:
    """
    Run the song cover generation pipeline.

    The pipeline chains the individual stages in order: retrieve the song,
    separate vocals from instrumentals, separate main from backup vocals,
    de-reverb the main vocals, convert them with the voice model,
    post-process the converted vocals, pitch-shift the background tracks and
    finally mix everything into the song cover.

    Parameters
    ----------
    song_input : str
        A Youtube URL, the path of a local audio file or the path of a song
        directory.
    voice_model : str
        The name of the voice model to use for vocal conversion.
    pitch_change_vocals : int, default=0
        The number of octaves to pitch-shift the converted vocals by.
    pitch_change_all : int, default=0
        The number of semi-tones to pitch-shift the converted vocals,
        instrumentals, and backup vocals by.
    index_rate : float, default=0.5
        The influence of the index file on the vocal conversion.
    filter_radius : int, default=3
        The filter radius to use for the vocal conversion.
    rms_mix_rate : float, default=0.25
        The blending rate of the volume envelope of the converted vocals.
    protect : float, default=0.33
        The protection rate for consonants and breathing sounds in the vocal
        conversion.
    f0_method : F0Method, default="rmvpe"
        The method to use for pitch extraction in the vocal conversion.
    crepe_hop_length : int, default=128
        The hop length to use for crepe-based pitch extraction.
    reverb_rm_size : float, default=0.15
        The room size of the reverb effect to apply to the converted vocals.
    reverb_wet : float, default=0.2
        The wet level of the reverb effect to apply to the converted vocals.
    reverb_dry : float, default=0.8
        The dry level of the reverb effect to apply to the converted vocals.
    reverb_damping : float, default=0.7
        The damping of the reverb effect to apply to the converted vocals.
    main_gain : int, default=0
        The gain to apply to the post-processed vocals.
    inst_gain : int, default=0
        The gain to apply to the pitch-shifted instrumentals.
    backup_gain : int, default=0
        The gain to apply to the pitch-shifted backup vocals.
    output_sr : int, default=44100
        The sample rate of the song cover.
    output_format : InputAudioExt, default="mp3"
        The audio format of the song cover.
    output_name : str, optional
        The name of the song cover.
    return_files : bool, default=False
        Whether to return the paths of the generated intermediate audio files.
    progress_bar : gr.Progress, optional
        Gradio progress bar to update.

    Returns
    -------
    str | tuple[str,...]
        The path to the generated song cover and, if `return_files=True`,
        also the paths of any generated intermediate audio files. In that
        case the song cover path is the last element of the tuple.

    Raises
    ------
    InputMissingError
        If no song input or voice model name is provided.
    PathNotFoundError
        If the voice model name does not point to an existing directory.
    """
    if not song_input:
        raise InputMissingError(
            "Song input missing! Please provide a valid YouTube url, local audio file"
            " path or cached song directory path."
        )
    if not voice_model:
        raise InputMissingError("Voice model missing!")
    model_dir = os.path.join(RVC_MODELS_DIR, voice_model)
    if not os.path.isdir(model_dir):
        raise PathNotFoundError("Voice model does not exist!")

    display_progress("[~] Starting song cover generation pipeline...", 0, progress_bar)

    # Progress is reported in fifteenths, one slot per reported sub-step.
    orig_song_path, song_dir = retrieve_song(
        song_input, progress_bar, (0 / 15, 1 / 15, 2 / 15)
    )
    vocals_path, instrumentals_path = separate_vocals(
        orig_song_path, song_dir, False, progress_bar, (3 / 15, 4 / 15)
    )
    main_vocals_path, backup_vocals_path = separate_main_vocals(
        vocals_path, song_dir, False, progress_bar, (5 / 15, 6 / 15)
    )
    vocals_dereverb_path, reverb_path = dereverb_vocals(
        main_vocals_path, song_dir, False, progress_bar, (7 / 15, 8 / 15)
    )

    converted_vocals_path = convert_vocals(
        vocals_path=vocals_dereverb_path,
        song_dir=song_dir,
        voice_model=voice_model,
        pitch_change_octaves=pitch_change_vocals,
        pitch_change_semi_tones=pitch_change_all,
        index_rate=index_rate,
        filter_radius=filter_radius,
        rms_mix_rate=rms_mix_rate,
        protect=protect,
        f0_method=f0_method,
        crepe_hop_length=crepe_hop_length,
        progress_bar=progress_bar,
        percentage=9 / 15,
    )
    vocals_mixed_path = postprocess_vocals(
        vocals_path=converted_vocals_path,
        song_dir=song_dir,
        reverb_rm_size=reverb_rm_size,
        reverb_wet=reverb_wet,
        reverb_dry=reverb_dry,
        reverb_damping=reverb_damping,
        progress_bar=progress_bar,
        percentage=10 / 15,
    )
    instrumentals_shifted_path, backup_vocals_shifted_path = pitch_shift_background(
        instrumentals_path=instrumentals_path,
        backup_vocals_path=backup_vocals_path,
        song_dir=song_dir,
        pitch_change=pitch_change_all,
        progress_bar=progress_bar,
        percentages=(11 / 15, 12 / 15),
    )
    # Fall back to the unshifted tracks should a shifted path be empty.
    song_cover_path = mix_song_cover(
        main_vocals_path=vocals_mixed_path,
        instrumentals_path=instrumentals_shifted_path or instrumentals_path,
        backup_vocals_path=backup_vocals_shifted_path or backup_vocals_path,
        song_dir=song_dir,
        main_gain=main_gain,
        inst_gain=inst_gain,
        backup_gain=backup_gain,
        output_sr=output_sr,
        output_format=output_format,
        output_name=output_name,
        progress_bar=progress_bar,
        percentages=(13 / 15, 14 / 15),
    )

    if not return_files:
        return song_cover_path

    # Intermediate files in pipeline order, with the song cover last.
    return (
        orig_song_path,
        vocals_path,
        instrumentals_path,
        main_vocals_path,
        backup_vocals_path,
        vocals_dereverb_path,
        reverb_path,
        converted_vocals_path,
        vocals_mixed_path,
        instrumentals_shifted_path,
        backup_vocals_shifted_path,
        song_cover_path,
    )