# Spaces: Running  (page-status text captured with this listing; not part of the script)
| import argparse | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from multiprocessing import cpu_count | |
| from pathlib import Path | |
| from typing import Any | |
| import librosa | |
| import pyloudnorm as pyln | |
| import soundfile | |
| from numpy.typing import NDArray | |
| from tqdm import tqdm | |
| from config import get_config | |
| from style_bert_vits2.logging import logger | |
| from style_bert_vits2.utils.stdout_wrapper import SAFE_STDOUT | |
# Block size, in seconds, handed to the loudness meter. When the meter rejects
# a clip with ValueError, the caller reports it as shorter than this duration
# and skips loudness normalization for that file.
DEFAULT_BLOCK_SIZE: float = 0.400
class BlockSizeException(Exception):
    """Raised when the loudness meter cannot measure the given audio.

    The loudness-normalization helper converts the meter's ``ValueError`` into
    this type so callers can skip normalization for that clip without
    swallowing unrelated ``ValueError``s.
    """
def normalize_audio(data: NDArray[Any], sr: int) -> NDArray[Any]:
    """Loudness-normalize ``data`` to a target of -23.0 (dB LUFS).

    Args:
        data: Audio samples to measure and normalize.
        sr: Sampling rate of ``data`` in Hz.

    Returns:
        The loudness-normalized samples.

    Raises:
        BlockSizeException: If the meter raises ``ValueError`` while measuring
            integrated loudness (the caller treats this as audio shorter than
            ``DEFAULT_BLOCK_SIZE`` seconds). The original error is attached as
            the cause.
    """
    # BS.1770 meter using the module-wide block size.
    meter = pyln.Meter(sr, block_size=DEFAULT_BLOCK_SIZE)
    try:
        loudness = meter.integrated_loudness(data)
    except ValueError as e:
        # Chain explicitly so the meter's message stays visible in tracebacks.
        raise BlockSizeException(e) from e
    return pyln.normalize.loudness(data, loudness, -23.0)
def resample(
    file: Path,
    input_dir: Path,
    output_dir: Path,
    target_sr: int,
    normalize: bool,
    trim: bool,
) -> None:
    """Load ``file``, convert it to a ``target_sr`` wav, and save it.

    The result is written under ``output_dir`` at the same path that ``file``
    has relative to ``input_dir``, with the suffix replaced by ``.wav``.

    Args:
        file: Path of the source file; must be located under ``input_dir``.
        input_dir: Root directory used to compute the relative output path.
        output_dir: Root directory that receives the converted file.
        target_sr: Sampling rate to load (and write) the audio at.
        normalize: If true, apply loudness normalization; when the clip is
            rejected with ``BlockSizeException`` normalization is skipped and
            the un-normalized audio is still written.
        trim: If true, trim leading/trailing silence (``top_db=30``).

    Any ``Exception`` raised along the way is logged as a warning and the file
    is skipped; nothing is re-raised to the caller.
    """
    try:
        # Loading doubles as the check that librosa can read this file at all.
        # Besides wav, formats such as mp3, ogg and flac can also be read.
        wav: NDArray[Any]
        sr: int
        wav, sr = librosa.load(file, sr=target_sr)

        if normalize:
            try:
                wav = normalize_audio(wav, sr)
            except BlockSizeException:
                # Emit an empty line first so the log entry does not share a
                # line with other console output.
                print("")
                logger.info(
                    f"Skip normalize due to less than {DEFAULT_BLOCK_SIZE} second audio: {file}"
                )

        if trim:
            wav, _ = librosa.effects.trim(wav, top_db=30)

        relative_path = file.relative_to(input_dir)
        # Any non-.wav extension is replaced with .wav here.
        output_path = output_dir / relative_path.with_suffix(".wav")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        soundfile.write(output_path, wav, sr)
    except Exception as e:
        # NOTE: this handler also covers failures in normalize/trim/write, not
        # only loading, although the message mentions loading.
        logger.warning(f"Cannot load file, so skipping: {file}, {e}")
if __name__ == "__main__":
    # Command-line entry point: resample every file under --input_dir into
    # --output_dir, using a thread pool. Defaults come from the project config.
    config = get_config()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--sr",
        type=int,
        default=config.resample_config.sampling_rate,
        help="sampling rate",
    )
    parser.add_argument(
        "--input_dir",
        "-i",
        type=str,
        default=config.resample_config.in_dir,
        help="path to source dir",
    )
    parser.add_argument(
        "--output_dir",
        "-o",
        type=str,
        default=config.resample_config.out_dir,
        help="path to target dir",
    )
    # 0 means "choose automatically from the CPU count" (see below).
    parser.add_argument(
        "--num_processes",
        type=int,
        default=4,
        help="cpu_processes",
    )
    parser.add_argument(
        "--normalize",
        action="store_true",
        default=False,
        help="loudness normalize audio",
    )
    parser.add_argument(
        "--trim",
        action="store_true",
        default=False,
        help="trim silence (start and end only)",
    )
    args = parser.parse_args()

    # A negative worker count would otherwise only fail later, inside
    # ThreadPoolExecutor, with an unrelated-looking traceback.
    if args.num_processes < 0:
        parser.error("--num_processes must be 0 (auto) or a positive integer")

    processes: int
    if args.num_processes == 0:
        # Auto mode: leave two cores free on machines with more than four.
        available = cpu_count()
        processes = available - 2 if available > 4 else 1
    else:
        processes = args.num_processes

    input_dir = Path(args.input_dir)
    output_dir = Path(args.output_dir)
    logger.info(f"Resampling {input_dir} to {output_dir}")
    sr = int(args.sr)
    normalize: bool = args.normalize
    trim: bool = args.trim

    # Collect every file; whether each one is valid audio is checked later,
    # when librosa tries to load it.
    original_files = [f for f in input_dir.rglob("*") if f.is_file()]

    if not original_files:
        logger.error(f"No files found in {input_dir}")
        raise ValueError(f"No files found in {input_dir}")

    output_dir.mkdir(parents=True, exist_ok=True)

    with ThreadPoolExecutor(max_workers=processes) as executor:
        futures = [
            executor.submit(resample, file, input_dir, output_dir, sr, normalize, trim)
            for file in original_files
        ]
        # Iterating only drives the progress bar; resample() logs its own
        # failures, so the futures' results are not inspected here.
        for _ in tqdm(
            as_completed(futures), total=len(original_files), file=SAFE_STDOUT
        ):
            pass

    logger.info("Resampling Done!")