Spaces:
Configuration error
Configuration error
| from pathlib import Path | |
| import os, tempfile | |
| import numpy as np | |
| import soundfile as sf | |
| import librosa | |
| import torch | |
| import gc | |
| from audio_separator.separator import Separator | |
def get_vocals(src_path: str, dst_path: str, min_seconds: float = 8) -> str:
    """Extract the vocal stem of ``src_path`` and save it next to ``dst_path``.

    If the source audio is shorter than ``min_seconds`` it is first copied to a
    temporary WAV file padded with trailing silence, and the separation runs on
    that copy instead. Only the "Vocals" stem is emitted; it is written into
    ``dst_path``'s parent directory using ``dst_path``'s stem as the file name
    and its suffix (default ``wav``) as the output format.

    While the function runs, torch's process-wide default device is switched to
    CPU; the previous default is restored on every exit path.

    Args:
        src_path: Path of the audio file to separate.
        dst_path: Desired path of the vocals file. Missing parent directories
            are created.
        min_seconds: Minimum duration (in seconds) fed to the separator;
            shorter inputs are padded with silence up to this length.

    Returns:
        The path of the vocals file reported by the separator, made relative
        to ``dst_path``'s parent directory when the separator returns a
        relative name.

    Raises:
        RuntimeError: If the separator reports no output file.
    """
    dst = Path(dst_path)
    temp_path = None
    sep = None

    # Captured before the try so the finally block can always restore it.
    default_device = torch.get_default_device()
    try:
        torch.set_default_device("cpu")
        dst.parent.mkdir(parents=True, exist_ok=True)

        use_path = src_path
        # Quick duration check
        if librosa.get_duration(path=src_path) < min_seconds:
            # sr=None keeps the file's native sample rate (no resampling);
            # mono=False keeps every channel.
            y, sr = librosa.load(src_path, sr=None, mono=False)
            if y.ndim == 1:  # ensure shape (channels, samples)
                y = y[np.newaxis, :]

            pad = max(0, int(min_seconds * sr) - y.shape[1])
            if pad:
                y = np.pad(y, ((0, 0), (0, pad)), mode="constant")

            # Write a temp WAV for the separator. temp_path is bound before
            # the write so the finally block removes the file even if the
            # write fails.
            fd, temp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            sf.write(temp_path, y.T, sr)  # soundfile expects (frames, channels)
            use_path = temp_path

        # Run separation: emit only the vocals, named after dst's stem.
        sep = Separator(
            output_dir=str(dst.parent),
            output_format=(dst.suffix.lstrip(".") or "wav"),
            output_single_stem="Vocals",
            model_file_dir="ckpts/roformer/",
        )
        sep.load_model()
        out_files = sep.separate(use_path, {"Vocals": dst.stem})
        if not out_files:
            raise RuntimeError(
                f"Vocal separation produced no output file for {src_path!r}"
            )

        out = Path(out_files[0])
        return str(out if out.is_absolute() else (dst.parent / out))
    finally:
        try:
            if temp_path is not None and os.path.exists(temp_path):
                os.remove(temp_path)
        finally:
            # Drop the separator and collect garbage *before* emptying the
            # CUDA cache, so memory held by the model can actually be
            # returned; then restore the caller's default device even if the
            # temp-file cleanup above failed.
            sep = None
            gc.collect()
            torch.cuda.empty_cache()
            torch.set_default_device(default_device)
| # Example: | |
| # final = get_vocals("in/clip.mp3", "out/vocals.wav") | |
| # print(final) | |