Spaces:
Running
Running
| import json | |
| from pathlib import Path | |
| from typing import Union | |
| import numpy as np | |
| from style_bert_vits2.constants import DEFAULT_STYLE | |
| from style_bert_vits2.logging import logger | |
def save_neutral_vector(
    wav_dir: Union[Path, str],
    output_dir: Union[Path, str],
    config_path: Union[Path, str],
    config_output_path: Union[Path, str],
):
    """Save one mean style vector and register it as the only style.

    Every ``*.npy`` file found recursively under ``wav_dir`` is loaded and
    averaged element-wise. The result is written with a leading axis of
    length 1 to ``output_dir / "style_vectors.npy"``. The JSON config at
    ``config_path`` is then loaded, its ``data.num_styles`` set to ``1`` and
    its ``data.style2id`` set to ``{DEFAULT_STYLE: 0}``, and the result is
    written to ``config_output_path``.

    Args:
        wav_dir: Directory searched recursively for ``*.npy`` embeddings.
        output_dir: Directory receiving ``style_vectors.npy``; created
            (including parents) if it does not exist.
        config_path: Existing JSON config containing a ``"data"`` object.
        config_output_path: Destination of the updated JSON config.

    Raises:
        ValueError: If no ``*.npy`` file exists under ``wav_dir``.
    """
    wav_dir = Path(wav_dir)
    output_dir = Path(output_dir)
    config_path = Path(config_path)
    config_output_path = Path(config_output_path)

    # np.save does not create missing directories, so make sure it exists.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sort so the averaging order (and thus the float result) does not depend
    # on the order in which the filesystem happens to list the files.
    npy_files = sorted(wav_dir.rglob("*.npy"))
    if not npy_files:
        # Fail with an actionable message instead of NumPy's generic
        # "need at least one array" error.
        raise ValueError(f"No .npy style embedding files found under {wav_dir}")

    x = np.stack([np.load(file) for file in npy_files], axis=0)  # (N, D)
    mean = np.mean(x, axis=0)  # (D,)
    only_mean = np.stack([mean])  # (1, D)
    np.save(output_dir / "style_vectors.npy", only_mean)
    logger.info(f"Saved mean style vector to {output_dir}")

    with open(config_path, encoding="utf-8") as f:
        json_dict = json.load(f)
    json_dict["data"]["num_styles"] = 1
    json_dict["data"]["style2id"] = {DEFAULT_STYLE: 0}
    with open(config_output_path, "w", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=2, ensure_ascii=False)
    logger.info(f"Saved style config to {config_output_path}")
def _mean_style_embedding(npy_files) -> np.ndarray:
    """Return the element-wise mean of the arrays stored in ``npy_files``."""
    stacked = np.stack([np.load(file) for file in npy_files], axis=0)  # (N, D)
    return np.mean(stacked, axis=0)  # (D,)


def save_styles_by_dirs(
    wav_dir: Union[Path, str],
    output_dir: Union[Path, str],
    config_path: Union[Path, str],
    config_output_path: Union[Path, str],
):
    """Save one mean style vector per subdirectory, plus an overall mean.

    Row 0 of the saved array is the mean of every ``*.npy`` file found
    recursively under ``wav_dir`` and is registered under ``DEFAULT_STYLE``.
    Each immediate subdirectory of ``wav_dir`` (in sorted order) that
    contains at least one ``*.npy`` file contributes one further row, named
    after the subdirectory. The array is written to
    ``output_dir / "style_vectors.npy"``; the JSON config read from
    ``config_path`` gets ``data.num_styles`` and ``data.style2id`` updated
    and is written to ``config_output_path``.

    If ``wav_dir`` has fewer than two subdirectories, the work is delegated
    to ``save_neutral_vector`` and only the overall mean is saved.

    Args:
        wav_dir: Directory whose immediate subdirectories define the styles.
        output_dir: Directory receiving ``style_vectors.npy``; created
            (including parents) if it does not exist.
        config_path: Existing JSON config containing a ``"data"`` object.
        config_output_path: Destination of the updated JSON config.

    Raises:
        ValueError: If no ``*.npy`` file exists under ``wav_dir``.
    """
    wav_dir = Path(wav_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    config_path = Path(config_path)
    config_output_path = Path(config_output_path)

    subdirs = sorted(d for d in wav_dir.iterdir() if d.is_dir())
    if len(subdirs) < 2:
        logger.info(
            f"At least 2 subdirectories are required for generating style vectors with respect to them, found {len(subdirs)}."
        )
        logger.info("Generating only neutral style vector instead.")
        save_neutral_vector(wav_dir, output_dir, config_path, config_output_path)
        return

    # The default style is the mean over every embedding, regardless of the
    # subdirectory it lives in. Sorting keeps the result reproducible.
    all_files = sorted(wav_dir.rglob("*.npy"))
    if not all_files:
        # Fail with an actionable message instead of NumPy's generic
        # "need at least one array" error.
        raise ValueError(f"No .npy style embedding files found under {wav_dir}")
    style_vectors = [_mean_style_embedding(all_files)]
    names = [DEFAULT_STYLE]

    for style_dir in subdirs:
        if style_dir.name == DEFAULT_STYLE:
            # A duplicate name would collapse in style2id, leaving num_styles
            # out of sync with the mapping and moving the default style away
            # from row 0. Its files still contribute to the overall mean.
            logger.warning(
                f"Subdirectory {style_dir} has the reserved default style name "
                f"'{DEFAULT_STYLE}'; skipping it as a separate style."
            )
            continue
        npy_files = sorted(style_dir.rglob("*.npy"))
        if not npy_files:
            # Subdirectories without embeddings define no style.
            continue
        style_vectors.append(_mean_style_embedding(npy_files))
        names.append(style_dir.name)

    # Stack them to make (num_styles, D)
    style_vectors_npy = np.stack(style_vectors, axis=0)
    np.save(output_dir / "style_vectors.npy", style_vectors_npy)
    logger.info(f"Saved style vectors to {output_dir / 'style_vectors.npy'}")

    # Save style2id config to json; ids are row indices of the saved array.
    style2id = {name: i for i, name in enumerate(names)}
    with open(config_path, encoding="utf-8") as f:
        json_dict = json.load(f)
    json_dict["data"]["num_styles"] = len(names)
    json_dict["data"]["style2id"] = style2id
    with open(config_output_path, "w", encoding="utf-8") as f:
        json.dump(json_dict, f, indent=2, ensure_ascii=False)
    logger.info(f"Saved style config to {config_output_path}")