update pipeline for model loading
Browse files
app.py
CHANGED
|
@@ -62,6 +62,10 @@ langs = {
|
|
| 62 |
"jp": 1,
|
| 63 |
}
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def gen_song(model_name, spk, texts, durs, pitchs):
|
| 66 |
fs = 44100
|
| 67 |
tempo = 120
|
|
@@ -141,15 +145,19 @@ def gen_song(model_name, spk, texts, durs, pitchs):
|
|
| 141 |
# return (fs, np.array([0.0])), "success!"
|
| 142 |
|
| 143 |
# Infer
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
if model_name == "Model①(Chinese)-zh":
|
| 154 |
sid = np.array([singer_embeddings[model_name][spk]])
|
| 155 |
output_dict = svs(batch, sids=sid)
|
|
@@ -160,7 +168,7 @@ def gen_song(model_name, spk, texts, durs, pitchs):
|
|
| 160 |
wav_info = output_dict["wav"].cpu().numpy()
|
| 161 |
|
| 162 |
# mos prediction with sr=16k
|
| 163 |
-
predictor
|
| 164 |
wav_mos = librosa.resample(wav_info, orig_sr=fs, target_sr=16000)
|
| 165 |
wav_mos = torch.from_numpy(wav_mos).unsqueeze(0)
|
| 166 |
len_mos = torch.tensor([wav_mos.shape[1]])
|
|
|
|
| 62 |
"jp": 1,
|
| 63 |
}
|
| 64 |
|
| 65 |
+
predictor = torch.hub.load("South-Twilight/SingMOS:v0.2.0", "singing_ssl_mos", trust_repo=True)
|
| 66 |
+
exist_model = "Null"
|
| 67 |
+
svs = None
|
| 68 |
+
|
| 69 |
def gen_song(model_name, spk, texts, durs, pitchs):
|
| 70 |
fs = 44100
|
| 71 |
tempo = 120
|
|
|
|
| 145 |
# return (fs, np.array([0.0])), "success!"
|
| 146 |
|
| 147 |
# Infer
|
| 148 |
+
global exist_model
|
| 149 |
+
global svs
|
| 150 |
+
if exist_model == "Null" or exist_model != model_name:
|
| 151 |
+
device = "cpu"
|
| 152 |
+
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 153 |
+
d = ModelDownloader()
|
| 154 |
+
pretrain_downloaded = d.download_and_unpack(PRETRAIN_MODEL)
|
| 155 |
+
svs = SingingGenerate(
|
| 156 |
+
train_config = pretrain_downloaded["train_config"],
|
| 157 |
+
model_file = pretrain_downloaded["model_file"],
|
| 158 |
+
device = device
|
| 159 |
+
)
|
| 160 |
+
exist_model = model_name
|
| 161 |
if model_name == "Model①(Chinese)-zh":
|
| 162 |
sid = np.array([singer_embeddings[model_name][spk]])
|
| 163 |
output_dict = svs(batch, sids=sid)
|
|
|
|
| 168 |
wav_info = output_dict["wav"].cpu().numpy()
|
| 169 |
|
| 170 |
# mos prediction with sr=16k
|
| 171 |
+
global predictor
|
| 172 |
wav_mos = librosa.resample(wav_info, orig_sr=fs, target_sr=16000)
|
| 173 |
wav_mos = torch.from_numpy(wav_mos).unsqueeze(0)
|
| 174 |
len_mos = torch.tensor([wav_mos.shape[1]])
|