积极的屁孩
committed on
Commit
·
defde46
1
Parent(s):
a8377f8
trying to fix vevo style
Browse files
app.py
CHANGED
|
@@ -385,17 +385,25 @@ def vevo_style(content_wav, style_wav):
|
|
| 385 |
else:
|
| 386 |
raise ValueError("Invalid content audio format")
|
| 387 |
|
| 388 |
-
if isinstance(style_wav,
|
| 389 |
-
|
| 390 |
-
if isinstance(style_wav[0], np.ndarray):
|
| 391 |
-
style_data, style_sr = style_wav
|
| 392 |
-
else:
|
| 393 |
-
style_sr, style_data = style_wav
|
| 394 |
-
style_tensor = torch.FloatTensor(style_data)
|
| 395 |
-
if style_tensor.ndim == 1:
|
| 396 |
-
style_tensor = style_tensor.unsqueeze(0) # 添加通道维度
|
| 397 |
else:
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
|
| 400 |
# 打印debug信息
|
| 401 |
print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")
|
|
|
|
| 385 |
else:
|
| 386 |
raise ValueError("Invalid content audio format")
|
| 387 |
|
| 388 |
+
if isinstance(style_wav[0], np.ndarray):
|
| 389 |
+
style_data, style_sr = style_wav
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
else:
|
| 391 |
+
style_sr, style_data = style_wav
|
| 392 |
+
|
| 393 |
+
# 确保是单声道
|
| 394 |
+
if len(style_data.shape) > 1 and style_data.shape[1] > 1:
|
| 395 |
+
style_data = np.mean(style_data, axis=1)
|
| 396 |
+
|
| 397 |
+
# 重采样到24kHz
|
| 398 |
+
if style_sr != 24000:
|
| 399 |
+
style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
|
| 400 |
+
style_tensor = torchaudio.functional.resample(style_tensor, style_sr, 24000)
|
| 401 |
+
style_sr = 24000
|
| 402 |
+
else:
|
| 403 |
+
style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
|
| 404 |
+
|
| 405 |
+
# 归一化音量
|
| 406 |
+
style_tensor = style_tensor / (torch.max(torch.abs(style_tensor)) + 1e-6) * 0.95
|
| 407 |
|
| 408 |
# 打印debug信息
|
| 409 |
print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")
|