Spaces: Running on T4
Update tortoise/api.py
Browse files
- tortoise/api.py +9 -4
tortoise/api.py
CHANGED
|
@@ -296,11 +296,16 @@ class TextToSpeech:
|
|
| 296 |
|
| 297 |
# Perform the crossfade if there is an overlap
|
| 298 |
if wav_overlap is not None:
|
| 299 |
-
|
|
|
|
| 300 |
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
|
| 305 |
# Save the last part of this chunk for overlapping with the next chunk
|
| 306 |
wav_overlap = wav_gen[-overlap_len:]
|
|
|
|
| 296 |
|
| 297 |
# Perform the crossfade if there is an overlap
|
| 298 |
if wav_overlap is not None:
|
| 299 |
+
# Using a Hanning window for smoother transition
|
| 300 |
+
crossfade_window = torch.hann_window(overlap_len).to(wav_gen.device)
|
| 301 |
|
| 302 |
+
# Equal-power crossfade
|
| 303 |
+
crossfade_wav = torch.sqrt(crossfade_window) * wav_chunk[:overlap_len]
|
| 304 |
+
wav_overlap = torch.sqrt(1 - crossfade_window) * wav_overlap
|
| 305 |
+
|
| 306 |
+
# Overlap-Add (OLA) for merging audio chunks
|
| 307 |
+
wav_chunk[:overlap_len] = F.pad(wav_overlap, (0, wav_chunk[:overlap_len].shape[0] - wav_overlap.shape[0])) + \
|
| 308 |
+
F.pad(crossfade_wav, (0, wav_chunk[:overlap_len].shape[0] - crossfade_wav.shape[0]))
|
| 309 |
|
| 310 |
# Save the last part of this chunk for overlapping with the next chunk
|
| 311 |
wav_overlap = wav_gen[-overlap_len:]
|