Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -137,11 +137,12 @@ E2TTS_ema_model2 = load_custom(
|
|
| 137 |
)
|
| 138 |
|
| 139 |
|
| 140 |
-
|
|
|
|
| 141 |
"""
|
| 142 |
Splits the input text into chunks, each with a maximum number of characters.
|
| 143 |
-
If a chunk exceeds the character limit
|
| 144 |
-
|
| 145 |
|
| 146 |
Args:
|
| 147 |
text (str): The text to be split.
|
|
@@ -153,28 +154,19 @@ def chunk_text(text, max_chars=100):
|
|
| 153 |
chunks = []
|
| 154 |
current_chunk = ""
|
| 155 |
|
| 156 |
-
# Split the text into sentences
|
| 157 |
-
|
| 158 |
|
| 159 |
-
for
|
| 160 |
-
# Check if adding this
|
| 161 |
-
if len(current_chunk) + len(
|
| 162 |
-
current_chunk +=
|
| 163 |
else:
|
| 164 |
-
# If the chunk exceeds max_chars
|
| 165 |
if current_chunk:
|
| 166 |
-
#
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
chunks.append(current_chunk[:split_index].strip())
|
| 170 |
-
current_chunk = current_chunk[split_index:].strip() + sentence
|
| 171 |
-
else:
|
| 172 |
-
# If no space is found (unusual case), append the chunk as is
|
| 173 |
-
chunks.append(current_chunk.strip())
|
| 174 |
-
else:
|
| 175 |
-
# If no chunk is being built, just append the sentence
|
| 176 |
-
current_chunk = sentence + " "
|
| 177 |
-
|
| 178 |
# Append any remaining text in current_chunk to chunks
|
| 179 |
if current_chunk:
|
| 180 |
chunks.append(current_chunk.strip())
|
|
@@ -183,6 +175,7 @@ def chunk_text(text, max_chars=100):
|
|
| 183 |
|
| 184 |
|
| 185 |
|
|
|
|
| 186 |
@gpu_decorator
|
| 187 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
|
| 188 |
if exp_name == "English":
|
|
@@ -383,7 +376,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
| 383 |
# Use the new chunk_text function to split gen_text
|
| 384 |
max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
|
| 385 |
print(f"max chars: {max_chars} ")
|
| 386 |
-
gen_text_batches = chunk_text(gen_text, max_chars=
|
| 387 |
print('ref_text', ref_text)
|
| 388 |
for i, batch_text in enumerate(gen_text_batches):
|
| 389 |
print(f'gen_text {i}', batch_text)
|
|
|
|
| 137 |
)
|
| 138 |
|
| 139 |
|
| 140 |
+
|
| 141 |
+
def chunk_text(text, max_chars=110):
|
| 142 |
"""
|
| 143 |
Splits the input text into chunks, each with a maximum number of characters.
|
| 144 |
+
If a chunk exceeds the character limit and there is no punctuation at the end,
|
| 145 |
+
it will split at the last space.
|
| 146 |
|
| 147 |
Args:
|
| 148 |
text (str): The text to be split.
|
|
|
|
| 154 |
chunks = []
|
| 155 |
current_chunk = ""
|
| 156 |
|
| 157 |
+
# Split the text into words (instead of sentences) to handle cases with no punctuation.
|
| 158 |
+
words = text.split(" ")
|
| 159 |
|
| 160 |
+
for word in words:
|
| 161 |
+
# Check if adding this word exceeds the max_chars limit
|
| 162 |
+
if len(current_chunk) + len(word) + 1 <= max_chars: # +1 for the space
|
| 163 |
+
current_chunk += word + " "
|
| 164 |
else:
|
| 165 |
+
# If the chunk exceeds max_chars, split at the last space
|
| 166 |
if current_chunk:
|
| 167 |
+
chunks.append(current_chunk.strip()) # Add the chunk
|
| 168 |
+
current_chunk = word + " " # Start a new chunk with the current word
|
| 169 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
# Append any remaining text in current_chunk to chunks
|
| 171 |
if current_chunk:
|
| 172 |
chunks.append(current_chunk.strip())
|
|
|
|
| 175 |
|
| 176 |
|
| 177 |
|
| 178 |
+
|
| 179 |
@gpu_decorator
|
| 180 |
def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence, cross_fade_duration=0.15, progress=gr.Progress()):
|
| 181 |
if exp_name == "English":
|
|
|
|
| 376 |
# Use the new chunk_text function to split gen_text
|
| 377 |
max_chars = int(0.2 * (len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr)))
|
| 378 |
print(f"max chars: {max_chars} ")
|
| 379 |
+
gen_text_batches = chunk_text(gen_text, max_chars=110)
|
| 380 |
print('ref_text', ref_text)
|
| 381 |
for i, batch_text in enumerate(gen_text_batches):
|
| 382 |
print(f'gen_text {i}', batch_text)
|