	Update app.py
app.py CHANGED

@@ -60,7 +60,7 @@ print(f"Using device: {device}, dtype: {dtype}")
 
 pipe = pipeline(
     "automatic-speech-recognition",
-    model="
+    model="openai/whisper-large-v3-turbo",
     torch_dtype=torch.float16,
     device=device,
 )
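The replacement checkpoint is loaded through the standard transformers ASR pipeline. For reference, a minimal self-contained sketch of the same call shape; the audio file name and the CPU fallback are illustrative assumptions, not part of this commit:

import torch
from transformers import pipeline

# Assumed device setup for the sketch; app.py derives device/dtype earlier.
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device=device,
)

# Hypothetical input file; the pipeline returns a dict with a "text" key.
result = pipe("sample.wav")
print(result["text"])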
@@ -110,8 +110,8 @@ def load_custom(ckpt_path: str, vocab_path="", model_cfg=None):
     return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
 
 
-F2TTS_ema_model3 = load_f5tts()
-E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
+#F2TTS_ema_model3 = load_f5tts()
+#E2TTS_ema_model4 = load_e2tts() if USING_SPACES else None
 custom_ema_model, pre_custom_path = None, ""
 
 chat_model_state = None
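With these two assignments commented out, app.py no longer builds the F5-TTS and E2-TTS models at import time, which trims startup cost. If either model is needed again later, a lazy-load pattern keeps that saving; an illustrative sketch only, assuming the existing load_f5tts() loader stays in app.py (the cache dict and accessor name are hypothetical, not from the repo):

# Hypothetical lazy-loading pattern: defer load_f5tts() until first use.
_model_cache = {}

def get_f5tts():
    # load_f5tts() is the loader already defined in app.py.
    if "f5tts" not in _model_cache:
        _model_cache["f5tts"] = load_f5tts()
    return _model_cache["f5tts"]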
@@ -212,7 +212,7 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
         gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
 
         # Calculate duration based on the lengths of ref_text and gen_text
-        duration = min(2000, max(270, int(
+        duration = min(2000, max(270, int(ref_audio_len + ref_audio_len / ref_text_len * gen_text_len / speed)))
 
         # Print the calculated duration
         print(f"Duration: {duration} seconds")
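The new line keeps the duration estimate (reference length plus a share proportional to the generated text, divided by speed) and bounds it to the range [270, 2000] with the min(2000, max(270, x)) clamp idiom. A small worked check, with made-up numbers rather than values from the app:

def clamp_duration(x):
    # Bound x to the closed range [270, 2000], as in the updated line.
    return min(2000, max(270, int(x)))

# Illustrative inputs only:
ref_audio_len, ref_text_len, gen_text_len, speed = 500, 40, 120, 1.0
estimate = ref_audio_len + ref_audio_len / ref_text_len * gen_text_len / speed
print(clamp_duration(estimate))  # 500 + 500/40*120/1.0 = 2000 -> 2000
print(clamp_duration(100))       # below the floor -> 270
print(clamp_duration(5000))      # above the ceiling -> 2000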