Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,8 +60,8 @@ print(f"Using device: {device}, dtype: {dtype}")
|
|
| 60 |
|
| 61 |
pipe = pipeline(
|
| 62 |
"automatic-speech-recognition",
|
| 63 |
-
model="openai/whisper-
|
| 64 |
-
torch_dtype=torch.
|
| 65 |
device=device,
|
| 66 |
)
|
| 67 |
#vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
|
@@ -332,7 +332,13 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
| 332 |
|
| 333 |
if not ref_text.strip():
|
| 334 |
gr.Info("No reference text provided, transcribing reference audio...")
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
gr.Info("Finished transcription")
|
| 337 |
else:
|
| 338 |
gr.Info("Using custom reference text...")
|
|
|
|
| 60 |
|
| 61 |
pipe = pipeline(
|
| 62 |
"automatic-speech-recognition",
|
| 63 |
+
model="openai/whisper-medium-v3-turbo",
|
| 64 |
+
torch_dtype=torch.float16,
|
| 65 |
device=device,
|
| 66 |
)
|
| 67 |
#vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
|
|
|
| 332 |
|
| 333 |
if not ref_text.strip():
|
| 334 |
gr.Info("No reference text provided, transcribing reference audio...")
|
| 335 |
+
ref_text = outputs = pipe(
|
| 336 |
+
ref_audio,
|
| 337 |
+
chunk_length_s=30,
|
| 338 |
+
batch_size=128,
|
| 339 |
+
generate_kwargs={"task": "transcribe"},
|
| 340 |
+
return_timestamps=False,
|
| 341 |
+
)['text'].strip()
|
| 342 |
gr.Info("Finished transcription")
|
| 343 |
else:
|
| 344 |
gr.Info("Using custom reference text...")
|