Spaces:
Build error
Build error
Commit
·
27c18ec
1
Parent(s):
f5b3bb8
add whisper
Browse files
app.py
CHANGED
|
@@ -19,7 +19,6 @@
|
|
| 19 |
# References:
|
| 20 |
# https://gradio.app/docs/#dropdown
|
| 21 |
|
| 22 |
-
import base64
|
| 23 |
import logging
|
| 24 |
import os
|
| 25 |
import tempfile
|
|
@@ -47,11 +46,6 @@ def convert_to_wav(in_filename: str) -> str:
|
|
| 47 |
f"ffmpeg -hide_banner -loglevel error -i '{in_filename}' -ar 16000 '{out_filename}.flac'"
|
| 48 |
)
|
| 49 |
|
| 50 |
-
with open(out_filename + ".flac", "rb") as f:
|
| 51 |
-
s = "\n" + out_filename + "\n"
|
| 52 |
-
s += base64.b64encode(f.read()).decode()
|
| 53 |
-
logging.info(s)
|
| 54 |
-
|
| 55 |
return out_filename
|
| 56 |
|
| 57 |
|
|
|
|
| 19 |
# References:
|
| 20 |
# https://gradio.app/docs/#dropdown
|
| 21 |
|
|
|
|
| 22 |
import logging
|
| 23 |
import os
|
| 24 |
import tempfile
|
|
|
|
| 46 |
f"ffmpeg -hide_banner -loglevel error -i '{in_filename}' -ar 16000 '{out_filename}.flac'"
|
| 47 |
)
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
return out_filename
|
| 50 |
|
| 51 |
|
model.py
CHANGED
|
@@ -269,6 +269,39 @@ def _get_aishell2_pretrained_model(
|
|
| 269 |
return recognizer
|
| 270 |
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
@lru_cache(maxsize=10)
|
| 273 |
def _get_gigaspeech_pre_trained_model(
|
| 274 |
repo_id: str,
|
|
@@ -839,6 +872,10 @@ chinese_models = {
|
|
| 839 |
}
|
| 840 |
|
| 841 |
english_models = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 842 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
| 843 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
| 844 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|
|
|
|
| 269 |
return recognizer
|
| 270 |
|
| 271 |
|
| 272 |
+
@lru_cache(maxsize=10)
def _get_whisper_model(
    repo_id: str, decoding_method: str, num_active_paths: int
) -> sherpa_onnx.OfflineRecognizer:
    """Build an offline sherpa-onnx recognizer for an English Whisper model.

    This function must be named ``_get_whisper_model``: that is the name the
    ``english_models`` table refers to. Defining it under the gigaspeech
    loader's name left ``_get_whisper_model`` undefined and was itself
    shadowed by the real gigaspeech loader defined right after it.

    Args:
      repo_id:
        Model identifier. Either a bare size name ("tiny.en", "base.en",
        "small.en", "medium.en") or the same name with a "whisper-" prefix,
        which is the form used by the keys of ``english_models``.
      decoding_method:
        Not used by this loader; it only takes part in the ``lru_cache`` key.
        NOTE(review): presumably kept so all loaders share one call
        signature -- confirm against the dispatcher.
      num_active_paths:
        Not used by this loader; see ``decoding_method``.

    Returns:
      A ``sherpa_onnx.OfflineRecognizer`` created via ``from_whisper``.

    Raises:
      AssertionError: if ``repo_id`` does not name a supported model size.
    """
    prefix = "whisper-"
    # The model table registers keys such as "whisper-tiny.en", while the
    # hosted repositories and file names use the bare size ("tiny.en").
    name = repo_id[len(prefix):] if repo_id.startswith(prefix) else repo_id
    assert name in ("tiny.en", "base.en", "small.en", "medium.en"), repo_id

    full_repo_id = "csukuangfj/sherpa-onnx-whisper-" + name

    encoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename=f"{name}-encoder.int8.ort",
        subfolder=".",
    )

    decoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename=f"{name}-decoder.int8.ort",
        subfolder=".",
    )

    tokens = _get_token_filename(
        repo_id=full_repo_id, subfolder=".", filename=f"{name}-tokens.txt"
    )

    recognizer = sherpa_onnx.OfflineRecognizer.from_whisper(
        encoder=encoder,
        decoder=decoder,
        tokens=tokens,
        num_threads=2,
    )

    return recognizer
|
| 303 |
+
|
| 304 |
+
|
| 305 |
@lru_cache(maxsize=10)
|
| 306 |
def _get_gigaspeech_pre_trained_model(
|
| 307 |
repo_id: str,
|
|
|
|
| 872 |
}
|
| 873 |
|
| 874 |
english_models = {
|
| 875 |
+
"whisper-tiny.en": _get_whisper_model,
|
| 876 |
+
"whisper-base.en": _get_whisper_model,
|
| 877 |
+
"whisper-small.en": _get_whisper_model,
|
| 878 |
+
"whisper-medium.en": _get_whisper_model,
|
| 879 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
| 880 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
| 881 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|