Spaces:
Build error
Build error
Commit
·
e6d227e
1
Parent(s):
0ae65b0
Add a French model
Browse files
examples.py
CHANGED
|
@@ -65,6 +65,13 @@ examples = [
|
|
| 65 |
4,
|
| 66 |
"./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
|
| 67 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
[
|
| 69 |
"Chinese",
|
| 70 |
"desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
|
|
@@ -316,4 +323,18 @@ examples = [
|
|
| 316 |
4,
|
| 317 |
"./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
|
| 318 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
]
|
|
|
|
| 65 |
4,
|
| 66 |
"./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
|
| 67 |
],
|
| 68 |
+
[
|
| 69 |
+
"French",
|
| 70 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
| 71 |
+
"greedy_search",
|
| 72 |
+
4,
|
| 73 |
+
"./test_wavs/french/common_voice_fr_19364697.wav",
|
| 74 |
+
],
|
| 75 |
[
|
| 76 |
"Chinese",
|
| 77 |
"desh2608/icefall-asr-alimeeting-pruned-transducer-stateless7",
|
|
|
|
| 323 |
4,
|
| 324 |
"./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
|
| 325 |
],
|
| 326 |
+
[
|
| 327 |
+
"French",
|
| 328 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
| 329 |
+
"greedy_search",
|
| 330 |
+
4,
|
| 331 |
+
"./test_wavs/french/common_voice_fr_19738183.wav",
|
| 332 |
+
],
|
| 333 |
+
[
|
| 334 |
+
"French",
|
| 335 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
| 336 |
+
"greedy_search",
|
| 337 |
+
4,
|
| 338 |
+
"./test_wavs/french/common_voice_fr_27024649.wav",
|
| 339 |
+
],
|
| 340 |
]
|
model.py
CHANGED
|
@@ -111,8 +111,31 @@ def decode_offline_recognizer_sherpa_onnx(
|
|
| 111 |
return s.result.text.lower()
|
| 112 |
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
def decode(
|
| 115 |
-
recognizer: Union[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
filename: str,
|
| 117 |
) -> str:
|
| 118 |
if isinstance(recognizer, sherpa.OfflineRecognizer):
|
|
@@ -121,6 +144,8 @@ def decode(
|
|
| 121 |
return decode_online_recognizer(recognizer, filename)
|
| 122 |
elif isinstance(recognizer, sherpa_onnx.OfflineRecognizer):
|
| 123 |
return decode_offline_recognizer_sherpa_onnx(recognizer, filename)
|
|
|
|
|
|
|
| 124 |
else:
|
| 125 |
raise ValueError(f"Unknown recognizer type {type(recognizer)}")
|
| 126 |
|
|
@@ -155,6 +180,10 @@ def get_pretrained_model(
|
|
| 155 |
return german_models[repo_id](
|
| 156 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
| 157 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
elif repo_id in japanese_models:
|
| 159 |
return japanese_models[repo_id](
|
| 160 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
|
@@ -654,6 +683,51 @@ def _get_german_pre_trained_model(
|
|
| 654 |
return recognizer
|
| 655 |
|
| 656 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 657 |
@lru_cache(maxsize=10)
|
| 658 |
def _get_japanese_pre_trained_model(
|
| 659 |
repo_id: str,
|
|
@@ -778,6 +852,10 @@ german_models = {
|
|
| 778 |
"csukuangfj/wav2vec2.0-torchaudio": _get_german_pre_trained_model,
|
| 779 |
}
|
| 780 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 781 |
japanese_models = {
|
| 782 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent": _get_japanese_pre_trained_model,
|
| 783 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent": _get_japanese_pre_trained_model,
|
|
@@ -791,6 +869,7 @@ all_models = {
|
|
| 791 |
**tibetan_models,
|
| 792 |
**arabic_models,
|
| 793 |
**german_models,
|
|
|
|
| 794 |
}
|
| 795 |
|
| 796 |
language_to_models = {
|
|
@@ -801,4 +880,5 @@ language_to_models = {
|
|
| 801 |
"Tibetan": list(tibetan_models.keys()),
|
| 802 |
"Arabic": list(arabic_models.keys()),
|
| 803 |
"German": list(german_models.keys()),
|
|
|
|
| 804 |
}
|
|
|
|
| 111 |
return s.result.text.lower()
|
| 112 |
|
| 113 |
|
| 114 |
+
def decode_online_recognizer_sherpa_onnx(
|
| 115 |
+
recognizer: sherpa_onnx.OnlineRecognizer,
|
| 116 |
+
filename: str,
|
| 117 |
+
) -> str:
|
| 118 |
+
s = recognizer.create_stream()
|
| 119 |
+
samples, sample_rate = read_wave(filename)
|
| 120 |
+
s.accept_waveform(sample_rate, samples)
|
| 121 |
+
|
| 122 |
+
tail_paddings = np.zeros(int(0.3 * sample_rate), dtype=np.float32)
|
| 123 |
+
s.accept_waveform(sample_rate, tail_paddings)
|
| 124 |
+
s.input_finished()
|
| 125 |
+
|
| 126 |
+
while recognizer.is_ready(s):
|
| 127 |
+
recognizer.decode_stream(s)
|
| 128 |
+
|
| 129 |
+
return recognizer.get_result(s).lower()
|
| 130 |
+
|
| 131 |
+
|
| 132 |
def decode(
|
| 133 |
+
recognizer: Union[
|
| 134 |
+
sherpa.OfflineRecognizer,
|
| 135 |
+
sherpa.OnlineRecognizer,
|
| 136 |
+
sherpa_onnx.OfflineRecognizer,
|
| 137 |
+
sherpa_onnx.OnlineRecognizer,
|
| 138 |
+
],
|
| 139 |
filename: str,
|
| 140 |
) -> str:
|
| 141 |
if isinstance(recognizer, sherpa.OfflineRecognizer):
|
|
|
|
| 144 |
return decode_online_recognizer(recognizer, filename)
|
| 145 |
elif isinstance(recognizer, sherpa_onnx.OfflineRecognizer):
|
| 146 |
return decode_offline_recognizer_sherpa_onnx(recognizer, filename)
|
| 147 |
+
elif isinstance(recognizer, sherpa_onnx.OnlineRecognizer):
|
| 148 |
+
return decode_online_recognizer_sherpa_onnx(recognizer, filename)
|
| 149 |
else:
|
| 150 |
raise ValueError(f"Unknown recognizer type {type(recognizer)}")
|
| 151 |
|
|
|
|
| 180 |
return german_models[repo_id](
|
| 181 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
| 182 |
)
|
| 183 |
+
elif repo_id in french_models:
|
| 184 |
+
return french_models[repo_id](
|
| 185 |
+
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
| 186 |
+
)
|
| 187 |
elif repo_id in japanese_models:
|
| 188 |
return japanese_models[repo_id](
|
| 189 |
repo_id, decoding_method=decoding_method, num_active_paths=num_active_paths
|
|
|
|
| 683 |
return recognizer
|
| 684 |
|
| 685 |
|
| 686 |
+
@lru_cache(maxsize=10)
|
| 687 |
+
def _get_french_pre_trained_model(
|
| 688 |
+
repo_id: str,
|
| 689 |
+
decoding_method: str,
|
| 690 |
+
num_active_paths: int,
|
| 691 |
+
):
|
| 692 |
+
assert repo_id in [
|
| 693 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14",
|
| 694 |
+
], repo_id
|
| 695 |
+
|
| 696 |
+
encoder_model = _get_nn_model_filename(
|
| 697 |
+
repo_id=repo_id,
|
| 698 |
+
filename="encoder-epoch-29-avg-9-with-averaged-model.onnx",
|
| 699 |
+
subfolder=".",
|
| 700 |
+
)
|
| 701 |
+
|
| 702 |
+
decoder_model = _get_nn_model_filename(
|
| 703 |
+
repo_id=repo_id,
|
| 704 |
+
filename="decoder-epoch-29-avg-9-with-averaged-model.onnx",
|
| 705 |
+
subfolder=".",
|
| 706 |
+
)
|
| 707 |
+
|
| 708 |
+
joiner_model = _get_nn_model_filename(
|
| 709 |
+
repo_id=repo_id,
|
| 710 |
+
filename="joiner-epoch-29-avg-9-with-averaged-model.onnx",
|
| 711 |
+
subfolder=".",
|
| 712 |
+
)
|
| 713 |
+
|
| 714 |
+
tokens = _get_token_filename(repo_id=repo_id, subfolder=".")
|
| 715 |
+
|
| 716 |
+
recognizer = sherpa_onnx.OnlineRecognizer(
|
| 717 |
+
tokens=tokens,
|
| 718 |
+
encoder=encoder_model,
|
| 719 |
+
decoder=decoder_model,
|
| 720 |
+
joiner=joiner_model,
|
| 721 |
+
num_threads=1,
|
| 722 |
+
sample_rate=16000,
|
| 723 |
+
feature_dim=80,
|
| 724 |
+
decoding_method=decoding_method,
|
| 725 |
+
max_active_paths=num_active_paths,
|
| 726 |
+
)
|
| 727 |
+
|
| 728 |
+
return recognizer
|
| 729 |
+
|
| 730 |
+
|
| 731 |
@lru_cache(maxsize=10)
|
| 732 |
def _get_japanese_pre_trained_model(
|
| 733 |
repo_id: str,
|
|
|
|
| 852 |
"csukuangfj/wav2vec2.0-torchaudio": _get_german_pre_trained_model,
|
| 853 |
}
|
| 854 |
|
| 855 |
+
french_models = {
|
| 856 |
+
"shaojieli/sherpa-onnx-streaming-zipformer-fr-2023-04-14": _get_french_pre_trained_model,
|
| 857 |
+
}
|
| 858 |
+
|
| 859 |
japanese_models = {
|
| 860 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent": _get_japanese_pre_trained_model,
|
| 861 |
"TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent": _get_japanese_pre_trained_model,
|
|
|
|
| 869 |
**tibetan_models,
|
| 870 |
**arabic_models,
|
| 871 |
**german_models,
|
| 872 |
+
**french_models,
|
| 873 |
}
|
| 874 |
|
| 875 |
language_to_models = {
|
|
|
|
| 880 |
"Tibetan": list(tibetan_models.keys()),
|
| 881 |
"Arabic": list(arabic_models.keys()),
|
| 882 |
"German": list(german_models.keys()),
|
| 883 |
+
"French": list(french_models.keys()),
|
| 884 |
}
|
test_wavs/french/common_voice_fr_19364697.wav
ADDED
|
Binary file (228 kB). View file
|
|
|
test_wavs/french/common_voice_fr_19738183.wav
ADDED
|
Binary file (122 kB). View file
|
|
|
test_wavs/french/common_voice_fr_27024649.wav
ADDED
|
Binary file (203 kB). View file
|
|
|
test_wavs/french/trans.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
common_voice_fr_19738183 CE DERNIER A ÉVOLUÉ TOUT AU LONG DE L'HISTOIRE ROMAINE
|
| 2 |
+
common_voice_fr_27024649 SON ACTIONNAIRE MAJORITAIRE EST LE CONSEIL TERRITORIAL DE SAINT PIERRE ET MIQUELON
|
| 3 |
+
common_voice_fr_19364697 CE SITE CONTIENT QUATRE TOMBEAUX DE LA DYNASTIE ACHÉMÉNIDE ET SEPT DES SASSANIDES
|