Spaces:
Runtime error
Runtime error
Try the trie binary version of the KenLM model first; if it is not found, fall back to the hash-table binary version
Browse files
perplexity_lenses/perplexity.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
| 2 |
import re
|
| 3 |
import unicodedata
|
| 4 |
from typing import Dict
|
|
|
|
| 5 |
|
| 6 |
import kenlm
|
| 7 |
import sentencepiece
|
|
@@ -178,10 +179,16 @@ class KenlmModel:
|
|
| 178 |
return self.non_printing_chars_re.sub("", text)
|
| 179 |
|
| 180 |
def download_kenlm_model(self, model_dataset: str, language: str):
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
sentence_piece_model_url = hf_hub_url(
|
| 186 |
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.sp.model"
|
| 187 |
)
|
|
|
|
| 2 |
import re
|
| 3 |
import unicodedata
|
| 4 |
from typing import Dict
|
| 5 |
+
from requests.exceptions import HTTPError
|
| 6 |
|
| 7 |
import kenlm
|
| 8 |
import sentencepiece
|
|
|
|
| 179 |
return self.non_printing_chars_re.sub("", text)
|
| 180 |
|
| 181 |
def download_kenlm_model(self, model_dataset: str, language: str):
|
| 182 |
+
try:
|
| 183 |
+
kenlm_model_url = hf_hub_url(
|
| 184 |
+
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.trie.bin"
|
| 185 |
+
)
|
| 186 |
+
self.kenlm_model_dir = cached_download(kenlm_model_url)
|
| 187 |
+
except HTTPError:
|
| 188 |
+
kenlm_model_url = hf_hub_url(
|
| 189 |
+
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.arpa.bin"
|
| 190 |
+
)
|
| 191 |
+
self.kenlm_model_dir = cached_download(kenlm_model_url)
|
| 192 |
sentence_piece_model_url = hf_hub_url(
|
| 193 |
KENLM_MODEL_REPO, filename=f"{model_dataset}/{language}.sp.model"
|
| 194 |
)
|