Spaces:

NeerAbhy
/

Text_analyzer

Sleeping

NeerAbhy committed on Jul 7, 2024

Commit

850bf95

verified ·

1 Parent(s): 340c448

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,9 @@ from huggingface_hub import hf_hub_download
 # Fetch "model.bin" from the "cis-lmu/glotlid" repo on the Hugging Face Hub
 # and load it with fastText; this runs at module level, so it happens on import.
 model_path = hf_hub_download(repo_id="cis-lmu/glotlid", filename="model.bin")
 identification_model = fasttext.load_model(model_path)
 def lang_ident(text):
     """Return the raw fastText prediction for ``text``.

     Args:
         text: The string to classify.

     Returns:
         Whatever ``identification_model.predict`` returns for ``text``.
     """
     # The module-level model is named ``identification_model``; the earlier
     # spelling ``indetification_model`` raised NameError on every call.
     return identification_model.predict(text)
 # Model identifier and local cache directory.
 # NOTE(review): presumably consumed by model-loading code outside this
 # hunk -- confirm against the full file.
 pretrained_model: str = "facebook/m2m100_1.2B"
 cache_dir: str = "models/"
@@ -23,6 +25,18 @@ clasification = pipeline(
     "audio-classification",
     model="anton-l/xtreme_s_xlsr_300m_minds14",
 )
 def audio_a_text(audio):
   text = transcription(audio)["text"]

 # Fetch "model.bin" from the "cis-lmu/glotlid" repo on the Hugging Face Hub
 # and load it with fastText; this runs at module level, so it happens on import.
 model_path = hf_hub_download(repo_id="cis-lmu/glotlid", filename="model.bin")
 identification_model = fasttext.load_model(model_path)
 def lang_ident(text):
     """Identify the language of ``text`` with the loaded fastText model.

     Args:
         text: The string to classify.

     Returns:
         dict: ``{"language": <language name>, "score": <float>}`` describing
         the top prediction.
     """
     # fastText's predict() handles one line at a time and rejects input that
     # contains a newline, so flatten multi-line text first.
     single_line = text.replace("\n", " ")
     labels, scores = identification_model.predict(single_line)

     # fastText labels carry a "__label__" prefix; drop it so get_name()
     # receives the bare code it splits on "_".
     top_label = labels[0]
     prefix = "__label__"
     if top_label.startswith(prefix):
         top_label = top_label[len(prefix):]

     # Keys must be string literals (the bare names ``language``/``socore``
     # were undefined), and the full name is returned rather than its first
     # character. float() keeps the score a plain Python number.
     return {"language": get_name(top_label), "score": float(scores[0])}
 # Model identifier and local cache directory.
 # NOTE(review): presumably consumed by model-loading code outside this
 # hunk -- confirm against the full file.
 pretrained_model: str = "facebook/m2m100_1.2B"
 cache_dir: str = "models/"
     "audio-classification",
     model="anton-l/xtreme_s_xlsr_300m_minds14",
 )
def language_names(json_path):
    """Load and return the parsed contents of a JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for that file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8; without an explicit encoding open() falls back to
    # the locale default, which can corrupt non-ASCII language names.
    with open(json_path, encoding="utf-8") as json_file:
        return json.load(json_file)
# Mapping used by get_name() to turn a language code into a display name.
# NOTE(review): "assetslanguage_names.json" looks like it is missing a path
# separator (e.g. "assets/language_names.json") -- confirm against the repo
# layout before changing it.
label2name = language_names("assetslanguage_names.json")


def get_name(label):
    """Get the name of the language from a model label.

    Args:
        label: A label whose first ``_``-separated field is the language
            code, with or without fastText's ``__label__`` prefix.

    Returns:
        The entry of ``label2name`` for that code, or the code itself when
        the mapping has no entry for it.
    """
    # Raw fastText labels start with "__label__"; splitting such a label on
    # "_" would yield an empty first field, so strip the prefix first.
    prefix = "__label__"
    if label.startswith(prefix):
        label = label[len(prefix):]
    iso_3 = label.split("_")[0]
    # Fall back to the bare code instead of raising on an unmapped language.
    return label2name.get(iso_3, iso_3)
 def audio_a_text(audio):
   text = transcription(audio)["text"]