import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


class LanguageDetector:
    def __init__(self):
        # Alternative approach (disabled): download and load a FastText model instead
        # model_path = hf_hub_download("facebook/fasttext-language-identification", "model.bin")
        # self.model = fasttext.load_model(model_path)

        # Load the XLM-RoBERTa language-detection tokenizer and model
        self.tokenizer = AutoTokenizer.from_pretrained("papluca/xlm-roberta-base-language-detection")
        self.model = AutoModelForSequenceClassification.from_pretrained("papluca/xlm-roberta-base-language-detection")

    # Predict the language of a text
    def predict_language(self, text):
        # Tokenize the input text
        inputs = self.tokenizer(text, return_tensors="pt")
        # Run the model in inference mode (no gradient tracking needed)
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Find the index of the highest-scoring class
        prediction_idx = outputs.logits.argmax(dim=-1).item()
        # Convert the index to its language code using the model's config.id2label
        language_code = self.model.config.id2label[prediction_idx]
        return language_code
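

# A minimal usage sketch of the class above. The sample sentences are
# illustrative only; the model weights are downloaded on first use.
if __name__ == "__main__":
    detector = LanguageDetector()
    for sample in ["Hello, how are you?", "Bonjour, comment allez-vous ?"]:
        # Prints the predicted language code (e.g. "en", "fr") for each sample
        print(sample, "->", detector.predict_language(sample))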