text-language-detection

Running

alexneakameni committed on Feb 13

Commit

da2ee46

verified ·

1 Parent(s): da91144

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+import torch
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+# Load model and tokenizer
+model_name = "alexneakameni/language_detection"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Get label mapping
+id2label = model.config.id2label
+def predict_language(text, top_k=5):
+    """Predicts the top-k languages for the given text."""
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512).to(device)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    probs = torch.nn.functional.softmax(logits, dim=-1).squeeze()
+    top_probs, top_indices = torch.topk(probs, top_k)
+    results = [f"{id2label[str(idx.item())]}: {prob:.4f}" for prob, idx in zip(top_probs, top_indices)]
+    return "\n".join(results)
+# Create Gradio interface
+demo = gr.Interface(
+    fn=predict_language,
+    inputs=[
+        gr.Textbox(label="Enter text", placeholder="Type a sentence here..."),
+        gr.Slider(1, 10, value=5, step=1, label="Top-k Languages")
+    ],
+    outputs=gr.Textbox(label="Predicted Languages"),
+    title="🌍 Language Detection",
+    description="Detects the language of a given text using a fine-tuned BERT model. Returns the top-k most probable languages."
+)
+demo.launch()