Spaces:

jeevitha-app
/

Sentiment_analyzer

Sleeping

App Files Files Community

jeevitha-app committed 23 days ago

Commit

f585727

verified ·

1 Parent(s): 9e57519

Create app.py

Browse files

Files changed (1) hide show

app.py +83 -0

app.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+import joblib
+import re
+import numpy as np
+# ==========================================================
+# 🔹 1️⃣ Load Models and Vectorizers
+# ==========================================================
# Load the serialized classifier and vectorizer for each language once, at
# import time, so every request reuses the same in-memory objects.
# NOTE(review): joblib.load unpickles arbitrary objects; only ship .pkl
# artifacts that come from a trusted source.
english_model, english_vectorizer = (
    joblib.load("logistic_regression_english.pkl"),
    joblib.load("tfidf_vectorizer_english.pkl"),
)
persian_model, persian_vectorizer = (
    joblib.load("logistic_regression_persian.pkl"),
    joblib.load("tfidf_vectorizer_persian.pkl"),
)

# Class id -> display name. Per the original note this mirrors the label
# encoding used during training (presumably 0/1/2 — confirm against the
# training script).
label_map = dict(enumerate(("Negative", "Neutral", "Positive")))
+# ==========================================================
+# 🔹 2️⃣ Preprocessing (must match training exactly)
+# ==========================================================
def clean_english_text(text: str) -> str:
    """Normalize English input before it is vectorized.

    The steps run in this order: lowercase the text, delete URL-like tokens
    (anything starting with ``http`` or ``www`` up to the next whitespace),
    delete every character that is not an ASCII letter or whitespace, then
    collapse whitespace runs to a single space and trim both ends.

    Note that removed characters are deleted, not replaced by a space, so
    ``"don't"`` becomes ``"dont"``.

    Args:
        text: Raw user text.

    Returns:
        The cleaned string; empty if no ASCII letters remain.
    """
    # Order matters: URLs have to go before punctuation is stripped, otherwise
    # "http://a.b" would already have been mangled into plain letters.
    substitutions = (
        (r"http\S+|www\S+|https\S+", ""),
        (r"[^a-zA-Z\s]", ""),
        (r"\s+", " "),
    )
    cleaned = text.lower()
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
def clean_persian_text(text: str) -> str:
    """Normalize Persian input before it is vectorized.

    Deletes every character that is neither whitespace nor inside the
    Unicode range U+0600–U+06FF, then collapses whitespace runs to a single
    space and trims both ends.

    The kept range is the whole Arabic block, so Arabic-script digits and
    punctuation inside it survive, while characters outside it (Latin
    letters, ASCII digits, and the zero-width non-joiner U+200C) are
    removed without inserting a space.

    Args:
        text: Raw user text.

    Returns:
        The cleaned string; empty if nothing from the kept range remains.
    """
    arabic_block_only = re.sub(r"[^\u0600-\u06FF\s]", "", text)
    single_spaced = re.sub(r"\s+", " ", arabic_block_only)
    return single_spaced.strip()
+# ==========================================================
+# 🔹 3️⃣ Prediction Function
+# ==========================================================
def predict_sentiment(text, language):
    """Classify the sentiment of ``text`` with the model for ``language``.

    Args:
        text: Raw user text. ``None``, empty, or whitespace-only input is
            answered with a prompt to enter text.
        language: ``"English"`` or ``"Persian"``; anything else is answered
            with an "invalid language" message.

    Returns:
        A human-readable string: either
        ``"Prediction: <label> (<p> confidence)"`` or a warning/error
        message. The function never returns anything but ``str``.
    """
    # A missing value (e.g. a null sent through the API) is treated like
    # empty input instead of crashing on ``None.strip()``.
    if text is None or not text.strip():
        return "⚠ Please enter some text."

    # One table instead of two copy-pasted branches keeps both languages on
    # exactly the same code path.
    pipelines = {
        "English": (clean_english_text, english_vectorizer, english_model),
        "Persian": (clean_persian_text, persian_vectorizer, persian_model),
    }
    pipeline = pipelines.get(language) if isinstance(language, str) else None
    if pipeline is None:
        return "❌ Invalid language option selected."
    cleaner, vectorizer, model = pipeline

    cleaned = cleaner(text)
    if not cleaned:
        # Nothing survived cleaning (digits/punctuation only, or text in a
        # different script than the selected language). Vectorizing an empty
        # string yields no features, so any "prediction" and its confidence
        # would not reflect the input at all.
        return (
            f"⚠ No {language} words found in the input. "
            "Please check the text and the selected language."
        )

    features = vectorizer.transform([cleaned])
    probs = model.predict_proba(features)[0]
    best = int(np.argmax(probs))

    # predict_proba columns follow ``model.classes_``; translate the column
    # index into the actual class label before looking up its display name.
    # Falls back to the index itself if the model exposes no ``classes_``.
    classes = getattr(model, "classes_", None)
    label = classes[best] if classes is not None else best
    if hasattr(label, "item"):
        label = label.item()  # numpy scalar -> plain Python value
    sentiment = label_map.get(label, str(label))

    return f"Prediction: {sentiment} ({probs[best]:.2f} confidence)"
+# ==========================================================
+# 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size)
+# ==========================================================
# Startup sanity check: report how many features each loaded vectorizer
# exposes, so a wrong or truncated artifact is visible in the logs.
_english_vocab_size = len(english_vectorizer.get_feature_names_out())
print(f"✅ English vectorizer vocabulary size: {_english_vocab_size}")
_persian_vocab_size = len(persian_vectorizer.get_feature_names_out())
print(f"✅ Persian vectorizer vocabulary size: {_persian_vocab_size}")
+# ==========================================================
+# 🔹 5️⃣ Gradio Interface
+# ==========================================================
# Input widgets, in the positional order predict_sentiment(text, language)
# expects them.
text_input = gr.Textbox(lines=3, label="Enter Text")
language_choice = gr.Radio(["English", "Persian"], label="Select Language", value="English")

# Single text field that shows the formatted prediction or a warning.
result_box = gr.Textbox(label="Predicted Sentiment")

# Clickable sample rows: [text, language].
sample_inputs = [
    ["This movie was amazing!", "English"],
    ["The worst experience ever", "English"],
    ["این فیلم خیلی بد بود", "Persian"],
    ["من این محصول را دوست دارم", "Persian"],
]

iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[text_input, language_choice],
    outputs=result_box,
    title="🌍 Multilingual Sentiment Classifier (English & Persian)",
    description="Choose your language and get sentiment prediction with confidence score.",
    examples=sample_inputs,
)

# Start the web app as soon as this module is executed.
iface.launch()