# app.py — "Create app.py" by jeevitha-app (commit f585727, verified), 3.29 kB
import gradio as gr
import joblib
import re
import numpy as np
# ==========================================================
# 🔹 1️⃣ Load Models and Vectorizers
# ==========================================================
# Deserialize the four joblib artifacts by file name, in the same order as
# before: English model, English vectorizer, Persian model, Persian vectorizer.
english_model, english_vectorizer = (
    joblib.load(artifact_path)
    for artifact_path in (
        "logistic_regression_english.pkl",
        "tfidf_vectorizer_english.pkl",
    )
)
persian_model, persian_vectorizer = (
    joblib.load(artifact_path)
    for artifact_path in (
        "logistic_regression_persian.pkl",
        "tfidf_vectorizer_persian.pkl",
    )
)
# Label mapping from training: integer class id -> display name.
label_map = dict(enumerate(("Negative", "Neutral", "Positive")))
# ==========================================================
# 🔹 2️⃣ Preprocessing (must match training exactly)
# ==========================================================
# Patterns are kept byte-identical to the original preprocessing so that the
# cleaned text stays compatible with whatever the vectorizer was fitted on.
_EN_URL_RE = re.compile(r"http\S+|www\S+|https\S+")
_EN_NON_LETTER_RE = re.compile(r"[^a-zA-Z\s]")
_EN_WHITESPACE_RE = re.compile(r"\s+")


def clean_english_text(text):
    """Normalise English text before vectorization.

    Steps, in order: lowercase, drop URL-like tokens (anything starting with
    ``http`` or ``www`` up to the next whitespace), delete every character
    that is not an ASCII letter or whitespace, then collapse whitespace runs
    to a single space and trim the ends.

    Removed characters are deleted, not replaced by a space, so ``"don't"``
    becomes ``"dont"``.

    Args:
        text: Raw input string. ``None`` is treated as empty input.

    Returns:
        The cleaned string; ``""`` when nothing survives cleaning.
    """
    if text is None:
        return ""
    lowered = text.lower()
    without_urls = _EN_URL_RE.sub("", lowered)
    letters_only = _EN_NON_LETTER_RE.sub("", without_urls)
    return _EN_WHITESPACE_RE.sub(" ", letters_only).strip()
# Patterns are kept byte-identical to the original preprocessing so that the
# cleaned text stays compatible with whatever the vectorizer was fitted on.
_FA_NON_ARABIC_BLOCK_RE = re.compile(r"[^\u0600-\u06FF\s]")
_FA_WHITESPACE_RE = re.compile(r"\s+")


def clean_persian_text(text):
    """Normalise Persian text before vectorization.

    Deletes every character that is neither whitespace nor inside
    U+0600–U+06FF (the Unicode Arabic block, which holds the Persian
    letters), then collapses whitespace runs to a single space and trims
    the ends.

    Everything outside that range is removed, including Latin letters,
    ASCII digits and punctuation, and the zero-width non-joiner (U+200C),
    so the two halves of a ZWNJ-joined word end up concatenated.

    Args:
        text: Raw input string. ``None`` is treated as empty input.

    Returns:
        The cleaned string; ``""`` when nothing survives cleaning.
    """
    if text is None:
        return ""
    arabic_block_only = _FA_NON_ARABIC_BLOCK_RE.sub("", text)
    return _FA_WHITESPACE_RE.sub(" ", arabic_block_only).strip()
# ==========================================================
# 🔹 3️⃣ Prediction Function
# ==========================================================
def predict_sentiment(text, language):
    """Classify the sentiment of ``text`` with the pipeline for ``language``.

    Args:
        text: Raw user input. ``None`` or whitespace-only input is rejected
            with a warning message instead of raising.
        language: ``"English"`` or ``"Persian"``; selects the cleaner,
            vectorizer and model to use.

    Returns:
        A display string. On success it is
        ``"Prediction: <label> (<p> confidence)"`` where ``<p>`` is the
        highest class probability, formatted to two decimals. Otherwise it
        is a warning (blank input, or nothing left after cleaning) or an
        error message (unknown language).
    """
    # A cleared textbox or an API caller may hand us None; guard before .strip().
    if text is None or not text.strip():
        return "⚠ Please enter some text."

    # Pick the pipeline first so an unknown language never touches a model.
    if language == "English":
        cleaner, vectorizer, model = (
            clean_english_text, english_vectorizer, english_model
        )
    elif language == "Persian":
        cleaner, vectorizer, model = (
            clean_persian_text, persian_vectorizer, persian_model
        )
    else:
        return "❌ Invalid language option selected."

    cleaned = cleaner(text)
    if not cleaned:
        # Nothing survived cleaning (digits/emoji only, or text in a script
        # the selected language's cleaner strips). An empty string has no
        # tokens, so scoring it would report a confident-looking prediction
        # that does not depend on the input at all.
        return (
            f"⚠ No usable {language} text found after cleaning; "
            "check the selected language."
        )

    probs = model.predict_proba(vectorizer.transform([cleaned]))[0]
    best = int(np.argmax(probs))

    # predict_proba columns are ordered like model.classes_, so translate the
    # winning column back to its class id before looking up the display name.
    # If the model exposes no classes_, or its class ids are not label_map
    # keys, fall back to the column position as the original code did.
    classes = getattr(model, "classes_", None)
    class_id = classes[best] if classes is not None else best
    if class_id not in label_map:
        class_id = best
    return f"Prediction: {label_map[class_id]} ({probs[best]:.2f} confidence)"
# ==========================================================
# 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size)
# ==========================================================
# Startup diagnostics: print how many feature names each loaded vectorizer
# reports, English first and then Persian.
english_vocab_size = len(english_vectorizer.get_feature_names_out())
print(f"✅ English vectorizer vocabulary size: {english_vocab_size}")
persian_vocab_size = len(persian_vectorizer.get_feature_names_out())
print(f"✅ Persian vectorizer vocabulary size: {persian_vocab_size}")
# ==========================================================
# 🔹 5️⃣ Gradio Interface
# ==========================================================
# Input widgets, listed in the positional order of
# predict_sentiment(text, language).
text_input = gr.Textbox(lines=3, label="Enter Text")
language_input = gr.Radio(
    ["English", "Persian"],
    label="Select Language",
    value="English",
)
# Single text output that shows the string returned by predict_sentiment.
sentiment_output = gr.Textbox(label="Predicted Sentiment")

# Sample rows offered in the UI; each row is [text, language].
example_rows = [
    ["This movie was amazing!", "English"],
    ["The worst experience ever", "English"],
    ["این فیلم خیلی بد بود", "Persian"],
    ["من این محصول را دوست دارم", "Persian"],
]

iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[text_input, language_input],
    outputs=sentiment_output,
    title="🌍 Multilingual Sentiment Classifier (English & Persian)",
    description="Choose your language and get sentiment prediction with confidence score.",
    examples=example_rows,
)

# Launch the app as soon as this module is executed.
iface.launch()