Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import joblib | |
| import re | |
| import numpy as np | |
| # ========================================================== | |
| # 🔹 1️⃣ Load Models and Vectorizers | |
| # ========================================================== | |
# Each language has its own vectorizer/classifier pair, deserialized with
# joblib from files addressed relative to the current working directory.
# (Going by the file names these are TF-IDF vectorizers and logistic
# regression classifiers -- confirm against the training code.)
english_vectorizer = joblib.load("tfidf_vectorizer_english.pkl")
english_model = joblib.load("logistic_regression_english.pkl")

persian_vectorizer = joblib.load("tfidf_vectorizer_persian.pkl")
persian_model = joblib.load("logistic_regression_persian.pkl")

# Integer class id -> display name shown to the user.
label_map = dict(enumerate(("Negative", "Neutral", "Positive")))
| # ========================================================== | |
| # 🔹 2️⃣ Preprocessing (must match training exactly) | |
| # ========================================================== | |
# Patterns are compiled once at import time instead of being rebuilt inline on
# every call.  They match exactly the same text as the original inline
# patterns: the former third alternative ``https\S+`` could never win because
# ``http\S+`` already matches at the same position, and an ``A-Z`` range is
# unreachable once the input has been lower-cased.
_EN_URL_RE = re.compile(r"http\S+|www\S+")
_EN_NON_LETTER_RE = re.compile(r"[^a-z\s]")
_EN_WHITESPACE_RE = re.compile(r"\s+")


def clean_english_text(text: str) -> str:
    """Normalize English text before it is handed to the English vectorizer.

    Steps, in order:

    1. lower-case the input;
    2. delete every run of non-space characters that starts with ``http``
       or ``www``;
    3. delete every character that is not an ASCII letter or whitespace
       (characters are removed, not replaced by a space, so ``don't``
       becomes ``dont``);
    4. collapse whitespace runs to a single space and trim both ends.

    The output is deliberately unchanged from the previous implementation,
    because this preprocessing has to mirror what was applied at training
    time.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string; empty when nothing survives the filters.
    """
    lowered = text.lower()
    without_urls = _EN_URL_RE.sub("", lowered)
    letters_only = _EN_NON_LETTER_RE.sub("", without_urls)
    return _EN_WHITESPACE_RE.sub(" ", letters_only).strip()
# Compiled once at import time instead of being rebuilt inline on every call.
# U+0600-U+06FF is the whole Unicode "Arabic" block, so besides Persian letters
# it also keeps Arabic-only letters, Arabic-Indic digits and Arabic punctuation
# such as U+061F (question mark).
_FA_OUTSIDE_ARABIC_BLOCK_RE = re.compile(r"[^\u0600-\u06FF\s]")
_FA_WHITESPACE_RE = re.compile(r"\s+")


def clean_persian_text(text: str) -> str:
    """Normalize Persian text before it is handed to the Persian vectorizer.

    Every character outside the Unicode Arabic block (U+0600-U+06FF) that is
    not whitespace is deleted, then whitespace runs are collapsed to a single
    space and both ends are trimmed.

    Note that the zero-width non-joiner (U+200C) lies outside the kept range
    and is not whitespace, so it is deleted and the two halves of a word
    written with it are joined.  This behavior is kept as-is because the
    preprocessing has to mirror what was applied at training time.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string; empty when the input contains no character from
        the Arabic block.
    """
    arabic_block_only = _FA_OUTSIDE_ARABIC_BLOCK_RE.sub("", text)
    return _FA_WHITESPACE_RE.sub(" ", arabic_block_only).strip()
| # ========================================================== | |
| # 🔹 3️⃣ Prediction Function | |
| # ========================================================== | |
def predict_sentiment(text, language):
    """Predict the sentiment of ``text`` with the model for ``language``.

    Args:
        text: Raw user input.  ``None`` and whitespace-only strings are
            treated as "no input".
        language: ``"English"`` or ``"Persian"``; selects the cleaning
            function, vectorizer and classifier.

    Returns:
        A display string.  On success it has the form
        ``"Prediction: <label> (<probability> confidence)"``.  Otherwise it
        is a warning/error message: no input, unknown language, or nothing
        left of the input after cleaning.
    """
    # ``text`` can be None (e.g. a null payload); None has no ``.strip()``.
    if not text or not text.strip():
        return "⚠ Please enter some text."

    # Pick the per-language pipeline.  The module-level models are only
    # referenced inside the branch that actually needs them.
    if language == "English":
        cleaner = clean_english_text
        vectorizer = english_vectorizer
        model = english_model
    elif language == "Persian":
        cleaner = clean_persian_text
        vectorizer = persian_vectorizer
        model = persian_model
    else:
        return "❌ Invalid language option selected."

    cleaned = cleaner(text)
    if not cleaned:
        # Nothing survived cleaning (e.g. Latin text with "Persian" selected,
        # or digits/emoji only).  Vectorizing an empty string gives an
        # all-zero row, so any "prediction" would not depend on the input.
        return (
            f"⚠ No {language} text left after cleaning. "
            "Please check the text and the selected language."
        )

    features = vectorizer.transform([cleaned])
    probs = model.predict_proba(features)[0]
    best = int(np.argmax(probs))

    # predict_proba columns are ordered like ``model.classes_``; translate the
    # winning column into its class id instead of assuming ids are 0..n-1.
    classes = getattr(model, "classes_", None)
    class_id = classes[best] if classes is not None else best
    label = label_map.get(class_id, str(class_id))

    return f"Prediction: {label} ({probs[best]:.2f} confidence)"
| # ========================================================== | |
| # 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size) | |
| # ========================================================== | |
# Startup diagnostic: report how many features each loaded vectorizer exposes.
_english_vocab_size = len(english_vectorizer.get_feature_names_out())
_persian_vocab_size = len(persian_vectorizer.get_feature_names_out())
print(f"✅ English vectorizer vocabulary size: {_english_vocab_size}")
print(f"✅ Persian vectorizer vocabulary size: {_persian_vocab_size}")
| # ========================================================== | |
| # 🔹 5️⃣ Gradio Interface | |
| # ========================================================== | |
# Input widgets: a three-line text box and a language selector that starts
# on "English".
text_input = gr.Textbox(lines=3, label="Enter Text")
language_input = gr.Radio(
    ["English", "Persian"],
    label="Select Language",
    value="English",
)

# Sample rows handed to the interface, each shaped as [text, language].
example_rows = [
    ["This movie was amazing!", "English"],
    ["The worst experience ever", "English"],
    ["این فیلم خیلی بد بود", "Persian"],
    ["من این محصول را دوست دارم", "Persian"],
]

# Wire predict_sentiment(text, language) to the widgets above; its returned
# string is shown in a single output text box.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[text_input, language_input],
    outputs=gr.Textbox(label="Predicted Sentiment"),
    title="🌍 Multilingual Sentiment Classifier (English & Persian)",
    description="Choose your language and get sentiment prediction with confidence score.",
    examples=example_rows,
)

# Launched unconditionally when this module is executed.
iface.launch()