jeevitha-app committed on
Commit
f585727
·
verified ·
1 Parent(s): 9e57519

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import re
4
+ import numpy as np
5
+
6
+ # ==========================================================
7
+ # 🔹 1️⃣ Load Models and Vectorizers
8
+ # ==========================================================
9
# Load the serialized classifier and TF-IDF vectorizer for each language.
# The paths are relative, so they resolve against the current working directory.
# NOTE: joblib.load unpickles its input; only ship artifacts from a trusted source.
english_model = joblib.load("logistic_regression_english.pkl")
english_vectorizer = joblib.load("tfidf_vectorizer_english.pkl")

persian_model = joblib.load("logistic_regression_persian.pkl")
persian_vectorizer = joblib.load("tfidf_vectorizer_persian.pkl")

# Label mapping from training: integer class id -> human-readable sentiment.
label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
17
+
18
+ # ==========================================================
19
+ # 🔹 2️⃣ Preprocessing (must match training exactly)
20
+ # ==========================================================
21
def clean_english_text(text):
    """Normalize English text before it is vectorized.

    Steps, in order: lowercase, strip URLs, drop every character that is
    not an ASCII letter or whitespace, then collapse whitespace runs to a
    single space and trim both ends.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string; may be empty if nothing survives the filters.
    """
    text = text.lower()
    # "http\S+" already covers "https...", so no separate https alternative
    # is needed; the set of matched spans is unchanged.
    text = re.sub(r"http\S+|www\S+", "", text)
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
27
+
28
def clean_persian_text(text):
    """Normalize Persian text before it is vectorized.

    Keeps only characters in the Unicode range U+0600-U+06FF (the Arabic
    block, which Persian script uses) plus whitespace, then collapses
    whitespace runs to a single space and trims both ends.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string; may be empty if nothing survives the filter.
    """
    # Everything outside U+0600-U+06FF is removed, including Latin letters,
    # ASCII digits/punctuation and the zero-width non-joiner (U+200C).
    text = re.sub(r"[^\u0600-\u06FF\s]", "", text)
    text = re.sub(r"\s+", " ", text).strip()
    return text
32
+
33
+ # ==========================================================
34
+ # 🔹 3️⃣ Prediction Function
35
+ # ==========================================================
36
def _classify(text, cleaner, vectorizer, model):
    """Clean *text*, vectorize it, and format the model's top prediction."""
    cleaned = cleaner(text)
    if not cleaned:
        # Nothing survived cleaning (e.g. the wrong language was selected);
        # an all-zero feature vector would yield an input-independent result.
        return "⚠ No usable text left after cleaning. Please check the selected language."

    features = vectorizer.transform([cleaned])
    probs = model.predict_proba(features)[0]
    # NOTE(review): predict_proba columns follow model.classes_ order; using
    # the column index as the label_map key assumes classes_ == [0, 1, 2].
    best = int(np.argmax(probs))
    return f"Prediction: {label_map[best]} ({probs[best]:.2f} confidence)"


def predict_sentiment(text, language):
    """Predict the sentiment of *text* with the model for *language*.

    Args:
        text: User-supplied text; None, empty, or whitespace-only input is
            rejected with a prompt message.
        language: "English" or "Persian"; any other value yields an
            error message.

    Returns:
        A display string: the predicted label with its probability, or a
        warning/error message when the input cannot be classified.
    """
    # None can arrive when the UI field was never filled in.
    if text is None or not text.strip():
        return "⚠ Please enter some text."

    if language == "English":
        return _classify(text, clean_english_text, english_vectorizer, english_model)
    if language == "Persian":
        return _classify(text, clean_persian_text, persian_vectorizer, persian_model)
    return "❌ Invalid language option selected."
56
+
57
+ # ==========================================================
58
+ # 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size)
59
+ # ==========================================================
60
# Startup sanity check: report how many features each loaded vectorizer exposes.
print(f"✅ English vectorizer vocabulary size: {len(english_vectorizer.get_feature_names_out())}")
print(f"✅ Persian vectorizer vocabulary size: {len(persian_vectorizer.get_feature_names_out())}")
62
+
63
+ # ==========================================================
64
+ # 🔹 5️⃣ Gradio Interface
65
+ # ==========================================================
66
# Web UI: a text box plus a language selector feeding predict_sentiment,
# with the formatted result shown in a single output text box.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[
        gr.Textbox(lines=3, label="Enter Text"),
        gr.Radio(["English", "Persian"], label="Select Language", value="English"),
    ],
    outputs=gr.Textbox(label="Predicted Sentiment"),
    title="🌍 Multilingual Sentiment Classifier (English & Persian)",
    description="Choose your language and get sentiment prediction with confidence score.",
    # Each example row is [text, language], matching the order of `inputs`.
    examples=[
        ["This movie was amazing!", "English"],
        ["The worst experience ever", "English"],
        ["این فیلم خیلی بد بود", "Persian"],
        ["من این محصول را دوست دارم", "Persian"],
    ],
)

# Start the Gradio server when the script runs.
iface.launch()