jeevitha-app committed on
Commit eced96d · verified · 1 Parent(s): a8b0020

Create app.py

Files changed (1)
  1. app.py +112 -0
app.py ADDED
@@ -0,0 +1,112 @@
import gradio as gr
import joblib
import os
import re
import emoji
import demoji
import numpy as np

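The commit adds only app.py; for the Space to build, its dependencies have to be declared separately. A plausible requirements.txt inferred from the imports above (scikit-learn is an assumption: joblib will be unpickling scikit-learn vectorizers, encoders, and stacking models):

    gradio
    joblib
    scikit-learn
    numpy
    nltk
    hazm
    emoji
    demoji
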
# ==========================================================
# 📦 Load all models
# ==========================================================
vectorizer_en = joblib.load("tfidf_vectorizer_en.pkl")
le_en = joblib.load("label_encoder_en.pkl")
stacking_en = joblib.load("stacking_en.pkl")

vectorizer_fa = joblib.load("tfidf_vectorizer_fa.pkl")
le_fa = joblib.load("label_encoder_fa.pkl")
stacking_fa = joblib.load("stacking_fa.pkl")

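The six pickles loaded above are produced elsewhere; the training code is not part of this commit. As a rough sketch of how matching artifacts could be built for the English side (the toy corpus, base estimators, and cv setting are illustrative assumptions, not the author's actual setup):

    import joblib
    from sklearn.ensemble import StackingClassifier
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import LabelEncoder
    from sklearn.svm import LinearSVC

    texts = ["loved it", "great host", "fantastic show",          # toy corpus
             "boring episode", "awful sound", "weak script"]
    labels = ["positive", "positive", "positive",
              "negative", "negative", "negative"]

    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(texts)

    le = LabelEncoder()
    y = le.fit_transform(labels)        # integer targets, ordered by le.classes_

    stacking = StackingClassifier(
        estimators=[("lr", LogisticRegression()), ("svc", LinearSVC())],
        final_estimator=LogisticRegression(),
        cv=2,                           # only because the toy corpus is tiny
    )
    stacking.fit(X, y)

    joblib.dump(vectorizer, "tfidf_vectorizer_en.pkl")
    joblib.dump(le, "label_encoder_en.pkl")
    joblib.dump(stacking, "stacking_en.pkl")
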
# ==========================================================
# 🧹 Text cleaning functions
# ==========================================================
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from hazm import Normalizer, Lemmatizer as HazmLemmatizer, word_tokenize as hazm_tokenize

nltk.download("punkt")
nltk.download("punkt_tab")  # word_tokenize also needs this on NLTK >= 3.8.2
nltk.download("stopwords")
nltk.download("wordnet")

# English preprocessing
lemmatizer = WordNetLemmatizer()
STOPWORDS = set(stopwords.words("english"))
RE_URL = re.compile(r"http\S+|www\.\S+")
RE_HTML = re.compile(r"<.*?>")
RE_NONALPHA = re.compile(r"[^a-zA-Z\s]")

def preprocess_english(text):
    text = str(text).lower()
    text = emoji.demojize(text)      # emoji -> :name: tokens
    text = demoji.replace(text, "")  # drop any emoji demojize left behind
    text = RE_URL.sub(" ", text)
    text = RE_HTML.sub(" ", text)
    text = RE_NONALPHA.sub(" ", text)  # also strips the colons/underscores around :name:
    text = re.sub(r"\s+", " ", text).strip()
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(t) for t in tokens if t not in STOPWORDS and len(t) > 2]
    return " ".join(tokens)

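For intuition, a hypothetical round trip through preprocess_english (output is approximate; exact tokens depend on the installed NLTK data):

    preprocess_english("I LOVED it 😍! See http://example.com <b>now</b>")
    # -> roughly "loved smiling face heart eye see": the emoji becomes
    #    :smiling_face_with_heart_eyes:, the URL/HTML/punctuation are stripped,
    #    stopwords and short tokens drop out, and "eyes" lemmatizes to "eye"
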
# Persian preprocessing
normalizer = Normalizer()
hazm_lemmatizer = HazmLemmatizer()
RE_URL_FA = re.compile(r"http\S+|www\.\S+")
RE_NONPERSIAN = re.compile(r"[^\u0600-\u06FFA-Za-z\s]")  # keep Arabic-block and Latin letters

def preprocess_persian(text):
    text = str(text)
    text = normalizer.normalize(text)  # hazm: unify characters and spacing
    text = emoji.demojize(text)
    text = demoji.replace(text, "")
    text = RE_URL_FA.sub(" ", text)
    text = re.sub(r"@\w+|#\w+|\d+", " ", text)  # mentions, hashtags, digits
    text = RE_NONPERSIAN.sub(" ", text)
    text = re.sub(r"\s+", " ", text).strip()
    tokens = hazm_tokenize(text)
    tokens = [hazm_lemmatizer.lemmatize(t) for t in tokens if len(t) > 1]
    return " ".join(tokens)

# ==========================================================
# 🔮 Prediction function
# ==========================================================
def predict_sentiment(comment, language):
    if language == "English":
        clean_text = preprocess_english(comment)
        vectorizer, model, le = vectorizer_en, stacking_en, le_en
    else:
        clean_text = preprocess_persian(comment)
        vectorizer, model, le = vectorizer_fa, stacking_fa, le_fa

    X = vectorizer.transform([clean_text])
    pred = model.predict(X)[0]
    probs = model.predict_proba(X)[0]
    # predict_proba columns follow the encoded class order, which is the same
    # order as le.classes_, so the names below line up with the probabilities.
    classes = le.classes_

    # The label encoders ship alongside the models, which suggests the targets
    # were label-encoded at training time; decode integer predictions back to
    # class names, and let string predictions pass through unchanged.
    label = pred if isinstance(pred, str) else classes[int(pred)]

    result_str = f"🔹 **Predicted Sentiment:** {label}\n\n"
    prob_table = "\n".join(f"{cls}: {p:.3f}" for cls, p in zip(classes, probs))
    return f"🗣️ **Input:** {comment}\n\n{result_str}**Prediction Probabilities:**\n{prob_table}"

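A quick smoke test of the whole path once the pickles are present (the printed label and probabilities are hypothetical; they depend entirely on the trained models):

    print(predict_sentiment("I loved the show! It was amazing!", "English"))
    # 🗣️ **Input:** I loved the show! It was amazing!
    # 🔹 **Predicted Sentiment:** positive      (hypothetical)
    # **Prediction Probabilities:**
    # negative: 0.120                           (hypothetical)
    # positive: 0.880
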
# ==========================================================
# 🎨 Gradio UI
# ==========================================================
lang_dropdown = gr.Dropdown(["English", "Persian"], label="Select Language", value="English")
input_box = gr.Textbox(label="Enter your comment here")
output_box = gr.Markdown()

iface = gr.Interface(
    fn=predict_sentiment,
    inputs=[input_box, lang_dropdown],
    outputs=output_box,
    title="🌍 Multilingual Sentiment Analyzer (English + Persian)",
    description="Enter a comment in English or Persian to see the predicted sentiment and probabilities.",
    examples=[
        ["I loved the show! It was amazing!", "English"],
        # "The show was really good, and the host was good too."
        ["برنامه خیلی عالی بود و مجری هم خوب بود", "Persian"],
        ["It was an average episode, not too bad.", "English"],
    ],
)

if __name__ == "__main__":
    iface.launch()
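Running python app.py starts the Gradio server locally (on http://127.0.0.1:7860 by default); on a Hugging Face Space the same launch() call is picked up automatically. To hand out a temporary public link from a local machine, Gradio also supports:

    iface.launch(share=True)  # optional; generates a temporary public URL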