Spaces:

jeevitha-app
/

Sentiment_analyzer

Sleeping

App Files Files Community

Sentiment_analyzer / app.py

jeevitha-app

Create app.py

f585727 verified 30 days ago

raw

history blame

3.29 kB

	import gradio as gr
	import joblib
	import re
	import numpy as np

	# ==========================================================
	# 🔹 1️⃣ Load Models and Vectorizers
	# ==========================================================
	# Classifier and vectorizer for English input, read from relative paths
	# (resolved against the process working directory) at import time.
	# Presumably a logistic-regression model and a TF-IDF vectorizer, going by
	# the file names -- confirm against the training code.
	# NOTE(review): joblib.load unpickles its input, so these .pkl files must
	# come from a trusted source.
	english_model = joblib.load("logistic_regression_english.pkl")
	english_vectorizer = joblib.load("tfidf_vectorizer_english.pkl")

	# The matching pair of artifacts for Persian input.
	persian_model = joblib.load("logistic_regression_persian.pkl")
	persian_vectorizer = joblib.load("tfidf_vectorizer_persian.pkl")

	# Maps the integer class id used during training to the label shown to the
	# user; the same mapping is used for both languages.
	label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}

	# ==========================================================
	# 🔹 2️⃣ Preprocessing (must match training exactly)
	# ==========================================================
	def clean_english_text(text):
	    """Normalize English text before it is handed to the vectorizer.

	    Steps, in order: lowercase, drop URL-like tokens, drop every character
	    that is not an ASCII letter or whitespace, collapse whitespace runs to a
	    single space and trim both ends.

	    Args:
	        text: Raw input string.

	    Returns:
	        The cleaned string; empty if nothing but non-letters was supplied.
	    """
	    text = text.lower()
	    # Use a real `|` alternation here. An escaped `\|` only matches a literal
	    # pipe character, so URLs were never removed and instead survived the
	    # next step as fused tokens such as "httpexamplecom".
	    # The trailing `https\S+` alternative is already covered by `http\S+`;
	    # it is left in place so the pattern reads the same as before.
	    text = re.sub(r"http\S+|www\S+|https\S+", "", text)
	    text = re.sub(r"[^a-zA-Z\s]", "", text)
	    text = re.sub(r"\s+", " ", text).strip()
	    return text

	def clean_persian_text(text):
	    """Normalize Persian text before it is handed to the vectorizer.

	    Keeps only characters in the Unicode range U+0600-U+06FF plus
	    whitespace, then collapses whitespace runs to one space and trims both
	    ends. Anything outside that range -- Latin letters, ASCII digits and
	    punctuation, and the zero-width non-joiner (U+200C) -- is removed.

	    Args:
	        text: Raw input string.

	    Returns:
	        The cleaned string; empty if no character in the kept range remains.
	    """
	    kept = re.sub(r"[^\u0600-\u06FF\s]", "", text)
	    single_spaced = re.sub(r"\s+", " ", kept)
	    return single_spaced.strip()

	# ==========================================================
	# 🔹 3️⃣ Prediction Function
	# ==========================================================
	def _classify(cleaned, vectorizer, model):
	    """Vectorize already-cleaned text and format the top class with its probability."""
	    features = vectorizer.transform([cleaned])
	    probs = model.predict_proba(features)[0]
	    best = int(np.argmax(probs))
	    # predict_proba columns are ordered like model.classes_, so translate the
	    # column index into the actual class id before looking up its name.
	    # Falls back to the raw index if the model exposes no classes_.
	    classes = getattr(model, "classes_", None)
	    class_id = classes[best] if classes is not None else best
	    # Show the raw class id rather than raising if it is missing from label_map.
	    label = label_map.get(class_id, str(class_id))
	    return f"Prediction: {label} ({probs[best]:.2f} confidence)"


	def predict_sentiment(text, language):
	    """Classify the sentiment of `text` with the model for `language`.

	    Args:
	        text: Raw user input. None, empty, or whitespace-only input is
	            rejected with a warning message.
	        language: "English" or "Persian"; selects the cleaning function,
	            vectorizer and model.

	    Returns:
	        A display string: either "Prediction: <label> (<p> confidence)" or a
	        warning/error message when the input or the language is unusable.
	    """
	    # Guard against None as well as blank input so the call never fails on
	    # `.strip()`.
	    if text is None or not text.strip():
	        return "⚠ Please enter some text."

	    if language == "English":
	        cleaned = clean_english_text(text)
	        vectorizer, model = english_vectorizer, english_model
	    elif language == "Persian":
	        cleaned = clean_persian_text(text)
	        vectorizer, model = persian_vectorizer, persian_model
	    else:
	        return "❌ Invalid language option selected."

	    # If cleaning removed everything (e.g. the text is in the other
	    # language), the vectorizer has nothing to work with; say so instead of
	    # reporting a prediction for empty text.
	    if not cleaned:
	        return "⚠ No usable text left after cleaning. Check that the selected language matches the text."

	    return _classify(cleaned, vectorizer, model)

	# ==========================================================
	# 🔹 4️⃣ Debug Info (Optional - Check Vocabulary Size)
	# ==========================================================
	# Startup diagnostics: report how many features each loaded vectorizer exposes.
	_english_features = english_vectorizer.get_feature_names_out()
	print(f"✅ English vectorizer vocabulary size: {len(_english_features)}")
	_persian_features = persian_vectorizer.get_feature_names_out()
	print(f"✅ Persian vectorizer vocabulary size: {len(_persian_features)}")

	# ==========================================================
	# 🔹 5️⃣ Gradio Interface
	# ==========================================================
	# Input widgets: free text plus a language selector that defaults to English.
	_text_input = gr.Textbox(lines=3, label="Enter Text")
	_language_input = gr.Radio(["English", "Persian"], label="Select Language", value="English")

	# Output widget that shows the string returned by predict_sentiment.
	_sentiment_output = gr.Textbox(label="Predicted Sentiment")

	# Clickable sample rows; each row is [text, language] in input order.
	_example_rows = [
	    ["This movie was amazing!", "English"],
	    ["The worst experience ever", "English"],
	    ["این فیلم خیلی بد بود", "Persian"],
	    ["من این محصول را دوست دارم", "Persian"],
	]

	iface = gr.Interface(
	    fn=predict_sentiment,
	    inputs=[_text_input, _language_input],
	    outputs=_sentiment_output,
	    title="🌍 Multilingual Sentiment Classifier (English & Persian)",
	    description="Choose your language and get sentiment prediction with confidence score.",
	    examples=_example_rows,
	)

	# Start the web app as soon as the script is executed.
	iface.launch()