# NOTE: web-page header residue captured together with this file (not program code):
#   daniellegauthier's picture | Update app.py | dd6720a verified | raw | history blame | 6.61 kB
import os
import nltk
import spacy
import torch
import matplotlib.pyplot as plt
import io
from typing import Tuple, Dict
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
import torch.nn.functional as F
# --------- lightweight setup helpers ---------
def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it only if missing.

    Returns:
        The pipeline object returned by ``spacy.load("en_core_web_sm")``.

    Raises:
        OSError: if the model still cannot be loaded after the download attempt.
    """
    model_name = "en_core_web_sm"
    try:
        return spacy.load(model_name)
    except OSError:
        # spaCy reports a missing model package as OSError; anything else is a
        # real failure and should surface instead of triggering a download.
        #
        # Import only the `download` function: a function-local
        # `import spacy.cli` would bind `spacy` as a *local* name for the whole
        # function, making the `spacy.load` call above fail with
        # UnboundLocalError before the import has run.
        from spacy.cli import download

        download(model_name)
        return spacy.load(model_name)
def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data can be found.

    The resource is looked up with ``nltk.data.find``; ``nltk.download`` is
    called only when that lookup raises ``LookupError``.
    """
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        punkt_missing = True
    else:
        punkt_missing = False

    if punkt_missing:
        nltk.download("punkt")
# --------- load resources once (cached) ---------
# Everything below runs at import time, so the models are loaded once and then
# shared by every call to the scoring functions.

# Fetch the NLTK data if it is missing, then load the spaCy pipeline
# (used by score_accomplishment for tokenization and POS tags).
ensure_nltk()
nlp = ensure_spacy()
# Sentence-embedding model; semantic_indicator_mapping uses it for cosine similarity.
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Sentiment pipeline; score_sentiment reads the "label" and "score" of its first result.
bert_sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)
# Emotion classifier: tokenizer and model are loaded separately because
# classify_emotion runs the forward pass and the softmax itself.
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
# --------- domain definitions & colors ---------
# Maps each GNH domain label to a comma-separated keyword description.
# semantic_indicator_mapping embeds the description (not the label) and compares
# it with the input text; the labels become the keys of its result.
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
}
# Hex bar colour for each domain label, looked up by indicators_plot.
# A label that is missing here is drawn with the fallback "#cccccc".
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}
# --------- core scoring functions ---------
def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the most probable emotion label for *text* and its probability.

    The text is tokenized with truncation, passed through ``emotion_model``
    without gradient tracking, and the logits are turned into probabilities
    with a softmax. The label comes from the model config's ``id2label`` map.
    """
    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        model_output = emotion_model(**encoded)

    # Single input -> squeeze drops the batch dimension.
    distribution = F.softmax(model_output.logits, dim=1).squeeze()
    best = torch.argmax(distribution).item()

    emotion_name = emotion_model.config.id2label[best]
    confidence = float(distribution[best].item())
    return emotion_name, confidence
def score_sentiment(text: str) -> float:
    """Score the sentiment of *text* on a 1..10 scale.

    The first result of the ``bert_sentiment`` pipeline supplies a label and
    the confidence ``score`` of that label, which are mapped as follows:

    * ``POSITIVE``: ``5 + 5 * score``
    * any other label: ``1 + 4 * (1 - score)``

    If the reported score is the winning class of a two-class model (so
    ``score >= 0.5`` -- presumably the case here, confirm against the model),
    positive texts land in [7.5, 10] and negative texts in [1, 3].

    Returns:
        The scaled value clamped to [1, 10] and rounded to two decimals.
    """
    # Let the tokenizer truncate to the model's maximum length. Slicing the
    # string to 512 *characters* does not bound the token count (512
    # punctuation marks are more than 512 tokens once special tokens are
    # added) and throws away most of an ordinary long text.
    result = bert_sentiment(text, truncation=True)[0]
    label, confidence = result["label"], result["score"]

    if label == "POSITIVE":
        scaled = 5 + 5 * confidence
    else:
        scaled = 1 + 4 * (1 - confidence)
    return round(max(1, min(10, scaled)), 2)
def score_accomplishment(text: str) -> float:
    """Heuristic 1..10 "accomplishment" score based on wording and verb tense.

    Starting from 5.0, the score gains:

    * 1.5 for every token whose lower-cased text is one of the keywords,
    * 1.5 for every occurrence of a multi-word phrase ("walked away"),
    * 0.5 for every token tagged ``VBD`` or ``VBN`` (past tense / participle).

    Returns:
        The score clamped to [1, 10] and rounded to two decimals.
    """
    keywords = {"finally", "told", "decided", "quit", "refused", "stood", "walked"}
    # Multi-word entries cannot equal a single token's text, so they are kept
    # as token tuples and matched against consecutive tokens. The phrase bonus
    # is awarded in addition to the keyword bonus for "walked".
    phrases = (("walked", "away"),)
    past_tags = {"VBD", "VBN"}

    doc = nlp(text)
    words = [token.text.lower() for token in doc]

    score = 5.0
    score += 1.5 * sum(word in keywords for word in words)
    score += 0.5 * sum(token.tag_ in past_tags for token in doc)

    for phrase in phrases:
        width = len(phrase)
        occurrences = sum(
            tuple(words[start:start + width]) == phrase
            for start in range(len(words) - width + 1)
        )
        score += 1.5 * occurrences

    return round(max(1, min(10, score)), 2)
def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score every GNH domain against *text*, blended with the sentiment score.

    For each entry of ``GNH_DOMAINS`` the cosine similarity between the SBERT
    embedding of *text* and the embedding of the domain description is clamped
    to [0, 1] and combined as::

        (1 - sentiment_weight) * similarity + sentiment_weight * (sentiment_score / 10)

    Args:
        text: Input text to compare with the domain descriptions.
        sentiment_score: Sentiment value; it is divided by 10 before blending.
        sentiment_weight: Share of the blended score taken from the sentiment.

    Returns:
        Domain label -> blended score (rounded to three decimals), ordered from
        highest to lowest score.
    """
    domain_labels = list(GNH_DOMAINS)
    if not domain_labels:
        return {}

    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    # Encode all descriptions in one batch and compare them in one cos_sim
    # call instead of one model forward pass per domain.
    desc_vecs = sbert_model.encode(list(GNH_DOMAINS.values()), convert_to_tensor=True)
    similarities = util.cos_sim(text_vec, desc_vecs)[0].tolist()

    out: Dict[str, float] = {}
    for label, sim in zip(domain_labels, similarities):
        sim = max(0.0, min(1.0, sim))
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[label] = round(blended, 3)

    # sorted() is stable, so equal scores keep the GNH_DOMAINS order.
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))
# --------- plotting helper ---------
def indicators_plot(indicators: Dict[str, float]):
    """Draw a horizontal bar chart of indicator scores and return the figure.

    Args:
        indicators: Label -> score mapping; bars are drawn in the mapping's
            iteration order, first entry at the top.

    Returns:
        The matplotlib figure holding the chart. It has already been closed
        in pyplot, but the figure object itself remains usable.
    """
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    fig, ax = plt.subplots(figsize=(8, 5))
    # A thin edge keeps very light bars (e.g. "#ffffff") visible against the
    # axes background.
    ax.barh(labels, values, color=colors, edgecolor="#555555", linewidth=0.5)
    ax.invert_yaxis()
    ax.set_title("GNH Indicator Similarity (Sentiment-weighted)")
    ax.set_xlabel("Score")
    fig.tight_layout()

    # pyplot keeps a reference to every figure it creates until it is closed;
    # without this each call would leave another figure alive in the process.
    plt.close(fig)
    return fig
# --------- Gradio app ---------
def analyze(text: str):
    """Run every scorer on *text* and pack the results for the UI.

    Returns a 5-tuple: sentiment score, ``"<emotion> (<confidence>)"``, the
    five highest-scoring indicators as ``"label: score"`` lines, the indicator
    figure, and the accomplishment score. Empty or whitespace-only input
    short-circuits to fixed placeholder values without calling any model.
    """
    if not (text and text.strip()):
        return 5.0, "neutral (0.0)", "[]", None, 5.0

    sentiment_value = score_sentiment(text)
    emotion_label, emotion_confidence = classify_emotion(text)
    accomplishment_value = score_accomplishment(text)
    indicator_scores = semantic_indicator_mapping(text, sentiment_value)

    # The mapping is already ordered best-first, so the first five are the top five.
    summary_lines = []
    for domain, value in list(indicator_scores.items())[:5]:
        summary_lines.append(f"{domain}: {value}")
    top_summary = "\n".join(summary_lines)

    figure = indicators_plot(indicator_scores)
    emotion_text = f"{emotion_label} ({emotion_confidence:.3f})"
    return sentiment_value, emotion_text, top_summary, figure, accomplishment_value
# UI definition: a text input and an Analyze button, followed by the five
# result widgets. Each gr.Row() groups the components created inside it.
with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
    gr.Markdown("# La Matriz — BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains.")
    with gr.Row():
        inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
    with gr.Row():
        btn = gr.Button("Analyze", variant="primary")
    # Numeric and text results share one row.
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        top = gr.Text(label="Top GNH Indicators")
    with gr.Row():
        plot = gr.Plot(label="GNH Similarity")
    # The outputs list follows the order of the tuple returned by analyze:
    # sentiment, emotion, top indicators, figure, accomplishment.
    btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()