File size: 6,613 Bytes
dd6720a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import os
import nltk
import spacy
import torch
import matplotlib.pyplot as plt
import io
from typing import Tuple, Dict
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
import torch.nn.functional as F
# --------- lightweight setup helpers ---------
def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if missing.

    Returns:
        The loaded spaCy pipeline object.

    Raises:
        OSError: If the model still cannot be loaded after the download.
    """
    model_name = "en_core_web_sm"
    try:
        return spacy.load(model_name)
    except OSError:
        # spaCy reports a missing model package as OSError. Import only the
        # download function: a function-scope ``import spacy.cli`` would bind
        # ``spacy`` as a local name, making the ``spacy.load`` call above fail
        # with UnboundLocalError and forcing a download on every start.
        from spacy.cli import download as download_model

        download_model(model_name)
        return spacy.load(model_name)
def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is available.

    The resource is looked up first; it is downloaded only when the lookup
    raises ``LookupError``.
    """
    resource_path = "tokenizers/punkt"
    try:
        nltk.data.find(resource_path)
    except LookupError:
        # Resource not found in any NLTK data directory: fetch it.
        nltk.download("punkt")
# --------- load resources once (cached) ---------
# Everything below runs at import time, so the models are loaded once per
# process and shared by all requests through these module-level names.

# Make sure NLTK tokenizer data is present before anything else needs it.
ensure_nltk()
# spaCy pipeline used by score_accomplishment() for tokens and POS tags.
nlp = ensure_spacy()
# Sentence embedding model used by semantic_indicator_mapping().
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Sentiment pipeline used by score_sentiment().
bert_sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)
# Tokenizer/model pair used by classify_emotion().
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
# --------- domain definitions & colors ---------
# Maps each GNH domain label to a comma-separated keyword description.
# semantic_indicator_mapping() embeds every description and compares it with
# the input text, so editing a description changes that domain's similarity.
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
}
# Hex bar colour for each GNH domain label; keys mirror GNH_DOMAINS.
# indicators_plot() looks labels up here and falls back to "#cccccc" for any
# label that has no entry.
# NOTE(review): "Political Wellness" is pure white ("#ffffff"), which is
# presumably hard to see on a light plot background - confirm this is intended.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}
# --------- core scoring functions ---------
def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the most probable emotion label for *text* and its probability.

    The text is tokenized (with truncation), run through the emotion
    classifier without gradient tracking, and the logits are turned into a
    probability distribution with softmax.

    Args:
        text: Input text to classify.

    Returns:
        A ``(label, probability)`` pair for the highest-scoring class, where
        the label comes from the model config's ``id2label`` mapping.
    """
    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        model_output = emotion_model(**encoded)
    distribution = torch.softmax(model_output.logits, dim=1).squeeze()
    best_index = int(torch.argmax(distribution))
    id_to_label = emotion_model.config.id2label
    return id_to_label[best_index], float(distribution[best_index])
def score_sentiment(text: str) -> float:
    """Map the BERT sentiment prediction for *text* onto a 1-10 scale.

    The pipeline's first result supplies a label and a confidence ``score``:

    * ``"POSITIVE"``: ``5 + 5 * score``
    * any other label: ``1 + 4 * (1 - score)``

    NOTE(review): if ``score`` is the winning label's probability from a
    two-class model (so >= 0.5), POSITIVE lands in about [7.5, 10] and
    NEGATIVE in about [1, 3] - confirm against the pipeline output.

    Args:
        text: Input text; only its first 512 characters are scored.

    Returns:
        The scaled score clamped to [1.0, 10.0] and rounded to two decimals.
    """
    # The 512-character slice bounds the input but is not a token limit:
    # punctuation- or CJK-heavy text can tokenize to more tokens than
    # characters, so also let the tokenizer truncate to the model's maximum.
    prediction = bert_sentiment(text[:512], truncation=True)[0]
    label, score = prediction["label"], prediction["score"]
    if label == "POSITIVE":
        scaled = 5 + 5 * score
    else:
        scaled = 1 + 4 * (1 - score)
    # Float bounds keep the result a float even when the value hits a bound
    # (int bounds would hand back the int 1 or 10 in that case).
    return round(max(1.0, min(10.0, scaled)), 2)
def score_accomplishment(text: str) -> float:
    """Score how strongly *text* reads as a completed, decisive action.

    Starts from 5.0 and, for every token produced by the spaCy pipeline,
    adds 1.5 when the lower-cased token is a cue word and 0.5 when its
    fine-grained tag is ``VBD`` or ``VBN``. A token can earn both bonuses.

    Args:
        text: Input text to analyse.

    Returns:
        The score clamped to [1.0, 10.0] and rounded to two decimals.
    """
    # Cues are compared against single tokens, so only single words can ever
    # match. The former "walked away" entry was unreachable and is dropped;
    # "walked" already covers it, so scores are unchanged.
    cue_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked"}
    past_tags = {"VBD", "VBN"}  # past tense / past participle

    score = 5.0
    for token in nlp(text):
        if token.text.lower() in cue_words:
            score += 1.5
        if token.tag_ in past_tags:
            score += 0.5
    # Float bounds keep the return type float when the score saturates at 10.
    return round(max(1.0, min(10.0, score)), 2)
def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score every GNH domain by similarity to *text*, blended with sentiment.

    For each entry of ``GNH_DOMAINS`` the cosine similarity between the SBERT
    embedding of *text* and of the domain description is clamped to [0, 1]
    and combined as
    ``(1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10)``.

    Args:
        text: Input text to embed.
        sentiment_score: Sentiment value; it is divided by 10 before blending.
        sentiment_weight: Share of the blended score taken from sentiment.

    Returns:
        Mapping of domain label to blended score (rounded to three decimals),
        ordered from highest to lowest score.
    """
    domain_labels = list(GNH_DOMAINS)
    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    # Encode all descriptions in one batched call and compare them in a single
    # cos_sim, instead of one model call per domain on every request.
    desc_vecs = sbert_model.encode(list(GNH_DOMAINS.values()), convert_to_tensor=True)
    similarities = util.cos_sim(text_vec, desc_vecs)[0].tolist()

    # The sentiment contribution is the same for every domain.
    sentiment_part = sentiment_weight * (sentiment_score / 10.0)
    out: Dict[str, float] = {}
    for label, sim in zip(domain_labels, similarities):
        sim = max(0.0, min(1.0, sim))
        out[label] = round((1 - sentiment_weight) * sim + sentiment_part, 3)
    # Stable sort: domains with equal scores keep their GNH_DOMAINS order.
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))
# --------- plotting helper ---------
def indicators_plot(indicators: Dict[str, float]):
    """Draw a horizontal bar chart of indicator scores.

    Bars appear in the mapping's iteration order from top to bottom and are
    coloured from ``GNH_COLORS`` (``"#cccccc"`` for labels without an entry).

    Args:
        indicators: Mapping of domain label to score.

    Returns:
        A matplotlib ``Figure`` containing the chart.
    """
    # Build the figure directly rather than through pyplot: plt.figure()
    # registers every figure in pyplot's global state, and they accumulate in
    # a long-running server because nothing ever closes them.
    from matplotlib.figure import Figure

    labels = list(indicators)
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()
    # A thin outline keeps very light bars (e.g. "#ffffff") visible on the
    # default white axes background.
    ax.barh(labels, values, color=colors, edgecolor="#444444", linewidth=0.5)
    ax.invert_yaxis()  # first entry at the top
    ax.set_title("GNH Indicator Similarity (Sentiment-weighted)")
    ax.set_xlabel("Score")
    fig.tight_layout()
    return fig
# --------- Gradio app ---------
def analyze(text: str):
    """Run every scorer on *text* and package the results for the UI.

    Args:
        text: Raw text from the input box.

    Returns:
        A 5-tuple ``(sentiment, emotion_text, top_indicators_text, figure,
        accomplishment)``. For empty or whitespace-only input, fixed
        placeholder values are returned and no model is called.
    """
    if not (text and text.strip()):
        return 5.0, "neutral (0.0)", "[]", None, 5.0

    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    indicators = semantic_indicator_mapping(text, sentiment)

    # The mapping is already ordered, so the first five items are the leaders.
    leaders = list(indicators.items())[:5]
    summary_lines = [f"{name}: {value}" for name, value in leaders]
    top5_str = "\n".join(summary_lines)

    fig = indicators_plot(indicators)
    emotion_text = f"{emotion} ({emo_conf:.3f})"
    return sentiment, emotion_text, top5_str, fig, accomplishment
# UI layout: one input box, an Analyze button, three summary fields, the
# top-indicator text and the indicator plot.
with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
    gr.Markdown("# La Matriz — BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains.")
    with gr.Row():
        inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
    with gr.Row():
        btn = gr.Button("Analyze", variant="primary")
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        top = gr.Text(label="Top GNH Indicators")
    with gr.Row():
        plot = gr.Plot(label="GNH Similarity")
    # The outputs list must stay in the same order as analyze()'s return
    # tuple: sentiment, emotion, top indicators, plot, accomplishment.
    btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|