|
|
import os |
|
|
import nltk |
|
|
import spacy |
|
|
import torch |
|
|
import matplotlib.pyplot as plt |
|
|
import io |
|
|
from typing import Tuple, Dict |
|
|
|
|
|
import gradio as gr |
|
|
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification |
|
|
from sentence_transformers import SentenceTransformer, util |
|
|
import torch.nn.functional as F |
|
|
|
|
|
|
|
|
def ensure_spacy(model_name: str = "en_core_web_sm"):
    """Load a spaCy pipeline, downloading its model package if it is missing.

    Args:
        model_name: Name of the spaCy model to load. Defaults to
            ``"en_core_web_sm"``, the model this file has always used.

    Returns:
        The object returned by ``spacy.load(model_name)``.

    Raises:
        Any exception raised by the download step or by the second
        ``spacy.load`` attempt is propagated to the caller.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # Import the function by name on purpose. A function-level
        # ``import spacy.cli`` binds ``spacy`` as a *local* variable for the
        # whole function body, which makes the ``spacy.load`` call in the
        # ``try`` above fail with UnboundLocalError and forces a download on
        # every start-up.
        from spacy.cli import download

        download(model_name)
        return spacy.load(model_name)
|
|
|
|
|
def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is available locally.

    The resource is looked up with ``nltk.data.find``; ``nltk.download`` is
    called only when that lookup raises ``LookupError``.
    """
    punkt_present = True
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        punkt_present = False

    if not punkt_present:
        nltk.download("punkt")
|
|
|
|
|
|
|
|
# --- One-time start-up work: data checks and model loading -----------------

# NLTK data first, then the spaCy pipeline used by score_accomplishment().
ensure_nltk()
nlp = ensure_spacy()

# Sentence encoder used by semantic_indicator_mapping().
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")

# Sentiment classifier used by score_sentiment().
bert_sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Emotion classifier (tokenizer + model) used by classify_emotion().
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
|
|
|
|
|
|
|
|
# Domain label -> comma-separated keyword description.
# semantic_indicator_mapping() embeds each description and compares it with
# the input text. Insertion order is kept as-is because it decides the order
# of equally scored domains after the stable sort in that function.
GNH_DOMAINS: Dict[str, str] = {
    # Personal and social well-being
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    # Broader societal domains
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}
|
|
|
|
|
# Domain label -> bar colour (hex string) used by indicators_plot().
# Labels missing from this table fall back to "#cccccc" in that function.
# NOTE(review): "Political Wellness" is pure white, which may be invisible on
# a white plot background -- confirm this is intended.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}
|
|
|
|
|
|
|
|
def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the most probable emotion label for *text* and its probability.

    The text is tokenized with ``emotion_tokenizer`` (truncation enabled) and
    passed through ``emotion_model`` with gradient tracking disabled. The
    logits are soft-maxed along dimension 1 and the highest-probability class
    index is translated through ``emotion_model.config.id2label``.

    Args:
        text: The text to classify.

    Returns:
        A ``(label, probability)`` pair for the top-scoring class.
    """
    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        raw_scores = emotion_model(**encoded).logits

    # squeeze() drops the batch dimension so the result can be indexed by class.
    probabilities = F.softmax(raw_scores, dim=1).squeeze()
    best_index = torch.argmax(probabilities).item()
    id_to_label = emotion_model.config.id2label
    return id_to_label[best_index], float(probabilities[best_index].item())
|
|
|
|
|
def score_sentiment(text: str) -> float:
    """Map the sentiment classifier's verdict for *text* onto a 1-10 scale.

    Only the first 512 characters of *text* are passed to ``bert_sentiment``.
    With ``score`` being the value reported alongside the returned label:

    * label ``"POSITIVE"`` -> ``5 + 5 * score``
    * any other label      -> ``1 + 4 * (1 - score)``

    The value is clamped to ``[1, 10]`` and rounded to two decimals.

    NOTE(review): if the pipeline always reports the winning label of a
    two-class model (``score >= 0.5``), the reachable values are roughly
    [7.5, 10] for POSITIVE and [1, 3] otherwise -- confirm that the gap in
    between is intended.
    """
    prediction = bert_sentiment(text[:512])[0]
    confidence = prediction["score"]
    is_positive = prediction["label"] == "POSITIVE"
    raw = 5 + 5 * confidence if is_positive else 1 + 4 * (1 - confidence)
    return round(max(1, min(10, raw)), 2)
|
|
|
|
|
def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 "accomplishment" score derived from the tokens of *text*.

    Starting from 5.0, every token whose lower-cased text is one of the key
    phrases adds 1.5, and every token tagged ``VBD`` or ``VBN`` (past-tense /
    past-participle verb tags) adds 0.5. The total is clamped to ``[1, 10]``
    and rounded to two decimals.
    """
    # NOTE(review): "walked away" is compared against single tokens, so this
    # two-word entry presumably never matches -- confirm whether multi-word
    # phrase matching was intended.
    key_phrases = {"finally", "told", "decided", "quit", "refused", "stood", "walked", "walked away"}
    past_tags = {"VBD", "VBN"}

    tokens = list(nlp(text))
    phrase_hits = sum(1 for tok in tokens if tok.text.lower() in key_phrases)
    past_hits = sum(1 for tok in tokens if tok.tag_ in past_tags)

    # All increments are multiples of 0.5, so this closed form is exact and
    # equals adding them one token at a time.
    score = 5.0 + 1.5 * phrase_hits + 0.5 * past_hits
    return round(max(1, min(10, score)), 2)
|
|
|
|
|
def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score every GNH domain for *text*, blending similarity with sentiment.

    For each entry of ``GNH_DOMAINS`` the cosine similarity between the
    embedding of *text* and the embedding of the domain description is
    clamped to ``[0, 1]`` and combined as::

        (1 - sentiment_weight) * similarity + sentiment_weight * (sentiment_score / 10)

    Args:
        text: Input text to compare against the domain descriptions.
        sentiment_score: Sentiment value; it is divided by 10 before blending.
        sentiment_weight: Share of the blended score taken by the sentiment
            term (default 0.3).

    Returns:
        A dict mapping domain label to the blended score rounded to three
        decimals, ordered from highest to lowest score. Equal scores keep
        their ``GNH_DOMAINS`` order.
    """
    labels = list(GNH_DOMAINS)
    if not labels:
        return {}

    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    # Encode all descriptions in ONE batched call instead of one model call
    # per domain, then get every similarity from a single cos_sim() matrix.
    desc_vecs = sbert_model.encode(
        [GNH_DOMAINS[label] for label in labels],
        convert_to_tensor=True,
    )
    similarities = util.cos_sim(text_vec, desc_vecs)[0].tolist()

    # The sentiment contribution is identical for every domain.
    sentiment_part = sentiment_weight * (sentiment_score / 10.0)

    out: Dict[str, float] = {}
    for label, sim in zip(labels, similarities):
        sim = max(0.0, min(1.0, sim))
        out[label] = round((1 - sentiment_weight) * sim + sentiment_part, 3)

    # sorted() is stable, also with reverse=True, so ties keep insertion order.
    return dict(sorted(out.items(), key=lambda kv: kv[1], reverse=True))
|
|
|
|
|
|
|
|
def indicators_plot(indicators: Dict[str, float]):
    """Build a horizontal bar chart of the indicator scores.

    Bars are drawn in the iteration order of *indicators*, with the first
    entry at the top (the y axis is inverted). Each bar uses the colour from
    ``GNH_COLORS``, falling back to ``"#cccccc"`` for unknown labels.

    Args:
        indicators: Mapping of domain label to score.

    Returns:
        A ``matplotlib.figure.Figure`` containing the chart.
    """
    # Build the figure without pyplot. ``plt.figure()`` registers every figure
    # in pyplot's global state, where it stays until explicitly closed (one
    # leaked figure per request), and ``plt.*`` calls draw on a shared
    # "current figure". A standalone Figure has neither problem.
    from matplotlib.figure import Figure

    labels = list(indicators)
    values = [indicators[label] for label in labels]
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()
    ax.set_title("GNH Indicator Similarity (Sentiment-weighted)")
    ax.set_xlabel("Score")
    fig.tight_layout()
    return fig
|
|
|
|
|
|
|
|
def analyze(text: str):
    """Run every analysis step on *text* and return the values for the UI.

    Returns:
        A 5-tuple ``(sentiment, emotion_summary, top5_text, figure,
        accomplishment)``. For empty or whitespace-only input the fixed
        placeholder ``(5.0, "neutral (0.0)", "[]", None, 5.0)`` is returned
        and no model is called.
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return 5.0, "neutral (0.0)", "[]", None, 5.0

    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    indicators = semantic_indicator_mapping(text, sentiment)

    # The mapping is already ordered best-first; keep the first five entries.
    top_lines = []
    for rank, (k, v) in enumerate(indicators.items()):
        if rank >= 5:
            break
        top_lines.append(f"{k}: {v}")
    top5_str = "\n".join(top_lines)

    fig = indicators_plot(indicators)
    emotion_summary = f"{emotion} ({emo_conf:.3f})"
    return sentiment, emotion_summary, top5_str, fig, accomplishment
|
|
|
|
|
# Gradio UI: a text box and an "Analyze" button feeding analyze(), whose five
# return values fill the sentiment, emotion, top-indicator, plot and
# accomplishment widgets.
with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
    gr.Markdown(
        "# La Matriz — BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains."
    )

    # Input row
    with gr.Row():
        inp = gr.Textbox(
            lines=4,
            label="Input text",
            placeholder="e.g., I finally quit my toxic job and feel lighter.",
        )

    # Action row
    with gr.Row():
        btn = gr.Button("Analyze", variant="primary")

    # Scalar results
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")

    # Textual ranking of the best-matching domains
    with gr.Row():
        top = gr.Text(label="Top GNH Indicators")

    # Bar chart of all domains
    with gr.Row():
        plot = gr.Plot(label="GNH Similarity")

    # The output order must match the tuple returned by analyze().
    btn.click(
        fn=analyze,
        inputs=inp,
        outputs=[sent, emo, top, plot, acc],
    )


# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|
|