import nltk
import spacy
import torch
import matplotlib.pyplot as plt
from typing import Tuple, Dict

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
import torch.nn.functional as F

# --------- lightweight setup helpers ---------
def ensure_spacy():
    try:
        return spacy.load("en_core_web_sm")
    except Exception:
        import spacy.cli
        spacy.cli.download("en_core_web_sm")
        return spacy.load("en_core_web_sm")

def ensure_nltk():
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

# --------- load resources once (cached) ---------
ensure_nltk()
nlp = ensure_spacy()

sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
bert_sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
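# Note: all models above run on CPU by default. If a GPU is available, the
# transformers pipeline accepts a device index (e.g. pipeline(..., device=0)),
# and the emotion model plus its tokenized inputs can be moved with .to("cuda").
# This is an optional suggestion, not part of the original setup.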

# --------- domain definitions & colors ---------
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
}

GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}

# --------- core scoring functions ---------
def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the top emotion label and its softmax probability."""
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
        probs = F.softmax(logits, dim=1).squeeze()
    labels = emotion_model.config.id2label
    top_idx = torch.argmax(probs).item()
    return labels[top_idx], float(probs[top_idx].item())

def score_sentiment(text: str) -> float:
    """
    Map DistilBERT sentiment to a 1-10 scale.
    POSITIVE -> 5 + 5 * confidence      (~7.5-10, since the winning confidence is > 0.5)
    NEGATIVE -> 1 + 4 * (1 - confidence) (~1-3)
    """
    out = bert_sentiment(text[:512])[0]  # rough character-level guard against very long inputs
    label, score = out["label"], out["score"]
    if label == "POSITIVE":
        scaled = 5 + 5 * score
    else:
        scaled = 1 + 4 * (1 - score)
    return round(max(1, min(10, scaled)), 2)

def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 score for decisive / accomplished language."""
    doc = nlp(text)
    score = 5.0
    # Single-word cues only: spaCy yields one token per word, so a multi-word phrase
    # such as "walked away" can never match a per-token comparison ("walked" covers it).
    key_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked"}
    for token in doc:
        if token.text.lower() in key_words:
            score += 1.5
        if token.tag_ in {"VBD", "VBN"}:  # past tense / past participle
            score += 0.5
    return round(max(1, min(10, score)), 2)

def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """
    SBERT cosine similarity to domain descriptions, then blend with sentiment_score.
    """
    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    out: Dict[str, float] = {}
    for label, desc in GNH_DOMAINS.items():
        desc_vec = sbert_model.encode(desc, convert_to_tensor=True)
        sim = float(util.cos_sim(text_vec, desc_vec).item())
        sim = max(0.0, min(1.0, sim))
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[label] = round(blended, 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))
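# Optional variant (illustrative only, not wired into the UI below): the domain
# descriptions never change, so they can be encoded once at startup; each request
# then encodes only the input text. GNH_DOMAIN_EMBEDDINGS and
# semantic_indicator_mapping_cached are names introduced here for this sketch.
GNH_DOMAIN_EMBEDDINGS = sbert_model.encode(list(GNH_DOMAINS.values()), convert_to_tensor=True)

def semantic_indicator_mapping_cached(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    sims = util.cos_sim(text_vec, GNH_DOMAIN_EMBEDDINGS)[0]  # cosine similarity to every domain at once
    out: Dict[str, float] = {}
    for label, sim in zip(GNH_DOMAINS.keys(), sims.tolist()):
        sim = max(0.0, min(1.0, sim))
        out[label] = round((1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0), 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))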

# --------- plotting helper ---------
def indicators_plot(indicators: Dict[str, float]):
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    # Object-oriented API: avoids relying on pyplot's global "current figure" state
    # when several Gradio requests are handled in the same process.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()  # highest-scoring domain on top
    ax.set_title("GNH Indicator Similarity (Sentiment-weighted)")
    ax.set_xlabel("Score")
    fig.tight_layout()
    return fig

# --------- Gradio app ---------
def analyze(text: str):
    # Neutral defaults for empty input, so the models are never called on "".
    if not text or not text.strip():
        return 5.0, "neutral (0.0)", "", None, 5.0
    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    indicators = semantic_indicator_mapping(text, sentiment)

    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
    fig = indicators_plot(indicators)

    return (
        sentiment,
        f"{emotion} ({emo_conf:.3f})",
        top5_str,
        fig,
        accomplishment,
    )

with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
    gr.Markdown("# La Matriz — BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains.")
    with gr.Row():
        inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
    with gr.Row():
        btn = gr.Button("Analyze", variant="primary")
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        top = gr.Text(label="Top GNH Indicators")
    with gr.Row():
        plot = gr.Plot(label="GNH Similarity")

    btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])

if __name__ == "__main__":
    demo.launch()
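    # Optional: to expose the app beyond localhost (e.g. inside a container),
    # launch() also accepts server_name/server_port, for example
    # demo.launch(server_name="0.0.0.0", server_port=7860). Defaults are kept above.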