import nltk
import spacy
import torch
import matplotlib.pyplot as plt
from typing import Tuple, Dict

import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
import torch.nn.functional as F

# --------- lightweight setup helpers ---------
def ensure_spacy():
    try:
        return spacy.load("en_core_web_sm")
    except Exception:
        import spacy.cli
        spacy.cli.download("en_core_web_sm")
        return spacy.load("en_core_web_sm")

def ensure_nltk():
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

# --------- load resources once (cached) ---------
ensure_nltk()
nlp = ensure_spacy()

sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
bert_sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english"
)

emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
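# Note: all models above run on CPU by default. If a GPU is available, the
# transformers pipeline accepts a device index (e.g. pipeline(..., device=0)),
# and the emotion model plus its tokenized inputs can be moved with .to("cuda").
# This is an optional suggestion, not part of the original setup.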

# --------- domain definitions & colors ---------
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
}

GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}

# --------- core scoring functions ---------
def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the top emotion label and its softmax probability."""
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
        probs = F.softmax(logits, dim=1).squeeze()
    labels = emotion_model.config.id2label
    top_idx = torch.argmax(probs).item()
    return labels[top_idx], float(probs[top_idx].item())

def score_sentiment(text: str) -> float:
    """
    Map DistilBERT sentiment to a 1-10 scale.
    POSITIVE -> 5 + 5 * confidence      (~7.5-10, since the winning confidence is > 0.5)
    NEGATIVE -> 1 + 4 * (1 - confidence) (~1-3)
    """
    out = bert_sentiment(text[:512])[0]  # rough character-level guard against very long inputs
    label, score = out["label"], out["score"]
    if label == "POSITIVE":
        scaled = 5 + 5 * score
    else:
        scaled = 1 + 4 * (1 - score)
    return round(max(1, min(10, scaled)), 2)

def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 score for decisive / accomplished language."""
    doc = nlp(text)
    score = 5.0
    # Single-word cues only: spaCy yields one token per word, so a multi-word phrase
    # such as "walked away" can never match a per-token comparison ("walked" covers it).
    key_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked"}
    for token in doc:
        if token.text.lower() in key_words:
            score += 1.5
        if token.tag_ in {"VBD", "VBN"}:  # past tense / past participle
            score += 0.5
    return round(max(1, min(10, score)), 2)

def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """
    SBERT cosine similarity to domain descriptions, then blend with sentiment_score.
    """
    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    out: Dict[str, float] = {}
    for label, desc in GNH_DOMAINS.items():
        desc_vec = sbert_model.encode(desc, convert_to_tensor=True)
        sim = float(util.cos_sim(text_vec, desc_vec).item())
        sim = max(0.0, min(1.0, sim))
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[label] = round(blended, 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))
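# Optional variant (illustrative only, not wired into the UI below): the domain
# descriptions never change, so they can be encoded once at startup; each request
# then encodes only the input text. GNH_DOMAIN_EMBEDDINGS and
# semantic_indicator_mapping_cached are names introduced here for this sketch.
GNH_DOMAIN_EMBEDDINGS = sbert_model.encode(list(GNH_DOMAINS.values()), convert_to_tensor=True)

def semantic_indicator_mapping_cached(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    sims = util.cos_sim(text_vec, GNH_DOMAIN_EMBEDDINGS)[0]  # cosine similarity to every domain at once
    out: Dict[str, float] = {}
    for label, sim in zip(GNH_DOMAINS.keys(), sims.tolist()):
        sim = max(0.0, min(1.0, sim))
        out[label] = round((1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0), 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))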

# --------- plotting helper ---------
def indicators_plot(indicators: Dict[str, float]):
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    # Object-oriented API: avoids relying on pyplot's global "current figure" state
    # when several Gradio requests are handled in the same process.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()  # highest-scoring domain on top
    ax.set_title("GNH Indicator Similarity (Sentiment-weighted)")
    ax.set_xlabel("Score")
    fig.tight_layout()
    return fig

# --------- Gradio app ---------
def analyze(text: str):
    # Neutral defaults for empty input, so the models are never called on "".
    if not text or not text.strip():
        return 5.0, "neutral (0.0)", "", None, 5.0
    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    indicators = semantic_indicator_mapping(text, sentiment)

    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
    fig = indicators_plot(indicators)

    return (
        sentiment,
        f"{emotion} ({emo_conf:.3f})",
        top5_str,
        fig,
        accomplishment,
    )

with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
    gr.Markdown("# La Matriz — BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains.")
    with gr.Row():
        inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
    with gr.Row():
        btn = gr.Button("Analyze", variant="primary")
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        top = gr.Text(label="Top GNH Indicators")
    with gr.Row():
        plot = gr.Plot(label="GNH Similarity")

    btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])

if __name__ == "__main__":
    demo.launch()
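    # Optional: to expose the app beyond localhost (e.g. inside a container),
    # launch() also accepts server_name/server_port, for example
    # demo.launch(server_name="0.0.0.0", server_port=7860). Defaults are kept above.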