import os
import re
from typing import Dict, Tuple, List

import nltk
import spacy
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import gradio as gr

from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
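

# One-time setup: fetch the spaCy English model and NLTK 'punkt' data on first run.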
def ensure_spacy():
    """Load the spaCy English model, downloading it first if necessary."""
    try:
        return spacy.load("en_core_web_sm")
    except Exception:
        import spacy.cli
        spacy.cli.download("en_core_web_sm")
        return spacy.load("en_core_web_sm")


def ensure_nltk():
    """Ensure the NLTK 'punkt' tokenizer data is available."""
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")


ensure_nltk()
nlp = ensure_spacy()
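

# Models are loaded once at import time: SBERT sentence embeddings, a DistilBERT
# sentiment classifier, and a DistilRoBERTa emotion classifier.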
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
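

# GNH (Gross National Happiness) domains with short descriptions; each description
# is embedded and compared against the input text for similarity scoring.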
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}
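

# Hex bar colors for the GNH indicator plot (one per domain).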
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}
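

# Pathway dataset: one row per sequence, keyed by the "color" column.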
CSV_PATH = "la matrice plus.csv"
SEQUENCE_ALIASES = {
    "Auto (recommend)": "auto",
    "Direct": "direct",
    "Fem": "feminine",
    "Knot": "knot",
    "Masc": "masculine",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}
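

# Sequence key -> bundled image file. Keys must match SEQUENCE_ALIASES values so
# sequence_to_image_path() can resolve a chosen pathway to its image.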
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masculine": "masc pathway.png",
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png",
}


def load_pathway_info(csv_path: str):
    """Read the pathway CSV and build lookup tables.

    Returns:
        phrases: sequence key -> concatenated descriptive phrase
        seq_to_colors: sequence key -> list of associated color names
        color_to_seqs: color name -> list of sequence keys that reference it
        color_vocab: sorted list of all color names seen
    """
    df = pd.read_csv(csv_path)
    keys_we_know = set(SEQUENCE_ALIASES.values()) - {"auto"}
    rows = df[df["color"].astype(str).str.lower().isin(keys_we_know)].copy()

    phrases: Dict[str, str] = {}
    seq_to_colors: Dict[str, List[str]] = {}
    color_to_seqs: Dict[str, List[str]] = {}

    # Every column except the key/color columns contributes to the phrase text.
    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]

    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()

        # The "r" column holds a comma- or space-separated list of color names.
        colors_field = str(row.get("r", "") or "")
        colors = [c.strip().lower() for c in re.split(r"[,\s]+", colors_field) if c.strip()]
        colors = list(dict.fromkeys(colors))  # de-duplicate, preserving order
        seq_to_colors[key] = colors

        for c in colors:
            color_to_seqs.setdefault(c, [])
            if key not in color_to_seqs[c]:
                color_to_seqs[c].append(key)

        # Join the remaining non-empty cells into one whitespace-normalized phrase.
        vals = []
        for c in cols_for_phrase:
            v = row.get(c)
            if pd.notna(v):
                vs = str(v).strip()
                if vs and vs.lower() != "nan":
                    vals.append(vs)
        phrases[key] = " ".join(" ".join(vals).split())

    color_vocab = sorted(color_to_seqs.keys())
    return phrases, seq_to_colors, color_to_seqs, color_vocab


PATHWAY_PHRASES, SEQ_TO_COLORS, COLOR_TO_SEQS, COLOR_VOCAB = load_pathway_info(CSV_PATH)


def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the local image path for a sequence key, or None if the file is missing."""
    fname = SEQUENCE_IMAGE_FILES.get(seq_key)
    return fname if (fname and os.path.exists(fname)) else None


def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the top emotion label and its probability."""
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    probs = F.softmax(logits, dim=1).squeeze()
    labels = emotion_model.config.id2label
    idx = int(torch.argmax(probs).item())
    return labels[idx], float(probs[idx].item())


def score_sentiment(text: str) -> float:
    """Map the DistilBERT sentiment output onto a 1-10 scale.

    POSITIVE with confidence s -> 5 + 5*s (upper half of the scale);
    NEGATIVE with confidence s -> 1 + 4*(1-s) (lower half).
    """
    out = bert_sentiment(text[:512])[0]  # rough character-level guard against over-long inputs
    label, score = out["label"], out["score"]
    scaled = 5 + 5 * score if label == "POSITIVE" else 1 + 4 * (1 - score)
    return round(min(10, max(1, scaled)), 2)


def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 score: agentive key words and past-tense verbs raise it from a 5.0 baseline."""
    doc = nlp(text)
    score = 5.0
    key_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked", "returned", "return"}
    # Multiword cues cannot match single spaCy tokens, so check them against the raw text.
    if "walked away" in text.lower():
        score += 1.5
    for token in doc:
        if token.text.lower() in key_words:
            score += 1.5
        if token.tag_ in {"VBD", "VBN"}:
            score += 0.5
    return round(min(10, max(1, score)), 2)


def encode_text(t: str):
    """Encode text into an SBERT embedding tensor."""
    return sbert_model.encode(t, convert_to_tensor=True)


def composite_vector(
    base_text: str,
    boost_terms: List[str],
    boost_seq_keys: List[str],
    limit_seq_keys: List[str],
    boost_w: float = 0.6,
    limit_w: float = 0.6,
):
    """Build a context vector: the base embedding plus weighted boost embeddings,
    minus weighted limit embeddings."""
    v = encode_text(base_text)

    for term in boost_terms:
        t = term.strip()
        if t:
            v = v + boost_w * encode_text(t)

    for key in boost_seq_keys:
        phrase = PATHWAY_PHRASES.get(key, "")
        if phrase:
            v = v + boost_w * encode_text(phrase)

    for key in limit_seq_keys:
        phrase = PATHWAY_PHRASES.get(key, "")
        if phrase:
            v = v - limit_w * encode_text(phrase)

    return v


def best_sequence_for_vector(vec) -> Tuple[str, float]:
    """Return the pathway key whose phrase embedding is most cosine-similar to vec."""
    best_key, best_sim = None, -1.0
    for key, phrase in PATHWAY_PHRASES.items():
        if not phrase:
            continue
        sim = float(util.cos_sim(vec, encode_text(phrase)).item())
        if sim > best_sim:
            best_key, best_sim = key, sim
    return best_key or "direct", best_sim


def semantic_indicator_mapping_from_vec(vec, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score each GNH domain: blend per-domain cosine similarity with the
    sentiment score (weighted 70/30 by default), sorted descending."""
    out: Dict[str, float] = {}
    for label, desc in GNH_DOMAINS.items():
        desc_vec = encode_text(desc)
        sim = float(util.cos_sim(vec, desc_vec).item())
        sim = max(0.0, min(1.0, sim))  # clamp: raw cosine can be slightly negative
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[label] = round(blended, 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))
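

# Note: the loop above re-embeds every GNH domain description on each call. If that
# becomes a bottleneck, the embeddings could be precomputed once at startup, e.g.
# (hypothetical cache, not used elsewhere in this script):
#
#   GNH_DOMAIN_VECS = {label: encode_text(desc) for label, desc in GNH_DOMAINS.items()}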


# Matches any known color name, optionally followed by "pathway" (e.g. "red-pathway").
# Guard against an empty color vocabulary, which would otherwise compile to a pattern
# that matches the empty string everywhere.
_COLOR_RE = (
    re.compile(r"\b(" + "|".join(map(re.escape, COLOR_VOCAB)) + r")\s*(?:-?\s*pathway)?\b", re.I)
    if COLOR_VOCAB
    else None
)
_LIMIT_CUES = {"limit", "reduce", "lessen", "avoid", "diminish", "lower", "constrain", "suppress"}


def infer_color_directives(text: str) -> Tuple[List[str], List[str]]:
    """
    Parse '... limit ... red-pathway ...' -> limit 'red';
    otherwise treat mentioned colors as boosts.
    Returns (boost_colors, limit_colors) as lists of color strings.
    """
    if _COLOR_RE is None:
        return [], []

    lowered = text.lower()
    # Tokenize with character offsets so each color match can be located exactly.
    token_spans = [(m.start(), m.group()) for m in re.finditer(r"\w+|\S", lowered)]
    tokens = [tok for _, tok in token_spans]

    idxs = []
    for m in _COLOR_RE.finditer(lowered):
        start = m.start()
        # Index of the token containing the match start.
        tok_index = 0
        for i, (tok_start, _) in enumerate(token_spans):
            if tok_start > start:
                break
            tok_index = i
        idxs.append((tok_index, m.group(1).lower()))

    boost_colors, limit_colors = [], []
    for idx, col in idxs:
        # A limit cue within the four preceding tokens flags this color for limiting.
        window = tokens[max(0, idx - 4):idx]
        if any(w in _LIMIT_CUES for w in window):
            limit_colors.append(col)
        else:
            boost_colors.append(col)

    boost_colors = list(dict.fromkeys(boost_colors))
    limit_colors = list(dict.fromkeys(limit_colors))
    return boost_colors, limit_colors


def colors_to_seq_keys(colors: List[str]) -> List[str]:
    """Expand color names into the sequence keys that reference them, preserving order."""
    keys: List[str] = []
    for c in colors:
        for k in COLOR_TO_SEQS.get(c, []):
            if k not in keys:
                keys.append(k)
    return keys


def indicators_plot(indicators: Dict[str, float]):
    """Horizontal bar chart of GNH indicator scores, colored per domain."""
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]
    fig = plt.figure(figsize=(8, 5))
    plt.barh(labels, values, color=colors)
    plt.gca().invert_yaxis()  # highest score at the top
    plt.title("GNH Indicator Similarity (Pathway-weighted)")
    plt.xlabel("Score")
    plt.tight_layout()
    return fig


SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
SEQ_MULTI_CHOICES = [k for k in SEQUENCE_ALIASES if k != "Auto (recommend)"]


def normalize_seq_keys(ui_labels: List[str]) -> List[str]:
    """Map UI labels to canonical sequence keys."""
    return [SEQUENCE_ALIASES.get(lab, lab).lower() for lab in ui_labels]


def analyze(
    text: str,
    seq_choice: str,
    boost_terms_raw: str,
    boost_seq_labels: List[str],
    limit_seq_labels: List[str],
    boost_w: float,
    limit_w: float,
):
    if not text or not text.strip():
        return (5.0, "neutral (0.0)", 5.0, "—", "—", "—", None, None)

    # Baseline scores from the raw text.
    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)

    # Explicit boosts/limits chosen in the UI.
    boost_seqs_user = normalize_seq_keys(boost_seq_labels)
    limit_seqs_user = normalize_seq_keys(limit_seq_labels)

    boost_terms = [t.strip() for t in boost_terms_raw.split(",")] if boost_terms_raw else []

    # Implicit boosts/limits inferred from color mentions in the text itself.
    boost_colors, limit_colors = infer_color_directives(text)
    boost_seqs_from_colors = colors_to_seq_keys(boost_colors)
    limit_seqs_from_colors = colors_to_seq_keys(limit_colors)

    def _merge(a: List[str], b: List[str]) -> List[str]:
        out = list(a)
        for x in b:
            if x not in out:
                out.append(x)
        return out

    boost_seq_keys = _merge(boost_seqs_user, boost_seqs_from_colors)
    limit_seq_keys = _merge(limit_seqs_user, limit_seqs_from_colors)

    context_vec = composite_vector(
        base_text=text,
        boost_terms=boost_terms,
        boost_seq_keys=boost_seq_keys,
        limit_seq_keys=limit_seq_keys,
        boost_w=boost_w,
        limit_w=limit_w,
    )

    # Resolve the pathway: either the user's explicit choice or the best match.
    chosen_key = SEQUENCE_ALIASES.get(seq_choice, "auto")
    if chosen_key == "auto":
        final_key, final_sim = best_sequence_for_vector(context_vec)
    else:
        final_key = chosen_key
        phrase_for_final = PATHWAY_PHRASES.get(final_key, "")
        final_sim = float(util.cos_sim(context_vec, encode_text(phrase_for_final)).item()) if phrase_for_final else 0.0

    phrase = PATHWAY_PHRASES.get(final_key, "—")
    img_path = sequence_to_image_path(final_key)

    indicators = semantic_indicator_mapping_from_vec(context_vec, sentiment_score=sentiment)
    fig = indicators_plot(indicators)

    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)

    emo_str = f"{emotion} ({emo_conf:.3f})"
    meta = f"{final_key} (relevance={final_sim:.3f})"
    if boost_colors or limit_colors:
        meta += f" | boost colors: {', '.join(boost_colors) or '—'} → {', '.join(boost_seqs_from_colors) or '—'}"
        meta += f" | limit colors: {', '.join(limit_colors) or '—'} → {', '.join(limit_seqs_from_colors) or '—'}"

    return (
        sentiment,
        emo_str,
        accomplishment,
        meta,
        phrase,
        top5_str,
        fig,
        img_path,
    )
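

# Gradio interface: input controls, result displays, and event wiring.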
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown(
        "## RGB Root Matriz Color Plotter\n"
        "Type a phrase. Choose a **Sequence** or keep **Auto** to recommend a pathway. "
        "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset."
    )

    with gr.Row():
        inp = gr.Textbox(
            lines=4,
            label="Input text",
            placeholder="e.g., use gratitude from a return and inspiration from clarity to limit from red-pathway the pain from orange-pathway."
        )

    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value="Auto (recommend)", label="Primary Pathway")

    with gr.Row():
        boost_terms = gr.Textbox(label="Boost terms (comma-separated)", placeholder="gratitude, inspiration, clarity")

    with gr.Row():
        boost_seqs = gr.CheckboxGroup(choices=SEQ_MULTI_CHOICES, label="Boost sequences (optional)")
        limit_seqs = gr.CheckboxGroup(choices=SEQ_MULTI_CHOICES, label="Limit sequences (optional)")

    with gr.Row():
        boost_w = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Boost weight")
        limit_w = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Limit weight")

    btn = gr.Button("Analyze", variant="primary")

    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")

    with gr.Row():
        chosen = gr.Text(label="Chosen pathway (relevance + color mapping)")
        phrase_out = gr.Text(label="Pathway phrase")

    with gr.Row():
        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
        gnh_plot = gr.Plot(label="GNH Similarity (Pathway-weighted)")

    with gr.Row():
        pathway_img = gr.Image(label="Pathway image", type="filepath")

    btn.click(
        fn=analyze,
        inputs=[inp, seq, boost_terms, boost_seqs, limit_seqs, boost_w, limit_w],
        outputs=[sent, emo, acc, chosen, phrase_out, gnh_top, gnh_plot, pathway_img]
    )


if __name__ == "__main__":
    demo.launch()