import functools
import os
import re
from typing import Dict, Tuple, List

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import spacy
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification


def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if missing.

    Returns:
        Whatever ``spacy.load("en_core_web_sm")`` returns.
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # spaCy reports a missing model package as OSError.
        # Import only the helper: a function-level ``import spacy.cli`` would
        # bind ``spacy`` as a *local* name, making the ``spacy.load`` call in
        # the ``try`` above fail with UnboundLocalError and forcing a fresh
        # download on every start-up.
        from spacy.cli import download

        download("en_core_web_sm")
        return spacy.load("en_core_web_sm")


def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is available.

    Looks the resource up with ``nltk.data.find`` and only calls
    ``nltk.download`` when the lookup raises ``LookupError``.
    """
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")


# ---------------------------------------------------------------------------
# One-time resource and model loading (runs at import time).
# ---------------------------------------------------------------------------
ensure_nltk()
nlp = ensure_spacy()

# Sentence embeddings used for the GNH similarity scores.
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Binary sentiment classifier; score_sentiment() rescales its output.
bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Emotion classifier used by classify_emotion().
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)


# Dataset files, given as paths relative to the working directory.
CSV_PATH_PLUS = "la matrice plus.csv"   # read by load_pathway_info()
CSV_PATH_COLOR = "la matrice.csv"       # read by load_color_lexicon()

# Display name shown in the "Pathway" dropdown -> internal sequence key.
# The keys' insertion order is the dropdown order (see SEQ_CHOICES).
SEQUENCE_ALIASES = {
    "Direct": "direct",
    "Feminine": "feminine",
    "Knot": "knot",
    "Masculine": "masc",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}

# Internal sequence key -> image file name for that pathway.
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masc": "masc pathway.png",
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png",
}

# GNH domain name -> keyword description.  The descriptions are embedded and
# compared against input text; insertion order decides ties between domains.
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}

# GNH domain name -> hex color used for that domain's bar in the chart.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#FA005A",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#FAB478",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#0000FF",
    "Ecological Diversity": "#00FF00",
    "Health": "#FF0000",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#B432FF",
}

# Choices offered by the "Word Mode" radio control.
WORD_MODES = ["Matrice1", "Matrice", "English", "GNH Indicators"]
# Number of per-color textboxes created in the UI.
MAX_COLORS = 8


def _find_col(df: pd.DataFrame, candidates: List[str]) -> str | None: |
|
|
names = {c.lower(): c for c in df.columns} |
|
|
for c in candidates: |
|
|
if c.lower() in names: return names[c.lower()] |
|
|
for want in candidates: |
|
|
ww = want.replace(" ", "").replace("-", "") |
|
|
for lc, orig in names.items(): |
|
|
if ww in lc.replace(" ", "").replace("-", ""): |
|
|
return orig |
|
|
return None |
|
|
|
|
|
def load_pathway_info(csv_path_plus: str) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
    """Read the pathway CSV and build per-sequence color lists and phrases.

    Only rows whose ``color`` cell (trimmed, case-insensitive) equals one of
    the ``SEQUENCE_ALIASES`` values are used.  For each such row:

    * the ``r`` cell is split on commas/whitespace into a de-duplicated,
      lower-cased list of color names;
    * every non-empty cell outside ``color``/``r``/``g``/``b`` is joined,
      whitespace-normalised, into the sequence's phrase.

    Args:
        csv_path_plus: Path of the CSV file to read.

    Returns:
        ``(seq_to_colors, seq_phrase)``, both keyed by the lower-cased
        sequence name.
    """
    df = pd.read_csv(csv_path_plus)
    keys = set(SEQUENCE_ALIASES.values())

    # Strip as well as lower-case so the filter agrees with how ``key`` is
    # derived below; otherwise padded names would be silently dropped.
    normalised = df["color"].astype(str).str.strip().str.lower()
    rows = df[normalised.isin(keys)]

    seq_to_colors: Dict[str, List[str]] = {}
    seq_phrase: Dict[str, str] = {}

    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]
    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()

        # A missing cell is NaN, which is truthy: ``str(nan or "")`` would
        # yield a bogus color called "nan".
        raw = row.get("r")
        color_list = "" if pd.isna(raw) else str(raw or "")
        colors = [c.strip().lower() for c in re.split(r"[,\s]+", color_list) if c.strip()]
        seq_to_colors[key] = list(dict.fromkeys(colors))  # de-dup, keep order

        vals = []
        for c in cols_for_phrase:
            v = row.get(c)
            if pd.notna(v):
                s = str(v).strip()
                if s and s.lower() != "nan":
                    vals.append(s)
        seq_phrase[key] = " ".join(" ".join(vals).split())

    return seq_to_colors, seq_phrase


def _split_words(s: str) -> List[str]: |
|
|
if not isinstance(s, str): return [] |
|
|
parts = re.split(r"[,\;/\|\s]+", s.strip()) |
|
|
return [p for p in (w.strip().lower() for w in parts) if p] |
|
|
|
|
|
def load_color_lexicon(csv_path_color: str):
    """Read the color CSV into a per-color word lexicon.

    Columns are located with ``_find_col`` (color name, "matrice1",
    "matrice" and the English word list).  Each row becomes
    ``lex[color] = {"matrice1": [...], "matrice": [...], "english": [...]}``
    with the word lists produced by ``_split_words``.

    Args:
        csv_path_color: Path of the CSV file to read.

    Returns:
        Dict keyed by lower-cased color name.  Rows without a color name
        are skipped.
    """
    df = pd.read_csv(csv_path_color)
    color_col = _find_col(df, ["color", "colour"])
    m1_col = _find_col(df, ["matrice1", "matrice 1"])
    m_col = _find_col(df, ["matrice"])
    en_col = _find_col(df, ["english-words-code", "english words code", "english_words_code", "english"])

    def _cell(row, col) -> str:
        # Empty CSV cells come back as NaN; ``str(nan)`` would otherwise
        # inject the literal word "nan" into the lexicon.
        if col is None:
            return ""
        value = row.get(col, "")
        return "" if pd.isna(value) else str(value)

    lex: Dict[str, Dict[str, List[str]]] = {}
    for _, row in df.iterrows():
        cname = _cell(row, color_col).strip().lower()
        if not cname:
            continue
        lex[cname] = {
            "matrice1": _split_words(_cell(row, m1_col)),
            "matrice": _split_words(_cell(row, m_col)),
            "english": _split_words(_cell(row, en_col)),
        }
    return lex


# Load both datasets once at import time.
SEQ_TO_COLORS, SEQ_PHRASE = load_pathway_info(CSV_PATH_PLUS)
COLOR_LEX = load_color_lexicon(CSV_PATH_COLOR)


def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the image file name for *seq_key*, or ``None`` if unavailable.

    ``None`` is returned both when the key has no entry in
    ``SEQUENCE_IMAGE_FILES`` and when the file is not present on disk.
    """
    fname = SEQUENCE_IMAGE_FILES.get(seq_key)
    # isfile rather than exists: a directory of that name is not an image.
    return fname if (fname and os.path.isfile(fname)) else None


def encode_text(t: str):
    """Embed *t* with ``sbert_model`` and return the result as a tensor."""
    return sbert_model.encode(t, convert_to_tensor=True)


def classify_emotion(text: str) -> Tuple[str, float]:
    """Classify *text* with the emotion model.

    The text is tokenized with truncation enabled, run through
    ``emotion_model`` without gradient tracking, and the logits are turned
    into probabilities with a softmax.

    Returns:
        ``(label, probability)`` of the highest-probability class, with the
        label looked up in ``emotion_model.config.id2label``.
    """
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    probs = F.softmax(logits, dim=1).squeeze()
    labels = emotion_model.config.id2label
    idx = int(torch.argmax(probs).item())
    return labels[idx], float(probs[idx].item())


def score_sentiment(text: str) -> float:
    """Map the sentiment pipeline's output for *text* onto a 1-10 scale.

    Only the first 512 characters of *text* are passed to the pipeline.
    A ``POSITIVE`` label maps to ``5 + 5 * score``; any other label maps to
    ``1 + 4 * (1 - score)``.  The result is clamped to [1, 10] and rounded
    to two decimals.
    """
    out = bert_sentiment(text[:512])[0]
    label, score = out["label"], out["score"]
    if label == "POSITIVE":
        scaled = 5 + 5 * score
    else:
        scaled = 1 + 4 * (1 - score)
    return round(min(10, max(1, scaled)), 2)


def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 "accomplishment" score for *text*.

    Starts at 5.0, adds 1.5 for every token whose lower-cased text is in
    the key-phrase set and 0.5 for every token tagged ``VBD`` or ``VBN``.
    The result is clamped to [1, 10] and rounded to two decimals.
    """
    doc = nlp(text)
    score = 5.0
    # NOTE(review): "walked away" is two words but the check below compares
    # one token at a time, so this entry presumably never matches — confirm
    # whether phrase-level matching was intended.
    key_phrases = {"finally","told","decided","quit","refused","stood","walked","walked away","returned","return"}
    for token in doc:
        if token.text.lower() in key_phrases:
            score += 1.5
        if token.tag_ in {"VBD","VBN"}:
            score += 0.5
    return round(min(10, max(1, score)), 2)


def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score *text* against every GNH domain.

    For each domain the cosine similarity between the embedding of *text*
    and the embedding of the domain description is clamped to [0, 1] and
    blended with the sentiment score:

        (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10)

    Args:
        text: Text to score.
        sentiment_score: Sentiment value; divided by 10 before blending.
        sentiment_weight: Share of the blended score taken by sentiment.

    Returns:
        Domain -> blended score (rounded to 3 decimals), ordered from the
        highest score to the lowest.
    """
    v = encode_text(text)
    out: Dict[str, float] = {}
    for dom, desc in GNH_DOMAINS.items():
        sim = float(util.cos_sim(v, encode_text(desc)).item())
        sim = max(0.0, min(1.0, sim))
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[dom] = round(blended, 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))


def indicators_plot(indicators: Dict[str, float]):
    """Draw a horizontal bar chart of *indicators* and return the figure.

    Bars are drawn in the mapping's iteration order, first entry on top,
    and colored from ``GNH_COLORS`` (``#cccccc`` for unknown labels).

    Returns:
        The matplotlib ``Figure``.  It is detached from pyplot before being
        returned but can still be rendered or saved.
    """
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    # Draw through explicit fig/ax handles instead of pyplot's implicit
    # "current figure", which is global state shared by concurrent requests.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()
    ax.set_title("GNH Indicator Similarity")
    ax.set_xlabel("Score")
    fig.tight_layout()

    # pyplot keeps every figure it creates alive until closed; without this
    # a long-running app accumulates one figure per analysis.
    plt.close(fig)
    return fig


def join_all_words(color: str) -> List[str]:
    """Return every lexicon word for *color*, de-duplicated in order.

    Words come from the ``matrice1``, ``matrice`` and ``english`` lists of
    ``COLOR_LEX`` (in that order); unknown colors give an empty list.
    """
    d = COLOR_LEX.get(color.lower(), {})
    combined = d.get("matrice1", []) + d.get("matrice", []) + d.get("english", [])
    return list(dict.fromkeys(combined))


# Memoised: the result depends only on the color name and on the lexicon and
# domain tables loaded at import time, while each uncached call embeds the
# color's words plus every domain description.
@functools.lru_cache(maxsize=256)
def nearest_gnh_domain_for_color(color: str) -> Tuple[str, float]:
    """Find the GNH domain whose description is closest to *color*'s words.

    The color's lexicon words are joined into one string, embedded, and
    compared by cosine similarity with each ``GNH_DOMAINS`` description.

    Returns:
        ``(domain, similarity)`` for the best match.  Colors without any
        lexicon words return ``("Mental Wellness", 0.0)``.
    """
    words = " ".join(join_all_words(color))
    if not words:
        return "Mental Wellness", 0.0
    v = encode_text(words)
    best, best_sim = None, -1.0
    for dom, desc in GNH_DOMAINS.items():
        sim = float(util.cos_sim(v, encode_text(desc)).item())
        if sim > best_sim:  # strict: the first domain wins ties
            best, best_sim = dom, sim
    return best or "Mental Wellness", best_sim


def labels_for_mode(colors: List[str], mode: str) -> List[str]:
    """Return one display label per color for the given word mode.

    Modes starting with "gnh" (case-insensitive) use the nearest GNH domain
    name; every other mode uses the capitalised color name.
    """
    if mode.lower().startswith("gnh"):
        return [nearest_gnh_domain_for_color(c)[0] for c in colors]
    return [c.capitalize() for c in colors]


def placeholder_for(color: str, mode: str) -> str:
    """Build the textbox placeholder for *color* under word mode *mode*.

    Resolution order:

    1. GNH modes: ``"<domain>: <description>"`` of the nearest domain.
    2. Up to 12 words from the lexicon list selected by the mode
       (unrecognised modes fall back to ``matrice``).
    3. Up to 12 words from the first non-empty other list, prefixed with
       ``"(from <List>) "``.
    4. ``"(mapped GNH) <domain>: <description>"`` when the color has no
       words at all.
    """
    color_lc = color.lower()
    mode_lc = mode.lower()

    if mode_lc.startswith("gnh"):
        dom, _ = nearest_gnh_domain_for_color(color_lc)
        return f"{dom}: {GNH_DOMAINS.get(dom, '')}"

    # Lexicon key -> name shown in the "(from ...)" prefix; the order is
    # also the fallback order.
    display = {"matrice1": "Matrice1", "matrice": "Matrice", "english": "English"}
    mode_key = mode_lc if mode_lc in display else "matrice"

    lex = COLOR_LEX.get(color_lc, {})
    primary = lex.get(mode_key, [])
    if primary:
        return ", ".join(primary[:12])

    for fb, label in display.items():
        if fb == mode_key:
            continue
        words = lex.get(fb, [])
        if words:
            return f"(from {label}) " + ", ".join(words[:12])

    dom, _ = nearest_gnh_domain_for_color(color_lc)
    return f"(mapped GNH) {dom}: {GNH_DOMAINS.get(dom, '')}"


def simple_color_legend(colors: List[str]) -> str:
    """Render *colors* as an HTML legend: one colored dot plus name per row.

    Each color string is used verbatim as the dot's CSS ``background`` and,
    capitalised, as its caption.  An empty list yields a plain notice.
    """
    if not colors:
        return "No prompts available for this pathway."
    parts = []
    for c in colors:
        dot = f"<span style='display:inline-block;width:10px;height:10px;border-radius:50%;background:{c};margin-right:8px;border:1px solid #999;vertical-align:middle'></span>"
        parts.append(f"<div style='margin:4px 0'>{dot}<b>{c.capitalize()}</b></div>")
    return "<div>" + "".join(parts) + "</div>"


def colors_for_sequence(seq_key: str) -> List[str]:
    """Return the color list stored for *seq_key* (empty if unknown)."""
    return SEQ_TO_COLORS.get(seq_key, [])


def update_prompt_ui(seq_choice: str, word_mode: str):
    """Build the legend HTML and the per-color textbox updates.

    Args:
        seq_choice: Display name of the chosen pathway.
        word_mode: Selected word mode.

    Returns:
        A tuple ``(legend_html, update_1, ..., update_MAX_COLORS)``.  Boxes
        that have a color are made visible with a label and placeholder;
        the rest are hidden.  Every box's value is reset to ``""``.
    """
    key = SEQUENCE_ALIASES.get(seq_choice)
    colors = colors_for_sequence(key)
    labels = labels_for_mode(colors, word_mode)
    legend_html = simple_color_legend(colors)

    updates = []
    for i in range(MAX_COLORS):
        if i < len(colors):
            lab = labels[i] if i < len(labels) else f"Input {i+1}"
            ph = placeholder_for(colors[i], word_mode)
            updates.append(gr.update(visible=True, label=f"{lab} meaning", placeholder=ph, value=""))
        else:
            updates.append(gr.update(visible=False, value="", label=f"Input {i+1}", placeholder="—"))
    return (legend_html, *updates)


def render_phrase_template(base_phrase: str, colors: List[str], labels: List[str], inputs: List[str]) -> str:
    """Fill the pathway phrase template with the user's meanings.

    Every occurrence of ``<color>-pathway`` (case-insensitive, optional
    whitespace around the hyphen) is replaced by the user's text for that
    color, or by the color's label when the user left it blank.  A compact
    legend ``" // Label: input"`` is then appended for every non-blank
    input.

    Args:
        base_phrase: Template text; ``None`` or empty is treated as ``""``.
        colors: Color names, aligned with *labels* and *inputs*.
        labels: Display label per color.
        inputs: User text per color; non-strings and blanks count as empty.

    Returns:
        The rendered phrase.
    """
    text = base_phrase or ""

    def _clean(value) -> str:
        return value.strip() if isinstance(value, str) else ""

    rep: Dict[str, str] = {}
    for color, label, user in zip(colors, labels, inputs):
        rep[color.lower()] = _clean(user) or label

    if rep:
        # One combined pattern, one pass: text inserted for one color is
        # never re-scanned and rewritten on behalf of another color.
        alternation = "|".join(re.escape(c) for c in sorted(rep, key=len, reverse=True))
        pattern = re.compile(rf"\b({alternation})\s*-\s*pathway\b", re.IGNORECASE)

        def _substitute(match):
            # A callable replacement is inserted literally, so backslashes
            # or "\1" in user text are not parsed as regex escapes.
            return rep.get(match.group(1).lower(), match.group(0))

        text = pattern.sub(_substitute, text)

    suffix_parts = [
        f"{label}: {_clean(user)}"
        for _, label, user in zip(colors, labels, inputs)
        if _clean(user)
    ]
    if suffix_parts:
        text = (text + " // " + " // ".join(suffix_parts)).strip()

    return text


def analyze(text: str, seq_choice: str, word_mode: str, *color_inputs):
    """Run the full analysis for the "Generate" button.

    The user's text is combined with the rendered pathway phrase and the
    combined text is scored for sentiment, emotion, accomplishment and GNH
    similarity.

    Args:
        text: Free text describing the situation.
        seq_choice: Display name of the chosen pathway.
        word_mode: Selected word mode.
        *color_inputs: Values of the per-color textboxes, in box order.

    Returns:
        A tuple matching the click handler's outputs: sentiment, emotion
        string, accomplishment, rendered phrase, top-5 indicator text,
        figure, image path, metadata string, followed by everything
        ``update_prompt_ui`` returns.  If the pathway has no phrase in the
        dataset, placeholder values are returned instead.
    """
    key = SEQUENCE_ALIASES.get(seq_choice)
    if key not in SEQ_PHRASE:
        return (5.0, "neutral (0.0)", 5.0, "Choose a valid pathway.", "{}", None, None, f"{seq_choice} (unavailable)",
                *update_prompt_ui(seq_choice, word_mode))

    colors = colors_for_sequence(key)
    labels = labels_for_mode(colors, word_mode)
    base_phrase = SEQ_PHRASE.get(key, "")

    # Only as many inputs as the pathway has colors are meaningful.
    user_inputs = list(color_inputs)[:len(colors)]
    updated_phrase = render_phrase_template(base_phrase, colors, labels, user_inputs)

    # Score the user's text together with the rendered phrase.
    combined_text = " ".join([t for t in [text, updated_phrase] if t and t.strip()])
    sentiment = score_sentiment(combined_text)
    emotion, emo_conf = classify_emotion(combined_text)
    accomplishment = score_accomplishment(combined_text)

    indicators = semantic_indicator_mapping(combined_text, sentiment_score=sentiment)
    fig = indicators_plot(indicators)
    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)

    img_path = sequence_to_image_path(key)
    meta = f"{key} | colors: {', '.join(colors) if colors else '—'}"
    emo_str = f"{emotion} ({emo_conf:.3f})"

    # NOTE(review): these updates set every textbox value to "", so the
    # user's meanings are cleared after each run — confirm this is intended.
    prompt_updates = update_prompt_ui(seq_choice, word_mode)

    return (
        sentiment, emo_str, accomplishment,
        updated_phrase, top5_str, fig, img_path, meta,
        *prompt_updates
    )


# Dropdown choices and the initial selection ("Knot" when it is offered).
SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
DEFAULT_SEQ = "Knot" if "Knot" in SEQ_CHOICES else SEQ_CHOICES[0]

# ---------------------------------------------------------------------------
# Gradio UI: inputs, per-color prompt boxes, outputs and event wiring.
# ---------------------------------------------------------------------------
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown("## RGB Root Matriz Color Plotter\n"
                "Type a phrase. Choose a **Sequence**. "
                "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")

    with gr.Row():
        inp = gr.Textbox(lines=4, label="Your situation / obstacle", placeholder="Describe the situation...")

    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value=DEFAULT_SEQ, label="Pathway")
        word_mode = gr.Radio(choices=WORD_MODES, value="Matrice", label="Word Mode")

    legend = gr.HTML()

    # A fixed pool of hidden textboxes; update_prompt_ui() shows and labels
    # as many as the selected pathway has colors.
    color_boxes: List[gr.Textbox] = []
    for i in range(MAX_COLORS):
        color_boxes.append(gr.Textbox(visible=False, label=f"Input {i+1}", placeholder="—"))

    run = gr.Button("Generate Pathway Analysis", variant="primary")

    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")

    with gr.Row():
        phrase_out = gr.Text(label="Updated Pathway Phrase (template with your meanings)")
        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")

    gnh_plot = gr.Plot(label="GNH Similarity")
    img_out = gr.Image(label="Pathway image", type="filepath")
    meta_out = gr.Text(label="Chosen pathway / colors")

    def _update_ui(seq_choice, mode):
        """Refresh the legend and prompt boxes for the current selection."""
        return update_prompt_ui(seq_choice, mode)

    # Changing either selector rebuilds the legend and the prompt boxes.
    seq.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
    word_mode.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

    # The output order must match the tuple returned by analyze().
    run.click(
        fn=analyze,
        inputs=[inp, seq, word_mode, *color_boxes],
        outputs=[sent, emo, acc, phrase_out, gnh_top, gnh_plot, img_out, meta_out, legend, *color_boxes],
    )

    # Populate the prompt boxes for the default selection on page load.
    demo.load(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])


# Start the Gradio server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()