File size: 17,112 Bytes
27aafc2 617928b 27aafc2 617928b dd6720a 2de35e1 dd6720a 2dd7883 dd6720a 2de35e1 2dd7883 dd6720a 2dd7883 64742da 27aafc2 2dd7883 27aafc2 882bed0 33221b5 27aafc2 882bed0 27aafc2 dd6720a 617928b dd6720a 2d09bac dd6720a 2d09bac dd6720a 2d09bac dd6720a 2d09bac dd6720a 2dd7883 27aafc2 64742da 27aafc2 617928b 2de35e1 27aafc2 2de35e1 2dd7883 2de35e1 617928b 2dd7883 2de35e1 2dd7883 27aafc2 2dd7883 27aafc2 617928b 2de35e1 617928b 2dd7883 64742da dd6720a 2de35e1 dd6720a 617928b dd6720a 64742da 27aafc2 dd6720a 27aafc2 617928b dd6720a 64742da dd6720a 64742da dd6720a 64742da dd6720a 27aafc2 dd6720a 27aafc2 dd6720a 64742da dd6720a 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 92b24cb 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 64742da 2dd7883 2de35e1 dd6720a 64742da 2dd7883 27aafc2 2dd7883 2de35e1 dd6720a 27aafc2 64742da 2dd7883 dd6720a 2dd7883 64742da 2dd7883 64742da 617928b 2de35e1 445f0ca 617928b dd6720a 64742da 2de35e1 64742da 2dd7883 2de35e1 2dd7883 2de35e1 2dd7883 64742da 2dd7883 617928b 27aafc2 617928b dd6720a 27aafc2 617928b 2dd7883 27aafc2 617928b 64742da 27aafc2 64742da 617928b 64742da 2dd7883 dd6720a 27aafc2 2de35e1 2dd7883 617928b dd6720a 2dd7883 92b24cb dd6720a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 |
import functools
import os, re
from typing import Dict, Tuple, List
import nltk, spacy, torch, pandas as pd, matplotlib.pyplot as plt
import torch.nn.functional as F
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
# -------------------- setup --------------------
def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if needed.

    The first load is attempted as-is; only when spaCy reports that the model
    cannot be found is the package downloaded and the load retried.

    Returns:
        The loaded spaCy pipeline.
    """
    model_name = "en_core_web_sm"
    try:
        return spacy.load(model_name)
    except OSError:
        # spaCy reports a missing/uninstalled model as OSError. Anything else
        # is a genuine failure that a download would not fix, so let it surface.
        from spacy.cli import download

        download(model_name)
        return spacy.load(model_name)
def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is present.

    The resource is looked up first; the download only runs when that lookup
    raises ``LookupError``.
    """
    package = "punkt"
    try:
        nltk.data.find(f"tokenizers/{package}")
    except LookupError:
        nltk.download(package)
# Everything below runs once at import time so the handlers further down can
# reuse the loaded resources instead of re-creating them per request.
ensure_nltk()
# spaCy pipeline used by score_accomplishment() for tokens and POS tags.
nlp = ensure_spacy()
# Sentence-embedding model used by encode_text().
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Sentiment pipeline; score_sentiment() rescales its label/score output.
bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
# Tokenizer + classifier pair used by classify_emotion().
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
# -------------------- constants --------------------
# Input CSV files, given as relative paths (resolved against the working directory).
CSV_PATH_PLUS = "la matrice plus.csv" # pathways + colors + template words
CSV_PATH_COLOR = "la matrice.csv" # color lexicon
# Display name (offered in the "Pathway" dropdown) -> internal lowercase key.
# The key is what load_pathway_info() matches against the CSV "color" column
# and what SEQUENCE_IMAGE_FILES is indexed by.
SEQUENCE_ALIASES = {
    "Direct": "direct",
    "Feminine": "feminine",
    "Knot": "knot",
    "Masculine": "masc",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}
# Internal pathway key -> image filename. sequence_to_image_path() only
# returns a filename when the file actually exists.
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masc": "masc pathway.png",
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png"
}
# GNH domain name -> descriptive keyword text. The descriptions are embedded
# with encode_text() and compared by cosine similarity against user text
# (semantic_indicator_mapping) or color lexicon words
# (nearest_gnh_domain_for_color).
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}
# GNH domain name -> hex bar color used by indicators_plot(); domains missing
# from this table are drawn in "#cccccc" there.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#FA005A",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#FAB478",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#0000FF",
    "Ecological Diversity": "#00FF00",
    "Health": "#FF0000",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#B432FF",
}
# Choices offered by the "Word Mode" radio. Modes starting with "gnh"
# (case-insensitive) switch labels/placeholders to GNH domains; the others
# select a lexicon column in placeholder_for().
WORD_MODES = ["Matrice1", "Matrice", "English", "GNH Indicators"]
# Number of per-color textboxes created in the UI; update_prompt_ui() always
# returns exactly this many textbox updates.
MAX_COLORS = 8
# -------------------- loaders --------------------
def _find_col(df: pd.DataFrame, candidates: List[str]) -> str | None:
names = {c.lower(): c for c in df.columns}
for c in candidates:
if c.lower() in names: return names[c.lower()]
for want in candidates:
ww = want.replace(" ", "").replace("-", "")
for lc, orig in names.items():
if ww in lc.replace(" ", "").replace("-", ""):
return orig
return None
def load_pathway_info(csv_path_plus: str):
    """Read the pathway CSV and extract each pathway's colors and base phrase.

    A row counts as a pathway row when its ``color`` cell, trimmed and
    lowercased, equals one of the ``SEQUENCE_ALIASES`` values. For such a row
    the ``r`` cell holds a comma/whitespace separated list of color names, and
    every column other than ``color``/``r``/``g``/``b`` contributes text to
    the phrase.

    Args:
        csv_path_plus: Path of the CSV file to read.

    Returns:
        A ``(seq_to_colors, seq_phrase)`` pair: pathway key -> de-duplicated
        lowercase color names (in file order), and pathway key -> phrase with
        whitespace collapsed to single spaces.
    """
    df = pd.read_csv(csv_path_plus)
    pathway_keys = set(SEQUENCE_ALIASES.values())

    # Normalise exactly the way the per-row key is built below, so a name with
    # stray surrounding whitespace is not silently filtered out.
    normalised = df["color"].astype(str).str.strip().str.lower()
    rows = df[normalised.isin(pathway_keys)]

    phrase_cols = [c for c in df.columns if c not in ("color", "r", "g", "b")]
    seq_to_colors: Dict[str, List[str]] = {}
    seq_phrase: Dict[str, str] = {}

    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()

        # A blank cell is read as NaN; str(NaN) would produce a bogus color
        # literally named "nan", so treat missing as empty.
        raw_colors = row.get("r")
        color_text = str(raw_colors) if pd.notna(raw_colors) else ""
        names = [
            tok.strip().lower()
            for tok in re.split(r"[,\s]+", color_text)
            if tok.strip()
        ]
        seq_to_colors[key] = list(dict.fromkeys(names))

        parts: List[str] = []
        for col in phrase_cols:
            cell = row.get(col)
            if pd.notna(cell):
                cell_text = str(cell).strip()
                if cell_text and cell_text.lower() != "nan":
                    parts.append(cell_text)
        # split()/join collapses any run of whitespace inside the cells.
        seq_phrase[key] = " ".join(" ".join(parts).split())

    return seq_to_colors, seq_phrase
def _split_words(s: str) -> List[str]:
if not isinstance(s, str): return []
parts = re.split(r"[,\;/\|\s]+", s.strip())
return [p for p in (w.strip().lower() for w in parts) if p]
def load_color_lexicon(csv_path_color: str):
    """Read the color lexicon CSV into a nested dictionary.

    Column names are located with ``_find_col`` so minor header variations
    are tolerated. Rows without a color name are skipped.

    Args:
        csv_path_color: Path of the CSV file to read.

    Returns:
        ``{color: {"matrice1": [...], "matrice": [...], "english": [...]}}``
        with the color name lowercased and each word list produced by
        ``_split_words``. A later row with the same color replaces an earlier one.
    """
    df = pd.read_csv(csv_path_color)
    color_col = _find_col(df, ["color", "colour"])
    word_cols = {
        "matrice1": _find_col(df, ["matrice1", "matrice 1"]),
        "matrice": _find_col(df, ["matrice"]),
        "english": _find_col(df, ["english-words-code", "english words code", "english_words_code", "english"]),
    }

    def cell_text(row, col) -> str:
        # A missing column or a blank (NaN) cell becomes "", not the string
        # "nan" that str(NaN) would yield.
        if col is None:
            return ""
        value = row.get(col)
        return str(value) if pd.notna(value) else ""

    lex: Dict[str, Dict[str, List[str]]] = {}
    if color_col is None:
        # No color column means no row can be keyed.
        return lex

    for _, row in df.iterrows():
        cname = cell_text(row, color_col).strip().lower()
        if not cname:
            continue
        lex[cname] = {
            field: _split_words(cell_text(row, col))
            for field, col in word_cols.items()
        }
    return lex
# Both CSV files are read once at import time; the resulting tables are
# treated as read-only lookups by the functions below.
SEQ_TO_COLORS, SEQ_PHRASE = load_pathway_info(CSV_PATH_PLUS)
COLOR_LEX = load_color_lexicon(CSV_PATH_COLOR)
def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the image filename registered for *seq_key* if it exists on disk.

    ``None`` is returned both for unknown keys and for known keys whose file
    is not present.
    """
    candidate = SEQUENCE_IMAGE_FILES.get(seq_key)
    if not candidate:
        return None
    if not os.path.exists(candidate):
        return None
    return candidate
# -------------------- NLP helpers --------------------
def encode_text(t: str):
    """Embed *t* with the module-level sentence-embedding model, as a tensor."""
    embedding = sbert_model.encode(t, convert_to_tensor=True)
    return embedding
def classify_emotion(text: str) -> Tuple[str, float]:
    """Return the most probable emotion label for *text* and its probability.

    The text is tokenized with truncation enabled, run through the emotion
    classifier without gradient tracking, and the softmax maximum is reported.
    """
    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        raw_scores = emotion_model(**encoded).logits
    distribution = F.softmax(raw_scores, dim=1).squeeze()
    top = int(torch.argmax(distribution).item())
    confidence = float(distribution[top].item())
    return emotion_model.config.id2label[top], confidence
def score_sentiment(text: str) -> float:
    """Map the sentiment pipeline's verdict on *text* to a 1-10 score.

    Only the first 512 characters are analysed. A ``POSITIVE`` label gives
    ``5 + 5 * score``; any other label gives ``1 + 4 * (1 - score)``. The
    result is clamped to [1, 10] and rounded to two decimals.
    """
    verdict = bert_sentiment(text[:512])[0]
    confidence = verdict["score"]
    if verdict["label"] == "POSITIVE":
        scaled = 5 + 5 * confidence
    else:
        scaled = 1 + 4 * (1 - confidence)
    clamped = min(10, max(1, scaled))
    return round(clamped, 2)
def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 "accomplishment" score for *text*.

    Starts at 5.0, adds 1.5 for every token whose lowercased text is one of
    the key phrases and 0.5 for every token tagged ``VBD`` or ``VBN``, then
    clamps to [1, 10] and rounds to two decimals.
    """
    # NOTE(review): "walked away" is compared against single tokens, so it
    # presumably never matches on its own ("walked" does) -- confirm intent.
    key_phrases = {"finally","told","decided","quit","refused","stood","walked","walked away","returned","return"}
    past_tags = {"VBD", "VBN"}

    doc = nlp(text)
    keyword_hits = sum(1 for tok in doc if tok.text.lower() in key_phrases)
    past_hits = sum(1 for tok in doc if tok.tag_ in past_tags)

    # Multiples of 0.5 are exact in binary floating point, so this equals
    # the per-token running sum.
    score = 5.0 + 1.5 * keyword_hits + 0.5 * past_hits
    return round(min(10, max(1, score)), 2)
def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score *text* against every GNH domain.

    For each domain the cosine similarity between the text embedding and the
    domain description embedding is clamped to [0, 1] and blended with the
    sentiment score (scaled to 0-1) using ``sentiment_weight``.

    Returns:
        Domain -> blended score rounded to three decimals, ordered from the
        highest score to the lowest.
    """
    text_vec = encode_text(text)
    sentiment_part = sentiment_weight * (sentiment_score / 10.0)

    scored = []
    for domain, description in GNH_DOMAINS.items():
        similarity = float(util.cos_sim(text_vec, encode_text(description)).item())
        similarity = max(0.0, min(1.0, similarity))
        blended = (1 - sentiment_weight) * similarity + sentiment_part
        scored.append((domain, round(blended, 3)))

    scored.sort(key=lambda pair: -pair[1])
    return dict(scored)
def indicators_plot(indicators: Dict[str, float]):
    """Draw the indicator scores as a horizontal bar chart.

    Args:
        indicators: Domain name -> score. Bars are drawn in this order from
            top to bottom.

    Returns:
        A matplotlib ``Figure`` containing the chart.
    """
    # Build the figure without pyplot. pyplot keeps a global reference to
    # every figure it creates until plt.close() is called, so creating one per
    # request in a long-running app leaks figures; pyplot's implicit
    # "current figure" is also shared between concurrent requests.
    from matplotlib.figure import Figure

    labels = list(indicators)
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    fig = Figure(figsize=(8, 5))
    ax = fig.subplots()
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()  # first (highest-ranked) entry at the top
    ax.set_title("GNH Indicator Similarity")
    ax.set_xlabel("Score")
    fig.tight_layout()
    return fig
# -------------------- prompt building (legible placeholders) --------------------
def join_all_words(color: str) -> List[str]:
    """Return every lexicon word recorded for *color*, without duplicates.

    Words are taken from the ``matrice1``, ``matrice`` and ``english`` lists
    in that order; the first occurrence of each word is kept. Unknown colors
    give an empty list.
    """
    entry = COLOR_LEX.get(color.lower(), {})
    merged: List[str] = []
    for column in ("matrice1", "matrice", "english"):
        merged.extend(entry.get(column, []))
    return list(dict.fromkeys(merged))
@functools.lru_cache(maxsize=256)
def nearest_gnh_domain_for_color(color: str) -> Tuple[str, float]:
    """Find the GNH domain whose description is closest to a color's words.

    The color's lexicon words are joined into one string, embedded, and
    compared by cosine similarity with each ``GNH_DOMAINS`` description.

    Results are memoised per ``color`` string: the lexicon and the domain
    table are module-level data loaded once, and this function is called
    repeatedly for the same colors on every UI refresh, each call otherwise
    re-embedding all domain descriptions.

    Args:
        color: Color name (looked up case-insensitively in the lexicon).

    Returns:
        ``(domain, similarity)``. When the color has no lexicon words the
        result is ``("Mental Wellness", 0.0)``.
    """
    words = " ".join(join_all_words(color))
    if not words:
        return "Mental Wellness", 0.0

    color_vec = encode_text(words)
    best, best_sim = None, -1.0
    for dom, desc in GNH_DOMAINS.items():
        sim = float(util.cos_sim(color_vec, encode_text(desc)).item())
        if sim > best_sim:
            best, best_sim = dom, sim
    # `best` stays None only if no similarity exceeded -1.0.
    return best or "Mental Wellness", best_sim
def labels_for_mode(colors: List[str], mode: str) -> List[str]:
    """Return one display label per color for the given word mode.

    Modes starting with "gnh" (case-insensitive) label each color with its
    nearest GNH domain; every other mode uses the capitalized color name.
    """
    use_gnh = mode.lower().startswith("gnh")
    labels: List[str] = []
    for color in colors:
        if use_gnh:
            domain, _similarity = nearest_gnh_domain_for_color(color)
            labels.append(domain)
        else:
            labels.append(color.capitalize())
    return labels
def placeholder_for(color: str, mode: str) -> str:
    """Build the placeholder text shown in a color's input box.

    Resolution order:

    * GNH mode: the nearest GNH domain and its description.
    * Otherwise the first 12 words of the lexicon column chosen by *mode*
      (unknown modes fall back to ``matrice``).
    * If that column is empty, the first 12 words of the first non-empty
      remaining column, prefixed with ``(from <column>)``.
    * If the color has no words at all, the nearest GNH domain prefixed with
      ``(mapped GNH)``.
    """
    color_key = color.lower()
    mode_lc = mode.lower()

    if mode_lc.startswith("gnh"):
        domain, _ = nearest_gnh_domain_for_color(color_key)
        return f"{domain}: {GNH_DOMAINS.get(domain, '')}"

    # Lexicon column -> name shown in the "(from ...)" prefix; the dict order
    # is also the fallback order.
    column_titles = {"matrice1": "Matrice1", "matrice": "Matrice", "english": "English"}
    chosen = mode_lc if mode_lc in column_titles else "matrice"

    entry = COLOR_LEX.get(color_key, {})
    chosen_words = entry.get(chosen, [])
    if chosen_words:
        return ", ".join(chosen_words[:12])

    for column, title in column_titles.items():
        if column == chosen:
            continue
        alt_words = entry.get(column, [])
        if alt_words:
            return f"(from {title}) " + ", ".join(alt_words[:12])

    # Nothing in the lexicon: fall back to the mapped GNH domain description.
    domain, _ = nearest_gnh_domain_for_color(color_key)
    return f"(mapped GNH) {domain}: {GNH_DOMAINS.get(domain, '')}"
def simple_color_legend(colors: List[str]) -> str:
    """Render an HTML legend with one colored dot and bold name per color.

    Returns a plain notice string when *colors* is empty. The color value is
    inserted as-is into the dot's CSS ``background``.
    """
    if not colors:
        return "No prompts available for this pathway."

    def legend_row(color: str) -> str:
        dot = f"<span style='display:inline-block;width:10px;height:10px;border-radius:50%;background:{color};margin-right:8px;border:1px solid #999;vertical-align:middle'></span>"
        return f"<div style='margin:4px 0'>{dot}<b>{color.capitalize()}</b></div>"

    rows = "".join(legend_row(color) for color in colors)
    return "<div>" + rows + "</div>"
def colors_for_sequence(seq_key: str) -> List[str]:
    """Return the colors recorded for a pathway key, or an empty list if unknown."""
    try:
        return SEQ_TO_COLORS[seq_key]
    except KeyError:
        return []
def update_prompt_ui(seq_choice: str, word_mode: str):
    """Compute the legend HTML and the updates for all color textboxes.

    Args:
        seq_choice: Display name of the selected pathway.
        word_mode: Selected word mode.

    Returns:
        A tuple of the legend HTML followed by exactly ``MAX_COLORS`` textbox
        updates. Boxes that correspond to a pathway color are made visible
        with a label and placeholder; the rest are hidden. Every update sets
        the box value to an empty string.
    """
    colors = colors_for_sequence(SEQUENCE_ALIASES.get(seq_choice))
    labels = labels_for_mode(colors, word_mode)
    legend_html = simple_color_legend(colors)

    box_updates = []
    for slot in range(MAX_COLORS):
        if slot >= len(colors):
            # Unused slot: hide it and restore the neutral label/placeholder.
            box_updates.append(gr.update(visible=False, value="", label=f"Input {slot+1}", placeholder="—"))
            continue
        caption = labels[slot] if slot < len(labels) else f"Input {slot+1}"
        hint = placeholder_for(colors[slot], word_mode)
        box_updates.append(gr.update(visible=True, label=f"{caption} meaning", placeholder=hint, value=""))
    return (legend_html, *box_updates)
# -------------------- template replacement --------------------
def render_phrase_template(base_phrase: str, colors: List[str], labels: List[str], inputs: List[str]) -> str:
    """Fill a pathway template with the user's meanings.

    Every occurrence of ``<color>-pathway`` in *base_phrase* is replaced,
    case-insensitively and accepting ``brown-pathway``, ``brown pathway`` and
    ``Brown- Pathway`` style spellings, with the text the user entered for
    that color. When the user left a color empty (or no input was supplied
    for it) the color's label is used instead. Finally a compact legend
    `` // Label: input`` is appended for each non-empty input.

    Args:
        base_phrase: Template text; ``None``/empty is treated as ``""``.
        colors: Color names, parallel to *labels*.
        labels: Display label per color.
        inputs: User text per color; may be shorter than *colors*.

    Returns:
        The rendered phrase.
    """
    text = base_phrase or ""

    # Pad so colors without a supplied input still fall back to their label
    # instead of being dropped by zip().
    supplied = list(inputs)
    supplied += [""] * (len(colors) - len(supplied))
    cleaned = [item.strip() if isinstance(item, str) else "" for item in supplied]

    replacements: Dict[str, str] = {}
    for color, label, user in zip(colors, labels, cleaned):
        if color:
            replacements[color.lower()] = user or label

    if replacements:
        # One combined pattern, applied in a single pass, so text inserted for
        # one color can never be re-matched as another color's token. Longest
        # names first so a name that prefixes another cannot shadow it.
        alternatives = "|".join(
            re.escape(name) for name in sorted(replacements, key=len, reverse=True)
        )
        token = re.compile(
            rf"\b(?P<color>{alternatives})(?:\s*-\s*|\s+)pathway\b",
            re.IGNORECASE,
        )

        def substitute(match: "re.Match[str]") -> str:
            # A callable replacement is inserted literally; a plain string
            # would have its backslashes interpreted as regex escapes and
            # fail or be mangled on user input such as "a\d".
            return replacements.get(match.group("color").lower(), match.group(0))

        text = token.sub(substitute, text)

    suffix_parts = [
        f"{label}: {user}"
        for _color, label, user in zip(colors, labels, cleaned)
        if user
    ]
    if suffix_parts:
        text = (text + " // " + " // ".join(suffix_parts)).strip()
    return text
# -------------------- main analysis --------------------
def analyze(text: str, seq_choice: str, word_mode: str, *color_inputs):
    """Run the full analysis for the "Generate" button.

    The pathway phrase is rendered with the user's per-color meanings, joined
    to the user's own text, and the combined text is scored for sentiment,
    emotion, accomplishment and GNH indicators.

    Args:
        text: The user's free-text description.
        seq_choice: Display name of the selected pathway.
        word_mode: Selected word mode.
        *color_inputs: Contents of the color textboxes, in box order.

    Returns:
        A tuple of: sentiment score, emotion string, accomplishment score,
        rendered phrase, top-5 indicator text, indicator figure, image path
        (or ``None``), pathway/colors summary, followed by the values
        ``update_prompt_ui`` returns (legend HTML and textbox updates). When
        the pathway has no phrase, fixed placeholder values are returned for
        the first eight items.
    """
    key = SEQUENCE_ALIASES.get(seq_choice)
    if key not in SEQ_PHRASE:
        unavailable = (
            5.0, "neutral (0.0)", 5.0, "Choose a valid pathway.", "{}",
            None, None, f"{seq_choice} (unavailable)",
        )
        return (*unavailable, *update_prompt_ui(seq_choice, word_mode))

    colors = colors_for_sequence(key)
    labels = labels_for_mode(colors, word_mode)

    # Only the boxes that belong to this pathway's colors carry meanings.
    user_inputs = list(color_inputs)[:len(colors)]
    updated_phrase = render_phrase_template(SEQ_PHRASE.get(key, ""), colors, labels, user_inputs)

    # Analyse the user's text together with the rendered phrase, skipping
    # whichever of the two is blank.
    pieces = [piece for piece in (text, updated_phrase) if piece and piece.strip()]
    combined_text = " ".join(pieces)

    sentiment = score_sentiment(combined_text)
    emotion, emo_conf = classify_emotion(combined_text)
    accomplishment = score_accomplishment(combined_text)
    indicators = semantic_indicator_mapping(combined_text, sentiment_score=sentiment)
    fig = indicators_plot(indicators)

    top5_lines = [f"{name}: {score}" for name, score in list(indicators.items())[:5]]
    top5_str = "\n".join(top5_lines)

    img_path = sequence_to_image_path(key)
    color_summary = ", ".join(colors) if colors else "—"
    meta = f"{key} | colors: {color_summary}"
    emo_str = f"{emotion} ({emo_conf:.3f})"

    # Keep the prompt area in sync with the current pathway/mode.
    # NOTE(review): update_prompt_ui sets every box value to "", so this also
    # clears what the user typed after each run -- confirm that is intended.
    prompt_updates = update_prompt_ui(seq_choice, word_mode)

    return (
        sentiment, emo_str, accomplishment,
        updated_phrase, top5_str, fig, img_path, meta,
        *prompt_updates,
    )
# -------------------- UI --------------------
# Dropdown choices are the display names; "Knot" is preselected when present,
# otherwise the first choice.
SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
DEFAULT_SEQ = "Knot" if "Knot" in SEQ_CHOICES else SEQ_CHOICES[0]
# Gradio layout and event wiring.
# NOTE(review): the nesting below (which components sit inside each gr.Row)
# was reconstructed from statement order -- confirm against the intended layout.
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown("## RGB Root Matriz Color Plotter\n"
                "Type a phrase. Choose a **Sequence**. "
                "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")
    # --- inputs ---
    with gr.Row():
        inp = gr.Textbox(lines=4, label="Your situation / obstacle", placeholder="Describe the situation...")
    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value=DEFAULT_SEQ, label="Pathway")
        word_mode = gr.Radio(choices=WORD_MODES, value="Matrice", label="Word Mode")
    legend = gr.HTML()
    # A fixed pool of MAX_COLORS textboxes, all hidden initially;
    # update_prompt_ui() decides which ones are shown and how they are labelled.
    color_boxes: List[gr.Textbox] = []
    for i in range(MAX_COLORS):
        color_boxes.append(gr.Textbox(visible=False, label=f"Input {i+1}", placeholder="—"))
    run = gr.Button("Generate Pathway Analysis", variant="primary")
    # --- outputs ---
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        phrase_out = gr.Text(label="Updated Pathway Phrase (template with your meanings)")
        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
    gnh_plot = gr.Plot(label="GNH Similarity")
    img_out = gr.Image(label="Pathway image", type="filepath")
    meta_out = gr.Text(label="Chosen pathway / colors")
    # Thin adapter used as the callback for the prompt-refresh events below.
    def _update_ui(seq_choice, mode):
        return update_prompt_ui(seq_choice, mode)
    # Refresh legend and textboxes whenever the pathway or word mode changes.
    seq.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
    word_mode.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
    # The output order must match the tuple returned by analyze().
    run.click(
        fn=analyze,
        inputs=[inp, seq, word_mode, *color_boxes],
        outputs=[sent, emo, acc, phrase_out, gnh_top, gnh_plot, img_out, meta_out, legend, *color_boxes],
    )
    # Populate legend and textboxes for the default selection on page load.
    demo.load(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|