"""Gradio app: pathway phrase templating plus sentiment / emotion / GNH scoring.

The module loads two CSV files (pathway templates and a colour lexicon),
several pretrained NLP models, and builds a Gradio Blocks UI that lets the
user pick a pathway, fill one "meaning" per pathway colour, and analyse the
resulting phrase.
"""

import functools
import html
import os
import re
from typing import Dict, List, Tuple

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import spacy
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer, util
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline


# -------------------- setup --------------------
def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if missing."""
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # Import only the function: a plain ``import spacy.cli`` here would
        # make ``spacy`` a *local* name for the whole function, so the
        # ``spacy.load`` call above would always fail with UnboundLocalError
        # and the model would be re-downloaded on every start.
        from spacy.cli import download as spacy_download

        spacy_download("en_core_web_sm")
        return spacy.load("en_core_web_sm")


def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is available locally."""
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")


ensure_nltk()
nlp = ensure_spacy()
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
bert_sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)

# -------------------- constants --------------------
CSV_PATH_PLUS = "la matrice plus.csv"  # pathways + colors + template words
CSV_PATH_COLOR = "la matrice.csv"  # color lexicon

# UI display name -> key used in the pathway CSV's "color" column.
SEQUENCE_ALIASES = {
    "Direct": "direct",
    "Feminine": "feminine",
    "Knot": "knot",
    "Masculine": "masc",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}

# Pathway key -> image file looked up relative to the working directory.
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masc": "masc pathway.png",
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png",
}

# GNH domain -> short description that is embedded and compared with the text.
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}

# GNH domain -> bar colour used in the indicator plot.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#FA005A",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#FAB478",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#0000FF",
    "Ecological Diversity": "#00FF00",
    "Health": "#FF0000",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#B432FF",
}

WORD_MODES = ["Matrice1", "Matrice", "English", "GNH Indicators"]
MAX_COLORS = 8  # number of per-colour text boxes created in the UI

# Single-token cues that add to the accomplishment score.
# NOTE: "walked away" is compared against individual tokens and therefore can
# never match; for that phrase only the "walked" entry fires.
_ACCOMPLISHMENT_WORDS = frozenset(
    {
        "finally", "told", "decided", "quit", "refused", "stood",
        "walked", "walked away", "returned", "return",
    }
)


# -------------------- loaders --------------------
def _cell_text(value) -> str:
    """Return a CSV cell as stripped text, mapping missing/NaN cells to ''."""
    if value is None:
        return ""
    if not isinstance(value, str) and pd.isna(value):
        return ""
    text = str(value).strip()
    return "" if text.lower() == "nan" else text


def _find_col(df: pd.DataFrame, candidates: List[str]) -> str | None:
    """Find the first column of *df* matching one of *candidates*.

    Exact case-insensitive matches are tried for every candidate first; only
    then is a substring match (ignoring spaces and hyphens) attempted.
    Returns the original column name, or ``None`` when nothing matches.
    """
    names = {str(c).lower(): c for c in df.columns}
    for cand in candidates:
        if cand.lower() in names:
            return names[cand.lower()]
    for cand in candidates:
        needle = cand.lower().replace(" ", "").replace("-", "")
        for lowered, original in names.items():
            if needle in lowered.replace(" ", "").replace("-", ""):
                return original
    return None


def load_pathway_info(csv_path_plus: str):
    """Read pathway rows from the "plus" CSV.

    Rows whose ``color`` cell equals one of the ``SEQUENCE_ALIASES`` values
    are treated as pathways.

    Returns:
        ``(seq_to_colors, seq_phrase)`` where ``seq_to_colors`` maps pathway
        key -> de-duplicated colour names (parsed from column ``r``) and
        ``seq_phrase`` maps pathway key -> template phrase (all remaining
        non-empty cells joined with single spaces).
    """
    df = pd.read_csv(csv_path_plus)
    keys = set(SEQUENCE_ALIASES.values())
    # Strip before comparing so the filter agrees with the key derived below.
    normalized = df["color"].astype(str).str.strip().str.lower()
    rows = df[normalized.isin(keys)].copy()

    seq_to_colors: Dict[str, List[str]] = {}
    seq_phrase: Dict[str, str] = {}
    # colors live in 'r' (list), template = concat of the other fields
    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]

    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()

        # _cell_text keeps an empty (NaN) cell from becoming the colour "nan".
        color_list = _cell_text(row.get("r"))
        colors = [c.strip().lower() for c in re.split(r"[,\s]+", color_list) if c.strip()]
        seq_to_colors[key] = list(dict.fromkeys(colors))

        vals = [s for s in (_cell_text(row.get(c)) for c in cols_for_phrase) if s]
        seq_phrase[key] = " ".join(" ".join(vals).split())  # base template

    return seq_to_colors, seq_phrase


def _split_words(s: str) -> List[str]:
    """Split a lexicon cell on commas, semicolons, slashes, pipes or whitespace."""
    if not isinstance(s, str):
        return []
    parts = re.split(r"[,\;/\|\s]+", s.strip())
    return [p for p in (w.strip().lower() for w in parts) if p]


def load_color_lexicon(csv_path_color: str):
    """Read the colour lexicon CSV.

    Returns a dict mapping lower-cased colour name to
    ``{"matrice1": [...], "matrice": [...], "english": [...]}``. Columns are
    located with ``_find_col``; a missing column yields empty word lists.
    Rows without a colour name are skipped.
    """
    df = pd.read_csv(csv_path_color)
    color_col = _find_col(df, ["color", "colour"])
    m1_col = _find_col(df, ["matrice1", "matrice 1"])
    m_col = _find_col(df, ["matrice"])
    en_col = _find_col(
        df,
        ["english-words-code", "english words code", "english_words_code", "english"],
    )

    def cell(row, col) -> str:
        # Missing column or NaN cell -> "" (never the literal word "nan").
        return "" if col is None else _cell_text(row.get(col))

    lex: Dict[str, Dict[str, List[str]]] = {}
    for _, row in df.iterrows():
        cname = cell(row, color_col).lower()
        if not cname:
            continue
        lex[cname] = {
            "matrice1": _split_words(cell(row, m1_col)),
            "matrice": _split_words(cell(row, m_col)),
            "english": _split_words(cell(row, en_col)),
        }
    return lex


SEQ_TO_COLORS, SEQ_PHRASE = load_pathway_info(CSV_PATH_PLUS)
COLOR_LEX = load_color_lexicon(CSV_PATH_COLOR)


def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the pathway image file name if it exists on disk, else ``None``."""
    fname = SEQUENCE_IMAGE_FILES.get(seq_key)
    return fname if (fname and os.path.exists(fname)) else None


# -------------------- NLP helpers --------------------
def encode_text(t: str):
    """Embed *t* with the sentence-transformer model and return a tensor."""
    return sbert_model.encode(t, convert_to_tensor=True)


@functools.lru_cache(maxsize=None)
def _domain_embedding(desc: str):
    """Embed a GNH domain description once; the descriptions are constants."""
    return encode_text(desc)


def classify_emotion(text: str) -> Tuple[str, float]:
    """Return ``(label, probability)`` of the most likely emotion for *text*."""
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    probs = F.softmax(logits, dim=1).squeeze()
    labels = emotion_model.config.id2label
    idx = int(torch.argmax(probs).item())
    return labels[idx], float(probs[idx].item())


def score_sentiment(text: str) -> float:
    """Map the sentiment classifier output onto a 1-10 scale.

    POSITIVE results land in 5-10 (``5 + 5 * score``), anything else in 1-5
    (``1 + 4 * (1 - score)``). Only the first 512 characters are classified.
    """
    out = bert_sentiment(text[:512])[0]
    label, score = out["label"], out["score"]
    scaled = 5 + 5 * score if label == "POSITIVE" else 1 + 4 * (1 - score)
    return round(min(10, max(1, scaled)), 2)


def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 "accomplishment" score.

    Starts at 5.0, adds 1.5 per token found in the cue-word set and 0.5 per
    token tagged as past tense / past participle (``VBD`` / ``VBN``).
    """
    doc = nlp(text)
    score = 5.0
    for token in doc:
        if token.text.lower() in _ACCOMPLISHMENT_WORDS:
            score += 1.5
        if token.tag_ in {"VBD", "VBN"}:
            score += 0.5
    return round(min(10, max(1, score)), 2)


def semantic_indicator_mapping(
    text: str,
    sentiment_score: float,
    sentiment_weight: float = 0.3,
) -> Dict[str, float]:
    """Score *text* against every GNH domain.

    Each score blends the cosine similarity to the domain description
    (clamped to 0-1) with ``sentiment_score / 10`` using *sentiment_weight*.
    The result is ordered from highest to lowest score.
    """
    v = encode_text(text)
    out: Dict[str, float] = {}
    for dom, desc in GNH_DOMAINS.items():
        sim = float(util.cos_sim(v, _domain_embedding(desc)).item())
        sim = max(0.0, min(1.0, sim))
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[dom] = round(blended, 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))


def indicators_plot(indicators: Dict[str, float]):
    """Return a horizontal bar chart (matplotlib Figure) of *indicators*."""
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    # Draw through the Axes object instead of pyplot's implicit "current
    # figure", then detach the figure from pyplot so repeated requests do not
    # accumulate open figures. The returned Figure can still be rendered.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()
    ax.set_title("GNH Indicator Similarity")
    ax.set_xlabel("Score")
    fig.tight_layout()
    plt.close(fig)
    return fig


# -------------------- prompt building (legible placeholders) --------------------
def join_all_words(color: str) -> List[str]:
    """Return every lexicon word for *color* (all three columns, de-duplicated)."""
    d = COLOR_LEX.get(color.lower(), {})
    return list(
        dict.fromkeys(d.get("matrice1", []) + d.get("matrice", []) + d.get("english", []))
    )


@functools.lru_cache(maxsize=256)
def _nearest_gnh_domain_cached(color_lc: str) -> Tuple[str, float]:
    """Cached worker for ``nearest_gnh_domain_for_color`` (lexicon is static)."""
    words = " ".join(join_all_words(color_lc))
    if not words:
        return "Mental Wellness", 0.0
    v = encode_text(words)
    best, best_sim = None, -1.0
    for dom, desc in GNH_DOMAINS.items():
        sim = float(util.cos_sim(v, _domain_embedding(desc)).item())
        if sim > best_sim:
            best, best_sim = dom, sim
    return best or "Mental Wellness", best_sim


def nearest_gnh_domain_for_color(color: str) -> Tuple[str, float]:
    """Return ``(domain, similarity)`` of the GNH domain closest to *color*.

    The colour's lexicon words are embedded and compared with each domain
    description. Colours without lexicon words map to
    ``("Mental Wellness", 0.0)``.
    """
    return _nearest_gnh_domain_cached(color.lower())


def labels_for_mode(colors: List[str], mode: str) -> List[str]:
    """Return one display label per colour: GNH domain names or colour names."""
    if mode.lower().startswith("gnh"):
        return [nearest_gnh_domain_for_color(c)[0] for c in colors]
    return [c.capitalize() for c in colors]


def placeholder_for(color: str, mode: str) -> str:
    """Always show a meaningful placeholder driven by the chosen mode.

    Lookup order: the lexicon column for *mode*, then the other two lexicon
    columns (prefixed with their source), then the mapped GNH domain.
    """
    color_lc = color.lower()
    if mode.lower().startswith("gnh"):
        dom, _ = nearest_gnh_domain_for_color(color_lc)
        return f"{dom}: {GNH_DOMAINS.get(dom, '')}"

    # map mode -> CSV column key
    mode_key = {
        "matrice1": "matrice1",
        "matrice": "matrice",
        "english": "english",
    }.get(mode.lower(), "matrice")

    lex = COLOR_LEX.get(color_lc, {})
    primary = lex.get(mode_key, [])

    # If the chosen column has entries, use them.
    if primary:
        return ", ".join(primary[:12])

    # Otherwise, try the other two lexicon columns (ordered).
    fallback_order = [k for k in ("matrice1", "matrice", "english") if k != mode_key]
    for fb in fallback_order:
        words = lex.get(fb, [])
        if words:
            label = "Matrice1" if fb == "matrice1" else ("Matrice" if fb == "matrice" else "English")
            return f"(from {label}) " + ", ".join(words[:12])

    # Final fallback: mapped GNH domain description (still a "meaning", just
    # not from the lexicon).
    dom, _ = nearest_gnh_domain_for_color(color_lc)
    return f"(mapped GNH) {dom}: {GNH_DOMAINS.get(dom, '')}"


def simple_color_legend(colors: List[str]) -> str:
    """Return an HTML legend with one coloured dot and name per colour.

    NOTE(review): the markup of this function was missing from the reviewed
    source; the HTML below is a reconstruction that assumes colour names are
    usable as CSS colours. Confirm against the intended design.
    """
    if not colors:
        return "No prompts available for this pathway."
    parts = []
    for c in colors:
        # Only plain colour tokens are placed inside the style attribute.
        css_color = c if re.fullmatch(r"[#\w-]+", c) else "transparent"
        dot = (
            "<span style='display:inline-block;width:12px;height:12px;"
            f"border-radius:50%;background:{css_color};border:1px solid #999;"
            "margin-right:6px;'></span>"
        )
        parts.append(
            "<div style='display:flex;align-items:center;margin:2px 0;'>"
            f"{dot}{html.escape(c.capitalize())}</div>"
        )
    return "<div>" + "".join(parts) + "</div>"


def colors_for_sequence(seq_key: str) -> List[str]:
    """Return the colour names of a pathway key (empty list if unknown)."""
    return SEQ_TO_COLORS.get(seq_key, [])


def update_prompt_ui(seq_choice: str, word_mode: str, reset_values: bool = True):
    """Build the legend HTML and one ``gr.update`` per colour text box.

    Args:
        seq_choice: Pathway display name chosen in the dropdown.
        word_mode: One of ``WORD_MODES``.
        reset_values: When true (default) visible boxes are cleared; pass
            ``False`` to refresh labels/placeholders without discarding what
            the user typed.

    Returns:
        ``(legend_html, update_1, ..., update_MAX_COLORS)``.
    """
    key = SEQUENCE_ALIASES.get(seq_choice)
    colors = colors_for_sequence(key)
    labels = labels_for_mode(colors, word_mode)
    legend_html = simple_color_legend(colors)

    updates = []
    for i in range(MAX_COLORS):
        if i < len(colors):
            lab = labels[i] if i < len(labels) else f"Input {i+1}"
            ph = placeholder_for(colors[i], word_mode)
            box_kwargs = {"visible": True, "label": f"{lab} meaning", "placeholder": ph}
            if reset_values:
                box_kwargs["value"] = ""
            updates.append(gr.update(**box_kwargs))
        else:
            updates.append(
                gr.update(visible=False, value="", label=f"Input {i+1}", placeholder="—")
            )
    return (legend_html, *updates)


# -------------------- template replacement --------------------
def render_phrase_template(
    base_phrase: str,
    colors: List[str],
    labels: List[str],
    inputs: List[str],
) -> str:
    """Fill the pathway template with the user's meanings.

    Replace occurrences of '<color>-pathway' (hyphen and/or whitespace
    variants such as 'brown-pathway', 'brown pathway', 'Brown- Pathway') with
    the user's phrase for that color. If the user left it empty, keep the
    label (color name or mapped GNH indicator). Finally, append a compact
    legend ' // Label: input' for every non-empty input.
    """
    text = base_phrase or ""

    # Pad so colours without a matching input still fall back to their label.
    padded = list(inputs) + [""] * max(0, len(colors) - len(inputs))

    # build replacement map color -> replacement text
    rep: Dict[str, str] = {}
    for color, label, user in zip(colors, labels, padded):
        cleaned = user.strip() if isinstance(user, str) else ""
        if color:
            rep[color.lower()] = cleaned or label

    if rep:
        # One pass over the template: text inserted for one colour is never
        # re-scanned for another colour's token. Longest names first so a
        # colour that is a prefix of another cannot shadow it.
        alternation = "|".join(re.escape(c) for c in sorted(rep, key=len, reverse=True))
        pattern = re.compile(
            rf"\b({alternation})(?:\s*-\s*|\s+)pathway\b",
            re.IGNORECASE,
        )
        # A function replacement inserts the text literally; passing user
        # text as a replacement *string* would interpret backslashes.
        text = pattern.sub(lambda m: rep.get(m.group(1).lower(), m.group(0)), text)

    # The template is preserved either way; the meanings the user actually
    # typed are appended as a suffix.
    suffix_parts = []
    for _color, label, user in zip(colors, labels, padded):
        if isinstance(user, str) and user.strip():
            suffix_parts.append(f"{label}: {user.strip()}")
    if suffix_parts:
        text = (text + " // " + " // ".join(suffix_parts)).strip()
    return text


# -------------------- main analysis --------------------
def analyze(text: str, seq_choice: str, word_mode: str, *color_inputs):
    """Run the full analysis for the "Generate" button.

    Returns a tuple matching the button's outputs: sentiment, emotion string,
    accomplishment, updated phrase, top-5 indicator text, plot figure, image
    path, meta string, followed by the legend HTML and one update per colour
    box. An unknown pathway yields neutral placeholder values.
    """
    key = SEQUENCE_ALIASES.get(seq_choice)
    if key not in SEQ_PHRASE:
        return (
            5.0, "neutral (0.0)", 5.0,
            "Choose a valid pathway.", "{}", None, None,
            f"{seq_choice} (unavailable)",
            *update_prompt_ui(seq_choice, word_mode),
        )

    colors = colors_for_sequence(key)
    labels = labels_for_mode(colors, word_mode)
    base_phrase = SEQ_PHRASE.get(key, "")

    # updated phrase with template replacement
    user_inputs = list(color_inputs)[:len(colors)]
    updated_phrase = render_phrase_template(base_phrase, colors, labels, user_inputs)

    # analysis on original + updated
    combined_text = " ".join([t for t in [text, updated_phrase] if t and t.strip()])

    sentiment = score_sentiment(combined_text)
    emotion, emo_conf = classify_emotion(combined_text)
    accomplishment = score_accomplishment(combined_text)

    indicators = semantic_indicator_mapping(combined_text, sentiment_score=sentiment)
    fig = indicators_plot(indicators)
    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)

    img_path = sequence_to_image_path(key)
    meta = f"{key} | colors: {', '.join(colors) if colors else '—'}"
    emo_str = f"{emotion} ({emo_conf:.3f})"

    # keep prompt area synced, without wiping the meanings just analysed
    prompt_updates = update_prompt_ui(seq_choice, word_mode, reset_values=False)

    return (
        sentiment, emo_str, accomplishment, updated_phrase,
        top5_str, fig, img_path, meta,
        *prompt_updates,
    )


# -------------------- UI --------------------
SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
DEFAULT_SEQ = "Knot" if "Knot" in SEQ_CHOICES else SEQ_CHOICES[0]

with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown(
        "## RGB Root Matriz Color Plotter\n"
        "Type a phrase. Choose a **Sequence**. "
        "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset."
    )

    with gr.Row():
        inp = gr.Textbox(
            lines=4,
            label="Your situation / obstacle",
            placeholder="Describe the situation...",
        )

    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value=DEFAULT_SEQ, label="Pathway")
        word_mode = gr.Radio(choices=WORD_MODES, value="Matrice", label="Word Mode")

    legend = gr.HTML()

    # One hidden text box per possible colour; update_prompt_ui reveals the
    # ones the selected pathway needs.
    color_boxes: List[gr.Textbox] = []
    for i in range(MAX_COLORS):
        color_boxes.append(gr.Textbox(visible=False, label=f"Input {i+1}", placeholder="—"))

    run = gr.Button("Generate Pathway Analysis", variant="primary")

    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")

    with gr.Row():
        phrase_out = gr.Text(label="Updated Pathway Phrase (template with your meanings)")
        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")

    gnh_plot = gr.Plot(label="GNH Similarity")
    img_out = gr.Image(label="Pathway image", type="filepath")
    meta_out = gr.Text(label="Chosen pathway / colors")

    def _update_ui(seq_choice, mode):
        """Refresh legend and colour boxes when the pathway or mode changes."""
        return update_prompt_ui(seq_choice, mode)

    seq.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
    word_mode.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

    run.click(
        fn=analyze,
        inputs=[inp, seq, word_mode, *color_boxes],
        outputs=[
            sent, emo, acc, phrase_out, gnh_top, gnh_plot, img_out, meta_out,
            legend, *color_boxes,
        ],
    )

    demo.load(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

if __name__ == "__main__":
    demo.launch()