Spaces:

daniellegauthier
/

RGB-Root-Matriz-Color-Plotter

Running

File size: 17,112 Bytes

27aafc2
617928b
 
27aafc2
617928b
dd6720a
2de35e1
dd6720a
 
2dd7883
dd6720a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2de35e1
2dd7883
dd6720a
 
 
 
2dd7883
 
64742da
27aafc2
 
 
2dd7883
27aafc2
882bed0
33221b5
27aafc2
 
 
 
 
 
 
 
 
 
 
 
882bed0
27aafc2
 
 
 
 
 
 
 
 
dd6720a
 
 
 
 
 
 
 
 
 
 
 
 
617928b
dd6720a
 
 
 
2d09bac
dd6720a
2d09bac
dd6720a
 
2d09bac
 
 
dd6720a
 
 
2d09bac
dd6720a
 
2dd7883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27aafc2
 
64742da
27aafc2
617928b
2de35e1
27aafc2
2de35e1
2dd7883
2de35e1
617928b
 
2dd7883
 
 
2de35e1
 
 
 
 
2dd7883
 
 
 
27aafc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2dd7883
27aafc2
617928b
 
 
2de35e1
617928b
2dd7883
64742da
 
 
dd6720a
 
 
 
 
 
2de35e1
 
dd6720a
 
 
 
617928b
 
dd6720a
 
64742da
27aafc2
dd6720a
27aafc2
 
617928b
dd6720a
64742da
 
dd6720a
64742da
 
dd6720a
 
64742da
dd6720a
 
 
27aafc2
dd6720a
27aafc2
dd6720a
 
64742da
dd6720a
 
 
 
2dd7883
 
64742da
2dd7883
64742da
 
2dd7883
 
64742da
2dd7883
64742da
 
 
 
 
 
 
 
 
2dd7883
64742da
 
2dd7883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64742da
 
 
 
 
2dd7883
64742da
2dd7883
64742da
 
92b24cb
2dd7883
 
64742da
2dd7883
 
64742da
2dd7883
 
64742da
2dd7883
 
 
64742da
2dd7883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64742da
 
2dd7883
 
64742da
 
 
 
2dd7883
 
 
 
 
 
 
 
 
 
 
 
2de35e1
dd6720a
 
 
64742da
2dd7883
 
27aafc2
2dd7883
 
2de35e1
dd6720a
27aafc2
64742da
2dd7883
dd6720a
 
2dd7883
64742da
2dd7883
64742da
617928b
2de35e1
445f0ca
617928b
 
dd6720a
64742da
2de35e1
 
64742da
2dd7883
2de35e1
2dd7883
2de35e1
2dd7883
64742da
2dd7883
617928b
27aafc2
617928b
dd6720a
 
27aafc2
 
617928b
 
2dd7883
27aafc2
617928b
64742da
27aafc2
64742da
617928b
64742da
 
 
2dd7883
 
dd6720a
27aafc2
2de35e1
2dd7883
 
617928b
dd6720a
2dd7883
92b24cb
dd6720a

import functools
import os, re
from typing import Dict, Tuple, List

import nltk, spacy, torch, pandas as pd, matplotlib.pyplot as plt
import torch.nn.functional as F
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util

# -------------------- setup --------------------
def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it first if missing.

    Returns:
        The loaded spaCy ``Language`` pipeline.

    Raises:
        OSError: if the model still cannot be loaded after the download attempt.
    """
    model_name = "en_core_web_sm"
    try:
        return spacy.load(model_name)
    except OSError:
        # spaCy reports a missing model package as OSError.
        # NOTE: the import must NOT be written as `import spacy.cli` here: that
        # statement binds the name `spacy` locally, which makes the
        # `spacy.load` call above fail with UnboundLocalError and forces a
        # download on every start. `from ... import` binds only `download`.
        from spacy.cli import download

        download(model_name)
        return spacy.load(model_name)

def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is present, fetching it if the lookup fails."""
    punkt_present = True
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        punkt_present = False

    if not punkt_present:
        nltk.download("punkt")

# Import-time setup: everything below runs once when the module is loaded.
ensure_nltk()
nlp = ensure_spacy()  # spaCy pipeline, used by score_accomplishment

# Sentence embedding model, used by encode_text for the GNH similarity scores.
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Binary sentiment classifier, used by score_sentiment (reads "label" and "score").
bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Emotion classifier: tokenizer and model are loaded separately because
# classify_emotion runs the forward pass and softmax by hand.
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)

# -------------------- constants --------------------
# Paths are relative, so they resolve against the process working directory.
CSV_PATH_PLUS  = "la matrice plus.csv"   # pathways + colors + template words
CSV_PATH_COLOR = "la matrice.csv"        # color lexicon

# UI display name -> pathway key. The keys (values of this dict) are matched
# against the lower-cased 'color' column by load_pathway_info and are also the
# keys of SEQUENCE_IMAGE_FILES.
SEQUENCE_ALIASES = {
    "Direct": "direct",
    "Feminine": "feminine",
    "Knot": "knot",
    "Masculine": "masc",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}

# Pathway key -> image file name; sequence_to_image_path returns the name only
# when the file exists.
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masc": "masc pathway.png",
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png"
}

# GNH domain name -> keyword description. The descriptions are what gets
# embedded and compared (cosine similarity) against user text / color words.
# "Mental Wellness" is first and is also the hard-coded fallback domain in
# nearest_gnh_domain_for_color.
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}

# GNH domain name -> bar color (hex) used by indicators_plot; domains missing
# here fall back to "#cccccc" there.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#FA005A",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#FAB478",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#0000FF",
    "Ecological Diversity": "#00FF00",
    "Health": "#FF0000",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#B432FF",
}

# Choices of the "Word Mode" radio. Modes starting with "gnh" (case-insensitive)
# switch labels/placeholders to GNH domains; the others select a lexicon column.
WORD_MODES = ["Matrice1", "Matrice", "English", "GNH Indicators"]
# Number of per-color textboxes created in the UI; update_prompt_ui only shows
# boxes for the first MAX_COLORS colors of a pathway.
MAX_COLORS = 8

# -------------------- loaders --------------------
def _find_col(df: pd.DataFrame, candidates: List[str]) -> str | None:
    names = {c.lower(): c for c in df.columns}
    for c in candidates:
        if c.lower() in names: return names[c.lower()]
    for want in candidates:
        ww = want.replace(" ", "").replace("-", "")
        for lc, orig in names.items():
            if ww in lc.replace(" ", "").replace("-", ""):
                return orig
    return None

def load_pathway_info(csv_path_plus: str):
    """Read the pathway rows of the "plus" CSV.

    A row is a pathway row when its ``color`` cell (trimmed, lower-cased) is one
    of the pathway keys in ``SEQUENCE_ALIASES``. For such rows the ``r`` column
    holds the pathway's color list (comma/whitespace separated) and all columns
    other than ``color``/``r``/``g``/``b`` are concatenated into the phrase
    template.

    Args:
        csv_path_plus: path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        ``(seq_to_colors, seq_phrase)``: pathway key -> de-duplicated, lower-cased
        color names in file order, and pathway key -> whitespace-normalized
        template phrase. If a key occurs on several rows the last row wins.

    Raises:
        KeyError: if the CSV has no ``color`` column.
    """
    df = pd.read_csv(csv_path_plus)
    if "color" not in df.columns:
        raise KeyError("color")

    keys = set(SEQUENCE_ALIASES.values())
    # Normalize exactly like the per-row key below (strip + lower) so a padded
    # cell such as " Knot " is not silently dropped by the filter.
    normalized = df["color"].astype(str).str.strip().str.lower()
    rows = df[normalized.isin(keys)]

    seq_to_colors: Dict[str, List[str]] = {}
    seq_phrase: Dict[str, str] = {}

    # colors live in 'r' (list), template = concat of the other fields
    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]
    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()

        raw_colors = row.get("r")
        # An empty cell is NaN, which is truthy: `NaN or ""` would become the
        # string "nan" and show up as a bogus color. Treat it as empty instead.
        color_list = "" if (pd.isna(raw_colors) or not raw_colors) else str(raw_colors)
        colors = [c.strip().lower() for c in re.split(r"[,\s]+", color_list) if c.strip()]
        seq_to_colors[key] = list(dict.fromkeys(colors))

        vals = []
        for col in cols_for_phrase:
            cell = row.get(col)
            if pd.isna(cell):
                continue
            cell_text = str(cell).strip()
            if cell_text and cell_text.lower() != "nan":
                vals.append(cell_text)
        # split/join collapses any internal runs of whitespace
        seq_phrase[key] = " ".join(" ".join(vals).split())

    return seq_to_colors, seq_phrase

def _split_words(s: str) -> List[str]:
    """Break a delimited cell into lower-cased words.

    Commas, semicolons, slashes, pipes and any whitespace all act as
    separators; empty fragments are dropped. Non-string input yields ``[]``.
    """
    if not isinstance(s, str):
        return []
    # Collecting the runs of non-separator characters is the mirror image of
    # splitting on separator runs and discarding the empty pieces.
    tokens = re.findall(r"[^,\;/\|\s]+", s)
    return [token.lower() for token in tokens]

def load_color_lexicon(csv_path_color: str):
    """Read the color lexicon CSV into ``{color: {"matrice1", "matrice", "english"}}``.

    Column names are resolved with ``_find_col`` (case-insensitive, tolerant of
    spaces/hyphens). Each word-list cell is split with ``_split_words``.

    Args:
        csv_path_color: path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        Mapping of lower-cased color name to its three word lists. Rows without a
        color name are skipped; a missing column or empty cell gives an empty
        list. Returns ``{}`` when no color column can be found.
    """
    df = pd.read_csv(csv_path_color)
    color_col = _find_col(df, ["color", "colour"])
    m1_col = _find_col(df, ["matrice1", "matrice 1"])
    m_col  = _find_col(df, ["matrice"])
    en_col = _find_col(df, ["english-words-code", "english words code", "english_words_code", "english"])

    def _cell(row, col) -> str:
        # Empty CSV cells arrive as NaN; str(NaN) == "nan" would otherwise be
        # stored as a real word (or as a color literally named "nan").
        if col is None:
            return ""
        value = row.get(col)
        return "" if pd.isna(value) else str(value)

    lex: Dict[str, Dict[str, List[str]]] = {}
    if color_col is None:
        return lex

    for _, row in df.iterrows():
        cname = _cell(row, color_col).strip().lower()
        if not cname:
            continue
        lex[cname] = {
            "matrice1": _split_words(_cell(row, m1_col)),
            "matrice":  _split_words(_cell(row, m_col)),
            "english":  _split_words(_cell(row, en_col)),
        }
    return lex

# Both CSVs are read once at import time; a missing or unreadable file makes
# the module fail to import.
# SEQ_TO_COLORS: pathway key -> color names; SEQ_PHRASE: pathway key -> template phrase.
SEQ_TO_COLORS, SEQ_PHRASE = load_pathway_info(CSV_PATH_PLUS)
# COLOR_LEX: color name -> {"matrice1": [...], "matrice": [...], "english": [...]}.
COLOR_LEX = load_color_lexicon(CSV_PATH_COLOR)

def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the image file name registered for *seq_key*, or ``None``.

    ``None`` is returned both when the key has no entry in
    ``SEQUENCE_IMAGE_FILES`` and when the registered file does not exist.
    """
    image_name = SEQUENCE_IMAGE_FILES.get(seq_key)
    if not image_name:
        return None
    if not os.path.exists(image_name):
        return None
    return image_name

# -------------------- NLP helpers --------------------
@functools.lru_cache(maxsize=256)
def encode_text(t: str):
    """Embed *t* with the sentence-transformer model and return it as a tensor.

    Results are memoized (bounded LRU). The fixed GNH domain descriptions and
    the per-color word strings are embedded again on every analysis and every
    UI refresh, so caching removes most of the repeated model calls.

    Args:
        t: text to embed. Must be a ``str`` (the cache needs a hashable key).

    Returns:
        The embedding tensor. The same tensor object may be returned for equal
        inputs, so callers must treat it as read-only.
    """
    return sbert_model.encode(t, convert_to_tensor=True)

def classify_emotion(text: str) -> Tuple[str, float]:
    """Run the emotion classifier on *text*.

    Returns:
        ``(label, probability)`` for the highest-probability class, where the
        label comes from the model config's ``id2label`` mapping.
    """
    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: no gradients needed.
    with torch.no_grad():
        class_logits = emotion_model(**encoded).logits
        distribution = F.softmax(class_logits, dim=1).squeeze()
    top_index = int(torch.argmax(distribution).item())
    top_label = emotion_model.config.id2label[top_index]
    return top_label, float(distribution[top_index].item())

def score_sentiment(text: str) -> float:
    """Map the sentiment classifier's verdict on *text* to a 1-10 score.

    Only the first 512 characters are classified. A POSITIVE verdict with
    confidence ``p`` maps to ``5 + 5p``; any other label maps to
    ``1 + 4(1 - p)``. The result is clamped to [1, 10] and rounded to 2 places.
    """
    verdict = bert_sentiment(text[:512])[0]
    confidence = verdict["score"]
    if verdict["label"] == "POSITIVE":
        raw = 5 + 5 * confidence
    else:
        raw = 1 + 4 * (1 - confidence)
    return round(min(10, max(1, raw)), 2)

def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 "accomplishment" score based on the verbs in *text*.

    Starting from 5.0, every token whose lower-cased text is a key word adds
    1.5 and every token tagged VBD or VBN adds 0.5 (a token can earn both).
    The total is clamped to [1, 10] and rounded to 2 places.
    """
    # Single-token key words only. The former "walked away" entry was dropped:
    # tokens are compared one at a time, so a two-word phrase could never match
    # (and "walked" on its own is already listed).
    key_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked", "returned", "return"}
    past_tags = {"VBD", "VBN"}

    score = 5.0
    for token in nlp(text):
        if token.text.lower() in key_words:
            score += 1.5
        if token.tag_ in past_tags:
            score += 0.5
    return round(min(10, max(1, score)), 2)

def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score *text* against every GNH domain.

    For each domain the cosine similarity between the embeddings of *text* and
    the domain description is clamped to [0, 1] and blended with the sentiment:
    ``(1 - sentiment_weight) * similarity + sentiment_weight * (sentiment_score / 10)``.

    Returns:
        Domain -> blended score (rounded to 3 places), ordered from highest to
        lowest score.
    """
    text_vec = encode_text(text)
    semantic_share = 1 - sentiment_weight
    sentiment_part = sentiment_weight * (sentiment_score / 10.0)

    scored = []
    for domain, description in GNH_DOMAINS.items():
        similarity = float(util.cos_sim(text_vec, encode_text(description)).item())
        similarity = max(0.0, min(1.0, similarity))
        scored.append((domain, round(semantic_share * similarity + sentiment_part, 3)))

    # Stable sort: domains with equal scores keep their GNH_DOMAINS order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return dict(scored)

def indicators_plot(indicators: Dict[str, float]):
    """Draw a horizontal bar chart of GNH indicator scores.

    Args:
        indicators: domain -> score, in the order the bars should appear from
            top to bottom.

    Returns:
        The matplotlib ``Figure``. It is already deregistered from pyplot, so
        the caller does not need to close it.
    """
    labels = list(indicators)
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    # Draw through the Axes object rather than pyplot's implicit "current
    # figure", which is shared global state across concurrent requests.
    fig, ax = plt.subplots(figsize=(8, 5))
    # The thin outline keeps the white "Political Wellness" bar visible.
    ax.barh(labels, values, color=colors, edgecolor="#999999")
    ax.invert_yaxis()  # first (highest) entry at the top
    ax.set_title("GNH Indicator Similarity")
    ax.set_xlabel("Score")
    fig.tight_layout()

    # pyplot keeps every figure it creates alive until it is closed; in a
    # long-running server that is a leak. Closing only drops pyplot's
    # reference - the Figure object can still be rendered by the caller.
    plt.close(fig)
    return fig

# -------------------- prompt building (legible placeholders) --------------------
def join_all_words(color: str) -> List[str]:
    """Return every lexicon word for *color*, de-duplicated, first occurrence first.

    Words are taken from the ``matrice1``, ``matrice`` and ``english`` lists in
    that order. An unknown color gives an empty list.
    """
    entry = COLOR_LEX.get(color.lower(), {})
    seen: Dict[str, None] = {}
    for column in ("matrice1", "matrice", "english"):
        for word in entry.get(column, []):
            seen.setdefault(word, None)
    return list(seen)

def nearest_gnh_domain_for_color(color: str) -> Tuple[str, float]:
    """Find the GNH domain whose description is most similar to *color*'s words.

    Returns:
        ``(domain, cosine_similarity)``. When the color has no lexicon words the
        result is ``("Mental Wellness", 0.0)``. On ties the domain listed first
        in ``GNH_DOMAINS`` wins.
    """
    vocabulary = " ".join(join_all_words(color))
    if not vocabulary:
        return "Mental Wellness", 0.0

    color_vec = encode_text(vocabulary)
    winner = None
    winner_sim = -1.0
    for domain, description in GNH_DOMAINS.items():
        similarity = float(util.cos_sim(color_vec, encode_text(description)).item())
        # Strictly greater: an equal score never displaces an earlier domain.
        if similarity > winner_sim:
            winner = domain
            winner_sim = similarity
    return winner or "Mental Wellness", winner_sim

def labels_for_mode(colors: List[str], mode: str) -> List[str]:
    """Return one display label per color for the given word mode.

    In a mode whose name starts with "gnh" (any case) each label is the color's
    nearest GNH domain; in every other mode it is the capitalized color name.
    """
    use_gnh = mode.lower().startswith("gnh")
    labels: List[str] = []
    for name in colors:
        if use_gnh:
            labels.append(nearest_gnh_domain_for_color(name)[0])
        else:
            labels.append(name.capitalize())
    return labels

def placeholder_for(color: str, mode: str) -> str:
    """Build the textbox placeholder for *color* under the chosen word mode.

    Resolution order:

    1. GNH mode (mode starts with "gnh"): ``"<domain>: <description>"`` of the
       nearest GNH domain.
    2. Up to 12 words from the lexicon column selected by the mode (unknown
       modes use "matrice").
    3. Up to 12 words from the first other non-empty lexicon column, prefixed
       with ``"(from <Column>) "``.
    4. The nearest GNH domain, prefixed with ``"(mapped GNH) "``.
    """
    lowered_color = color.lower()
    lowered_mode = mode.lower()

    if lowered_mode.startswith("gnh"):
        domain, _ = nearest_gnh_domain_for_color(lowered_color)
        return f"{domain}: {GNH_DOMAINS.get(domain, '')}"

    # lexicon column key -> display name; insertion order is the fallback order
    column_titles = {"matrice1": "Matrice1", "matrice": "Matrice", "english": "English"}
    chosen = lowered_mode if lowered_mode in column_titles else "matrice"

    entry = COLOR_LEX.get(lowered_color, {})
    search_order = [chosen] + [col for col in column_titles if col != chosen]
    for column in search_order:
        words = entry.get(column, [])
        if not words:
            continue
        shown = ", ".join(words[:12])
        if column == chosen:
            return shown
        return f"(from {column_titles[column]}) " + shown

    # Nothing in the lexicon at all: fall back to the mapped GNH description.
    domain, _ = nearest_gnh_domain_for_color(lowered_color)
    return f"(mapped GNH) {domain}: {GNH_DOMAINS.get(domain, '')}"


def simple_color_legend(colors: List[str]) -> str:
    """Render an HTML legend: one row per color with a round swatch and its name.

    The color name is used verbatim as the CSS ``background`` value of the
    swatch. With no colors a plain "no prompts" message is returned instead.
    """
    if not colors:
        return "No prompts available for this pathway."

    def _row(name: str) -> str:
        swatch = f"<span style='display:inline-block;width:10px;height:10px;border-radius:50%;background:{name};margin-right:8px;border:1px solid #999;vertical-align:middle'></span>"
        return f"<div style='margin:4px 0'>{swatch}<b>{name.capitalize()}</b></div>"

    rows = "".join(_row(name) for name in colors)
    return "<div>" + rows + "</div>"

def colors_for_sequence(seq_key: str) -> List[str]:
    """Return the color names of pathway *seq_key*, or ``[]`` for an unknown key."""
    try:
        return SEQ_TO_COLORS[seq_key]
    except KeyError:
        return []

def update_prompt_ui(seq_choice: str, word_mode: str):
    """Compute the legend HTML and the per-color textbox updates for a pathway.

    Returns:
        A tuple ``(legend_html, update_1, ..., update_MAX_COLORS)``. Boxes that
        correspond to a pathway color become visible with a mode-specific label
        and placeholder; the remaining boxes are hidden. Every box's value is
        reset to ``""``.
    """
    pathway_colors = colors_for_sequence(SEQUENCE_ALIASES.get(seq_choice))
    mode_labels = labels_for_mode(pathway_colors, word_mode)
    legend_html = simple_color_legend(pathway_colors)

    def _box_update(slot: int):
        if slot >= len(pathway_colors):
            # No color for this slot: hide the box and restore its defaults.
            return gr.update(visible=False, value="", label=f"Input {slot+1}", placeholder="—")
        caption = mode_labels[slot] if slot < len(mode_labels) else f"Input {slot+1}"
        hint = placeholder_for(pathway_colors[slot], word_mode)
        return gr.update(visible=True, label=f"{caption} meaning", placeholder=hint, value="")

    box_updates = [_box_update(slot) for slot in range(MAX_COLORS)]
    return (legend_html, *box_updates)

# -------------------- template replacement --------------------
def render_phrase_template(base_phrase: str, colors: List[str], labels: List[str], inputs: List[str]) -> str:
    """Fill the pathway template with the user's meanings.

    Every ``<color>-pathway`` token is replaced by the user's text for that
    color, or by the color's label when the user left it empty. Tokens are
    matched case-insensitively and may be written with a hyphen, spaces, or
    both (``brown-pathway``, ``brown pathway``, ``Brown - Pathway``).

    After substitution a compact legend `` // Label: input`` is appended for
    every color the user actually filled in.

    Args:
        base_phrase: template text; ``None``/empty is treated as ``""``.
        colors: color names, in pathway order.
        labels: display label per color. A missing label falls back to the
            capitalized color name.
        inputs: user text per color. Missing or non-string entries count as
            empty.

    Returns:
        The rendered phrase.
    """
    text = base_phrase or ""

    # (lower-cased color, label, cleaned user text) per color. Walking `colors`
    # by index instead of zip() means colors beyond the supplied inputs still
    # get their token replaced by the label.
    entries = []
    for position, color in enumerate(colors):
        label = labels[position] if position < len(labels) else color.capitalize()
        raw = inputs[position] if position < len(inputs) else ""
        user = raw.strip() if isinstance(raw, str) else ""
        entries.append((color.lower(), label, user))

    replacements: Dict[str, str] = {
        color: (user or label) for color, label, user in entries if color
    }

    if replacements:
        # One alternation over all colors, longest first so "dark red" is
        # preferred over "red" where both could start a match.
        names = sorted(replacements, key=len, reverse=True)
        alternatives = "|".join(re.escape(name) for name in names)
        token = re.compile(
            rf"\b({alternatives})(?:\s*-\s*|\s+)pathway\b", re.IGNORECASE
        )

        def _swap(match):
            # A function replacement is inserted literally, so backslashes in
            # user text are not interpreted as regex group references.
            return replacements.get(match.group(1).lower(), match.group(0))

        # Single pass: text inserted for one color is never re-scanned, so a
        # user meaning that itself contains "<color>-pathway" stays as typed.
        text = token.sub(_swap, text)

    suffix_parts = [f"{label}: {user}" for _, label, user in entries if user]
    if suffix_parts:
        text = (text + " // " + " // ".join(suffix_parts)).strip()

    return text

# -------------------- main analysis --------------------
def analyze(text: str, seq_choice: str, word_mode: str, *color_inputs):
    """Run the full analysis for the "Generate" button.

    Args:
        text: the user's free-text situation.
        seq_choice: pathway display name (a key of ``SEQUENCE_ALIASES``).
        word_mode: one of ``WORD_MODES``.
        *color_inputs: current values of the per-color textboxes.

    Returns:
        A flat tuple matching the click handler's outputs: sentiment, emotion
        string, accomplishment, updated phrase, top-5 indicator text, figure,
        image path, meta string, followed by the legend HTML and one update per
        color textbox. For an unknown pathway, neutral defaults are returned.
    """
    pathway = SEQUENCE_ALIASES.get(seq_choice)
    if pathway not in SEQ_PHRASE:
        defaults = (
            5.0, "neutral (0.0)", 5.0, "Choose a valid pathway.", "{}", None, None,
            f"{seq_choice} (unavailable)",
        )
        return (*defaults, *update_prompt_ui(seq_choice, word_mode))

    pathway_colors = colors_for_sequence(pathway)
    mode_labels = labels_for_mode(pathway_colors, word_mode)
    template = SEQ_PHRASE.get(pathway, "")

    # Only as many textbox values as the pathway has colors are relevant.
    meanings = list(color_inputs)[:len(pathway_colors)]
    updated_phrase = render_phrase_template(template, pathway_colors, mode_labels, meanings)

    # All scores are computed on the user's text plus the rendered phrase.
    non_empty = [piece for piece in (text, updated_phrase) if piece and piece.strip()]
    combined_text = " ".join(non_empty)

    sentiment = score_sentiment(combined_text)
    emotion, emo_conf = classify_emotion(combined_text)
    accomplishment = score_accomplishment(combined_text)

    indicators = semantic_indicator_mapping(combined_text, sentiment_score=sentiment)
    fig = indicators_plot(indicators)
    # indicators is already ordered best-first, so the head is the top 5.
    leading = list(indicators.items())[:5]
    top5_str = "\n".join(f"{domain}: {value}" for domain, value in leading)

    img_path = sequence_to_image_path(pathway)
    color_summary = ", ".join(pathway_colors) if pathway_colors else "—"
    meta = f"{pathway} | colors: {color_summary}"
    emo_str = f"{emotion} ({emo_conf:.3f})"

    # keep prompt area synced
    # NOTE(review): update_prompt_ui sets value="" on every box, so the
    # meanings the user just typed are cleared after each run - confirm that
    # this reset is intended.
    prompt_updates = update_prompt_ui(seq_choice, word_mode)

    return (
        sentiment, emo_str, accomplishment,
        updated_phrase, top5_str, fig, img_path, meta,
        *prompt_updates,
    )

# -------------------- UI --------------------
# Dropdown choices are the display names; "Knot" is preselected when present.
SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
DEFAULT_SEQ = "Knot" if "Knot" in SEQ_CHOICES else SEQ_CHOICES[0]

# Components are laid out in the order they are created inside the Blocks context.
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown("## RGB Root Matriz Color Plotter\n"
                "Type a phrase. Choose a **Sequence**. "
                "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")

    with gr.Row():
        inp = gr.Textbox(lines=4, label="Your situation / obstacle", placeholder="Describe the situation...")

    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value=DEFAULT_SEQ, label="Pathway")
        word_mode = gr.Radio(choices=WORD_MODES, value="Matrice", label="Word Mode")

    # Filled by update_prompt_ui with one swatch row per pathway color.
    legend = gr.HTML()

    # A fixed pool of MAX_COLORS textboxes is created up front, all hidden;
    # update_prompt_ui shows and relabels as many as the pathway has colors.
    color_boxes: List[gr.Textbox] = []
    for i in range(MAX_COLORS):
        color_boxes.append(gr.Textbox(visible=False, label=f"Input {i+1}", placeholder="—"))

    run = gr.Button("Generate Pathway Analysis", variant="primary")

    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo  = gr.Text(label="Emotion")
        acc  = gr.Number(label="Accomplishment (1–10)")

    with gr.Row():
        phrase_out = gr.Text(label="Updated Pathway Phrase (template with your meanings)")
        gnh_top    = gr.Text(label="Top GNH Indicators (Top 5)")

    gnh_plot = gr.Plot(label="GNH Similarity")
    img_out  = gr.Image(label="Pathway image", type="filepath")
    meta_out = gr.Text(label="Chosen pathway / colors")

    # Thin wrapper so all three events below share one callback.
    def _update_ui(seq_choice, mode):
        return update_prompt_ui(seq_choice, mode)

    # Changing the pathway or the word mode rebuilds the legend and the boxes.
    seq.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
    word_mode.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

    # The outputs list must stay in the same order as the tuple returned by
    # analyze: 8 result values, then the legend, then the MAX_COLORS box updates.
    run.click(
        fn=analyze,
        inputs=[inp, seq, word_mode, *color_boxes],
        outputs=[sent, emo, acc, phrase_out, gnh_top, gnh_plot, img_out, meta_out, legend, *color_boxes],
    )

    # Populate the legend and boxes for the default selection on page load.
    demo.load(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()