Spaces:

daniellegauthier
/

RGB-Root-Matriz-Color-Plotter

Sleeping

App Files Files Community

daniellegauthier committed on Sep 6

Commit

2de35e1

verified ·

1 Parent(s): ca33b05

Update app.py

Browse files

Files changed (1) hide show

app.py +249 -115

app.py CHANGED Viewed

@@ -1,21 +1,16 @@
 import os
-import io
 from typing import Dict, Tuple, List
 import nltk
 import spacy
 import torch
-import matplotlib.pyplot as plt
 import torch.nn.functional as F
 import pandas as pd
 import gradio as gr
-from transformers import (
-    pipeline,
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-)
 from sentence_transformers import SentenceTransformer, util
 # =========================
@@ -42,11 +37,7 @@ nlp = ensure_spacy()
 # 1) Models (cached)
 # =========================
 sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
-bert_sentiment = pipeline(
-    "sentiment-analysis",
-    model="distilbert-base-uncased-finetuned-sst-2-english"
-)
 emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
 emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
@@ -88,13 +79,11 @@ GNH_COLORS: Dict[str, str] = {
 }
 # =========================
-# 3) Pathway data
-#    - Reads phrases from bottom of "la matrice plus.csv"
-#    - Maps sequence keys -> phrase & image path
 # =========================
 CSV_PATH = "la matrice plus.csv"
-# Aliases so your UI label → CSV row & image file
 SEQUENCE_ALIASES = {
     "Auto (recommend)": "auto",
     "Direct": "direct",
@@ -124,39 +113,57 @@ SEQUENCE_IMAGE_FILES = {
     "sad": "sad pathway.png"
 }
-def load_pathway_phrases(csv_path: str) -> Dict[str, str]:
-    """
-    Build pathway phrase text by concatenating non-null columns
-    from 'matrice1' onward for each sequence row at the bottom of the sheet.
-    """
     df = pd.read_csv(csv_path)
     phrases: Dict[str, str] = {}
-    # We consider any row whose 'color' is one of our known sequences
-    valid_keys = set(SEQUENCE_IMAGE_FILES.keys()) | {"spiritual", "sad"}
-    rows = df[df["color"].astype(str).str.lower().isin(valid_keys)].copy()
     for _, row in rows.iterrows():
         key = str(row["color"]).strip().lower()
-        # join from column index 4 onward (matrice1 .. last "Unnamed")
-        text = " ".join(
-            str(v) for v in row.iloc[4:].tolist() if pd.notna(v)
-        ).strip()
-        # clean duplicate/missing spaces
-        text = " ".join(text.split())
-        phrases[key] = text
-    return phrases
-PATHWAY_PHRASES = load_pathway_phrases(CSV_PATH)
 def sequence_to_image_path(seq_key: str) -> str | None:
     fname = SEQUENCE_IMAGE_FILES.get(seq_key)
-    if fname and os.path.exists(fname):
-        return fname
-    return None  # image optional—app will handle gracefully
 # =========================
-# 4) Core scoring functions
 # =========================
 def classify_emotion(text: str) -> Tuple[str, float]:
     inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
@@ -164,8 +171,8 @@ def classify_emotion(text: str) -> Tuple[str, float]:
         logits = emotion_model(**inputs).logits
         probs = F.softmax(logits, dim=1).squeeze()
     labels = emotion_model.config.id2label
-    top_idx = int(torch.argmax(probs).item())
-    return labels[top_idx], float(probs[top_idx].item())
 def score_sentiment(text: str) -> float:
     out = bert_sentiment(text[:512])[0]
@@ -176,7 +183,7 @@ def score_sentiment(text: str) -> float:
 def score_accomplishment(text: str) -> float:
     doc = nlp(text)
     score = 5.0
-    key_phrases = {"finally", "told", "decided", "quit", "refused", "stood", "walked", "walked away"}
     for token in doc:
         if token.text.lower() in key_phrases:
             score += 1.5
@@ -184,48 +191,108 @@ def score_accomplishment(text: str) -> float:
             score += 0.5
     return round(min(10, max(1, score)), 2)
-def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
-    text_vec = sbert_model.encode(text, convert_to_tensor=True)
     out: Dict[str, float] = {}
     for label, desc in GNH_DOMAINS.items():
-        desc_vec = sbert_model.encode(desc, convert_to_tensor=True)
-        sim = float(util.cos_sim(text_vec, desc_vec).item())
         sim = max(0.0, min(1.0, sim))
         blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
         out[label] = round(blended, 3)
     return dict(sorted(out.items(), key=lambda kv: -kv[1]))
 # =========================
-# 5) Pathway selection logic
 # =========================
-def suggest_sequence(text: str) -> Tuple[str, float]:
     """
-    Choose the best pathway by SBERT similarity between the input text
-    and each pathway phrase from the CSV.
-    Returns (sequence_key, similarity_score).
     """
-    if not PATHWAY_PHRASES:
-        return "direct", 0.0
-    text_vec = sbert_model.encode(text, convert_to_tensor=True)
-    best_key, best_sim = None, -1.0
-    for key, phrase in PATHWAY_PHRASES.items():
-        if not phrase:
-            continue
-        phrase_vec = sbert_model.encode(phrase, convert_to_tensor=True)
-        sim = float(util.cos_sim(text_vec, phrase_vec).item())
-        if sim > best_sim:
-            best_key, best_sim = key, sim
-    return (best_key or "direct"), best_sim
-def pathway_payload(seq_key: str) -> Tuple[str, str | None]:
-    """Return (phrase, image_path) for a given sequence key."""
-    key = seq_key.strip().lower()
-    phrase = PATHWAY_PHRASES.get(key, "")
-    img = sequence_to_image_path(key)
-    return phrase, img
 # =========================
-# 6) Plot helper (GNH bars)
 # =========================
 def indicators_plot(indicators: Dict[str, float]):
     labels = list(indicators.keys())
@@ -234,64 +301,138 @@ def indicators_plot(indicators: Dict[str, float]):
     fig = plt.figure(figsize=(8, 5))
     plt.barh(labels, values, color=colors)
     plt.gca().invert_yaxis()
-    plt.title("GNH Indicator Similarity (Sentiment-weighted)")
     plt.xlabel("Score")
     plt.tight_layout()
     return fig
 # =========================
-# 7) Gradio app
 # =========================
 SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
-def analyze(text: str, seq_choice: str):
     if not text or not text.strip():
-        return (
-            5.0, "neutral (0.0)", 5.0,
-            "—", None,
-            "{}", None, "—", 0.0
-        )
     # 1) scores
     sentiment = score_sentiment(text)
     emotion, emo_conf = classify_emotion(text)
     accomplishment = score_accomplishment(text)
-    indicators = semantic_indicator_mapping(text, sentiment)
-    fig = indicators_plot(indicators)
-    # 2) pathway
-    chosen_key = SEQUENCE_ALIASES.get(seq_choice, "auto")
-    auto_key, auto_sim = suggest_sequence(text) if chosen_key == "auto" else (chosen_key, None)
-    final_key = auto_key
-    phrase, img_path = pathway_payload(final_key)
-    # outputs
     top5 = list(indicators.items())[:5]
     top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
     return (
-        sentiment,
-        f"{emotion} ({emo_conf:.3f})",
-        accomplishment,
-        final_key,                           # selected sequence key
-        phrase or "—",
-        top5_str,
-        fig,
-        img_path,                            # pathway image (optional)
-        auto_key if chosen_key == "auto" else seq_choice,
-        float(auto_sim or 0.0)
     )
 with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
-    gr.Markdown("## La Matriz Consulting, feat. BERT Emotion + GNH + Pathway\n"
                 "Type a phrase. Choose a **Sequence** or keep **Auto** to recommend a pathway. "
                 "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")
     with gr.Row():
-        inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
     with gr.Row():
-        seq = gr.Dropdown(choices=SEQ_CHOICES, value="Auto (recommend)", label="Sequence choice")
     btn = gr.Button("Analyze", variant="primary")
@@ -301,30 +442,23 @@ with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
         acc = gr.Number(label="Accomplishment (1–10)")
     with gr.Row():
-        seq_used = gr.Text(label="Chosen pathway key")
         phrase_out = gr.Text(label="Pathway phrase")
     with gr.Row():
         gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
-        gnh_plot = gr.Plot(label="GNH Similarity")
     with gr.Row():
         pathway_img = gr.Image(label="Pathway image", type="filepath")
-        auto_meta = gr.Text(label="Auto selection (key, similarity)")
-    def _wrap_analyze(text, seq_choice):
-        result = analyze(text, seq_choice)
-        # build auto meta text
-        auto_key = result[-2]
-        auto_sim = result[-1]
-        meta = f"{auto_key} (similarity={auto_sim:.3f})" if seq_choice == "Auto (recommend)" else "—"
-        return (*result[:-2], meta)
     btn.click(
-        fn=_wrap_analyze,
-        inputs=[inp, seq],
-        outputs=[sent, emo, acc, seq_used, phrase_out, gnh_top, gnh_plot, pathway_img, auto_meta]
     )
 if __name__ == "__main__":
     demo.launch()

 import os
+import re
 from typing import Dict, Tuple, List
 import nltk
 import spacy
 import torch
 import torch.nn.functional as F
+import matplotlib.pyplot as plt
 import pandas as pd
 import gradio as gr
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from sentence_transformers import SentenceTransformer, util
 # =========================
 # 1) Models (cached)
 # =========================
 sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
+bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
 emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
 emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
 }
 # =========================
+# 3) Pathways (CSV + images)
 # =========================
 CSV_PATH = "la matrice plus.csv"
+# UI label → internal key
 SEQUENCE_ALIASES = {
     "Auto (recommend)": "auto",
     "Direct": "direct",
     "sad": "sad pathway.png"
 }
+# ---- load pathway phrases + colors (many-to-many) ----
+def load_pathway_info(csv_path: str):
     df = pd.read_csv(csv_path)
+    keys_we_know = set(SEQUENCE_ALIASES.values()) - {"auto"}
+    rows = df[df["color"].astype(str).str.lower().isin(keys_we_know)].copy()
     phrases: Dict[str, str] = {}
+    seq_to_colors: Dict[str, List[str]] = {}
+    color_to_seqs: Dict[str, List[str]] = {}
+    # columns to stitch into a phrase (all except color/r/g/b)
+    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]
     for _, row in rows.iterrows():
         key = str(row["color"]).strip().lower()
+        # parse colors list from column 'r' (e.g., "red, orange")
+        colors_field = str(row.get("r", "") or "")
+        colors = [c.strip().lower() for c in re.split(r"[,\s]+", colors_field) if c.strip()]
+        colors = list(dict.fromkeys(colors))  # dedupe, keep order
+        seq_to_colors[key] = colors
+        for c in colors:
+            color_to_seqs.setdefault(c, [])
+            if key not in color_to_seqs[c]:
+                color_to_seqs[c].append(key)
+        # phrase: join all non-null from the other columns (keeps "let's ..." fragments etc.)
+        vals = []
+        for c in cols_for_phrase:
+            v = row.get(c)
+            if pd.notna(v):
+                vs = str(v).strip()
+                if vs and vs.lower() != "nan":
+                    vals.append(vs)
+        phrase = " ".join(vals)
+        phrase = " ".join(phrase.split())
+        phrases[key] = phrase
+    # color vocab for parsing "red-pathway" in text
+    color_vocab = sorted(color_to_seqs.keys())
+    return phrases, seq_to_colors, color_to_seqs, color_vocab
+# Pathway lookups, built once when the module is imported by reading CSV_PATH.
+# PATHWAY_PHRASES, COLOR_TO_SEQS and COLOR_VOCAB are read by the helpers below;
+# NOTE(review): SEQ_TO_COLORS has no reader in the visible code — confirm it is still needed.
+PATHWAY_PHRASES, SEQ_TO_COLORS, COLOR_TO_SEQS, COLOR_VOCAB = load_pathway_info(CSV_PATH)
 def sequence_to_image_path(seq_key: str) -> str | None:
     fname = SEQUENCE_IMAGE_FILES.get(seq_key)
+    return fname if (fname and os.path.exists(fname)) else None
 # =========================
+# 4) Scoring
 # =========================
 def classify_emotion(text: str) -> Tuple[str, float]:
     inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
         logits = emotion_model(**inputs).logits
         probs = F.softmax(logits, dim=1).squeeze()
     labels = emotion_model.config.id2label
+    idx = int(torch.argmax(probs).item())
+    return labels[idx], float(probs[idx].item())
 def score_sentiment(text: str) -> float:
     out = bert_sentiment(text[:512])[0]
 def score_accomplishment(text: str) -> float:
     doc = nlp(text)
     score = 5.0
+    key_phrases = {"finally", "told", "decided", "quit", "refused", "stood", "walked", "walked away", "returned", "return"}
     for token in doc:
         if token.text.lower() in key_phrases:
             score += 1.5
             score += 0.5
     return round(min(10, max(1, score)), 2)
+# =========================
+# 5) Pathway-aware vector math
+# =========================
+def encode_text(t: str):
+    return sbert_model.encode(t, convert_to_tensor=True)
+def composite_vector(
+    base_text: str,
+    boost_terms: List[str],
+    boost_seq_keys: List[str],
+    limit_seq_keys: List[str],
+    boost_w: float = 0.6,
+    limit_w: float = 0.6,
+):
+    v = encode_text(base_text)
+    for term in boost_terms:
+        t = term.strip()
+        if t:
+            v = v + boost_w * encode_text(t)
+    for key in boost_seq_keys:
+        phrase = PATHWAY_PHRASES.get(key, "")
+        if phrase:
+            v = v + boost_w * encode_text(phrase)
+    for key in limit_seq_keys:
+        phrase = PATHWAY_PHRASES.get(key, "")
+        if phrase:
+            v = v - limit_w * encode_text(phrase)
+    return v
+def best_sequence_for_vector(vec) -> Tuple[str, float]:
+    best_key, best_sim = None, -1.0
+    for key, phrase in PATHWAY_PHRASES.items():
+        if not phrase:
+            continue
+        sim = float(util.cos_sim(vec, encode_text(phrase)).item())
+        if sim > best_sim:
+            best_key, best_sim = key, sim
+    return best_key or "direct", best_sim
+def semantic_indicator_mapping_from_vec(vec, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
     out: Dict[str, float] = {}
     for label, desc in GNH_DOMAINS.items():
+        desc_vec = encode_text(desc)
+        sim = float(util.cos_sim(vec, desc_vec).item())
         sim = max(0.0, min(1.0, sim))
         blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
         out[label] = round(blended, 3)
     return dict(sorted(out.items(), key=lambda kv: -kv[1]))
 # =========================
+# 6) Color cues from free text (many-to-many)
 # =========================
+_COLOR_RE = re.compile(r"\b(" + "|".join(map(re.escape, COLOR_VOCAB)) + r")\s*(?:\-?\s*pathway)?\b", re.I)
+_LIMIT_CUES = {"limit", "reduce", "lessen", "avoid", "diminish", "lower", "constrain", "suppress"}
+def infer_color_directives(text: str) -> Tuple[List[str], List[str]]:
     """
+    Parse '... limit ... red-pathway ...' → limit 'red'
+    otherwise treat mentioned colors as boost.
+    Returns (boost_colors, limit_colors) as lists of color strings.
     """
+    tokens = re.findall(r"\w+|\S", text.lower())
+    idxs = []
+    for m in _COLOR_RE.finditer(text):
+        start = m.start()
+        # find token index closest to this span
+        char_count = 0
+        tok_index = 0
+        for i, tok in enumerate(tokens):
+            char_count += len(tok) + 1  # crude but ok
+            if char_count > start:
+                tok_index = i
+                break
+        idxs.append((tok_index, m.group(1).lower()))
+    boost_colors, limit_colors = [], []
+    for idx, col in idxs:
+        # look back a small window for a limit cue
+        window = tokens[max(0, idx-4):idx]
+        if any(w in _LIMIT_CUES for w in window):
+            limit_colors.append(col)
+        else:
+            boost_colors.append(col)
+    # dedupe
+    boost_colors = list(dict.fromkeys(boost_colors))
+    limit_colors = list(dict.fromkeys(limit_colors))
+    return boost_colors, limit_colors
+def colors_to_seq_keys(colors: List[str]) -> List[str]:
+    keys: List[str] = []
+    for c in colors:
+        for k in COLOR_TO_SEQS.get(c, []):
+            if k not in keys:
+                keys.append(k)
+    return keys
 # =========================
+# 7) Plot helper
 # =========================
 def indicators_plot(indicators: Dict[str, float]):
     labels = list(indicators.keys())
     fig = plt.figure(figsize=(8, 5))
     plt.barh(labels, values, color=colors)
     plt.gca().invert_yaxis()
+    plt.title("GNH Indicator Similarity (Pathway-weighted)")
     plt.xlabel("Score")
     plt.tight_layout()
     return fig
 # =========================
+# 8) Gradio app
 # =========================
+# UI labels offered in the primary pathway dropdown (includes "Auto (recommend)").
 SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
+# Same labels without the Auto entry, for multi-select controls.
+# NOTE(review): the CheckboxGroup choices in the UI appear to rebuild this list
+# inline rather than reading this constant — confirm and reuse it there.
+SEQ_MULTI_CHOICES = [k for k in SEQUENCE_ALIASES.keys() if k != "Auto (recommend)"]
+def normalize_seq_keys(ui_labels: List[str]) -> List[str]:
+    keys = []
+    for lab in ui_labels:
+        k = SEQUENCE_ALIASES.get(lab, lab).lower()
+        keys.append(k)
+    return keys
+def analyze(
+    text: str,
+    seq_choice: str,
+    boost_terms_raw: str,
+    boost_seq_labels: List[str],
+    limit_seq_labels: List[str],
+    boost_w: float,
+    limit_w: float,
+):
     if not text or not text.strip():
+        return (5.0, "neutral (0.0)", 5.0, "—", "—", "{}", None, None)
     # 1) scores
     sentiment = score_sentiment(text)
     emotion, emo_conf = classify_emotion(text)
     accomplishment = score_accomplishment(text)
+    # 2) UI selections
+    boost_seqs_user = normalize_seq_keys(boost_seq_labels)
+    limit_seqs_user = normalize_seq_keys(limit_seq_labels)
+    # 3) parse boosts/limits
+    boost_terms = [t.strip() for t in boost_terms_raw.split(",")] if boost_terms_raw else []
+    # --- NEW: Color cues from text (many-to-many) ---
+    boost_colors, limit_colors = infer_color_directives(text)
+    boost_seqs_from_colors = colors_to_seq_keys(boost_colors)
+    limit_seqs_from_colors = colors_to_seq_keys(limit_colors)
+    # combine lists (dedupe preserving order)
+    def _merge(a: List[str], b: List[str]) -> List[str]:
+        out = list(a)
+        for x in b:
+            if x not in out:
+                out.append(x)
+        return out
+    boost_seq_keys = _merge(boost_seqs_user, boost_seqs_from_colors)
+    limit_seq_keys = _merge(limit_seqs_user, limit_seqs_from_colors)
+    # 4) build context vector
+    context_vec = composite_vector(
+        base_text=text,
+        boost_terms=boost_terms,
+        boost_seq_keys=boost_seq_keys,
+        limit_seq_keys=limit_seq_keys,
+        boost_w=boost_w,
+        limit_w=limit_w,
+    )
+    # 5) choose pathway (Auto or specific)
+    chosen_key = SEQUENCE_ALIASES.get(seq_choice, "auto")
+    if chosen_key == "auto":
+        final_key, final_sim = best_sequence_for_vector(context_vec)
+    else:
+        final_key = chosen_key
+        phrase_for_final = PATHWAY_PHRASES.get(final_key, "")
+        final_sim = float(util.cos_sim(context_vec, encode_text(phrase_for_final)).item()) if phrase_for_final else 0.0
+    # 6) outputs
+    phrase = PATHWAY_PHRASES.get(final_key, "—")
+    img_path = sequence_to_image_path(final_key)
+    indicators = semantic_indicator_mapping_from_vec(context_vec, sentiment_score=sentiment)
+    fig = indicators_plot(indicators)
     top5 = list(indicators.items())[:5]
     top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
+    # annotated meta
+    emo_str = f"{emotion} ({emo_conf:.3f})"
+    meta = f"{final_key} (relevance={final_sim:.3f})"
+    # show how color cues mapped
+    if boost_colors or limit_colors:
+        meta += f" | boost colors: {', '.join(boost_colors) or '—'} → {', '.join(boost_seqs_from_colors) or '—'}"
+        meta += f" | limit colors: {', '.join(limit_colors) or '—'} → {', '.join(limit_seqs_from_colors) or '—'}"
     return (
+        sentiment,            # number
+        emo_str,              # text
+        accomplishment,       # number
+        meta,                 # chosen pathway + relevance + color cue mapping
+        phrase,               # pathway phrase
+        top5_str,             # GNH top5
+        fig,                  # plot
+        img_path,             # image path (optional)
     )
 with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
+    gr.Markdown("## RGB Root Matriz Color Plotter\n"
                 "Type a phrase. Choose a **Sequence** or keep **Auto** to recommend a pathway. "
                 "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")
     with gr.Row():
+        inp = gr.Textbox(
+            lines=4,
+            label="Input text",
+            placeholder="e.g., use gratitude from a return and inspiration from clarity to limit from red-pathway the pain from orange-pathway."
+        )
+    with gr.Row():
+        seq = gr.Dropdown(choices=SEQ_CHOICES, value="Auto (recommend)", label="Primary Pathway")
+    with gr.Row():
+        boost_terms = gr.Textbox(label="Boost terms (comma-separated)", placeholder="gratitude, inspiration, clarity")
+    with gr.Row():
+        boost_seqs = gr.CheckboxGroup(choices=[c for c in SEQ_CHOICES if c != "Auto (recommend)"],
+                                      label="Boost sequences (optional)")
+        limit_seqs = gr.CheckboxGroup(choices=[c for c in SEQ_CHOICES if c != "Auto (recommend)"],
+                                      label="Limit sequences (optional)")
     with gr.Row():
+        boost_w = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Boost weight")
+        limit_w = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Limit weight")
     btn = gr.Button("Analyze", variant="primary")
         acc = gr.Number(label="Accomplishment (1–10)")
     with gr.Row():
+        chosen = gr.Text(label="Chosen pathway (relevance + color mapping)")
         phrase_out = gr.Text(label="Pathway phrase")
     with gr.Row():
         gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
+        gnh_plot = gr.Plot(label="GNH Similarity (Pathway-weighted)")
     with gr.Row():
         pathway_img = gr.Image(label="Pathway image", type="filepath")
     btn.click(
+        fn=analyze,
+        inputs=[inp, seq, boost_terms, boost_seqs, limit_seqs, boost_w, limit_w],
+        outputs=[sent, emo, acc, chosen, phrase_out, gnh_top, gnh_plot, pathway_img]
     )
 if __name__ == "__main__":
     demo.launch()