Spaces:

daniellegauthier
/

RGB-Root-Matriz-Color-Plotter

Sleeping

App Files Files Community

daniellegauthier committed on Sep 6

Commit

617928b

verified ·

1 Parent(s): e180fb2

Update app.py

Browse files

Files changed (1) hide show

app.py +189 -38

app.py CHANGED Viewed

@@ -1,17 +1,26 @@
 import os
 import nltk
 import spacy
 import torch
 import matplotlib.pyplot as plt
-import io
-from typing import Tuple, Dict
 import gradio as gr
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from sentence_transformers import SentenceTransformer, util
-import torch.nn.functional as F
-# --------- lightweight setup helpers ---------
 def ensure_spacy():
     try:
         return spacy.load("en_core_web_sm")
@@ -26,11 +35,14 @@ def ensure_nltk():
     except LookupError:
         nltk.download("punkt")
-# --------- load resources once (cached) ---------
 ensure_nltk()
 nlp = ensure_spacy()
 sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
 bert_sentiment = pipeline(
     "sentiment-analysis",
     model="distilbert-base-uncased-finetuned-sst-2-english"
@@ -40,7 +52,9 @@ emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
 emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
 emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
-# --------- domain definitions & colors ---------
 GNH_DOMAINS: Dict[str, str] = {
     "Mental Wellness": "mental health, emotional clarity, peace of mind",
     "Social Wellness": "relationships, community, friendship, social harmony",
@@ -54,7 +68,7 @@ GNH_DOMAINS: Dict[str, str] = {
     "Living Standards": "housing, wealth, basic needs, affordability",
     "Cultural Diversity": "tradition, language, cultural expression, heritage",
     "Political Wellness": "rights, law, free speech, civic participation",
-    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
 }
 GNH_COLORS: Dict[str, str] = {
@@ -73,28 +87,89 @@ GNH_COLORS: Dict[str, str] = {
     "Cultural Diversity": "#9370db",
 }
-# --------- core scoring functions ---------
 def classify_emotion(text: str) -> Tuple[str, float]:
     inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
     with torch.no_grad():
         logits = emotion_model(**inputs).logits
         probs = F.softmax(logits, dim=1).squeeze()
     labels = emotion_model.config.id2label
-    top_idx = torch.argmax(probs).item()
     return labels[top_idx], float(probs[top_idx].item())
 def score_sentiment(text: str) -> float:
-    """
-    BERT sentiment → scale to [1..10]
-    POSITIVE: ~[6..10]; NEGATIVE: ~[1..5]
-    """
     out = bert_sentiment(text[:512])[0]
     label, score = out["label"], out["score"]
-    if label == "POSITIVE":
-        scaled = 5 + 5 * score
-    else:
-        scaled = 1 + 4 * (1 - score)
-    return round(max(1, min(10, scaled)), 2)
 def score_accomplishment(text: str) -> float:
     doc = nlp(text)
@@ -103,14 +178,11 @@ def score_accomplishment(text: str) -> float:
     for token in doc:
         if token.text.lower() in key_phrases:
             score += 1.5
-        if token.tag_ in {"VBD", "VBN"}:  # past tense / participle
             score += 0.5
-    return round(max(1, min(10, score)), 2)
 def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
-    """
-    SBERT cosine similarity to domain descriptions, then blend with sentiment_score.
-    """
     text_vec = sbert_model.encode(text, convert_to_tensor=True)
     out: Dict[str, float] = {}
     for label, desc in GNH_DOMAINS.items():
@@ -121,12 +193,42 @@ def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weig
         out[label] = round(blended, 3)
     return dict(sorted(out.items(), key=lambda kv: -kv[1]))
-# --------- plotting helper ---------
 def indicators_plot(indicators: Dict[str, float]):
     labels = list(indicators.keys())
     values = list(indicators.values())
     colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]
     fig = plt.figure(figsize=(8, 5))
     plt.barh(labels, values, color=colors)
     plt.gca().invert_yaxis()
@@ -135,43 +237,92 @@ def indicators_plot(indicators: Dict[str, float]):
     plt.tight_layout()
     return fig
-# --------- Gradio app ---------
-def analyze(text: str):
     if not text or not text.strip():
-        return 5.0, "neutral (0.0)", "[]", None, 5.0
     sentiment = score_sentiment(text)
     emotion, emo_conf = classify_emotion(text)
     accomplishment = score_accomplishment(text)
     indicators = semantic_indicator_mapping(text, sentiment)
     top5 = list(indicators.items())[:5]
     top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
-    fig = indicators_plot(indicators)
     return (
         sentiment,
         f"{emotion} ({emo_conf:.3f})",
         top5_str,
         fig,
-        accomplishment,
     )
-with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
-    gr.Markdown("# La Matriz — BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains.")
     with gr.Row():
         inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
     with gr.Row():
-        btn = gr.Button("Analyze", variant="primary")
     with gr.Row():
         sent = gr.Number(label="Sentiment (1–10)")
         emo = gr.Text(label="Emotion")
         acc = gr.Number(label="Accomplishment (1–10)")
     with gr.Row():
-        top = gr.Text(label="Top GNH Indicators")
     with gr.Row():
-        plot = gr.Plot(label="GNH Similarity")
-    btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])
 if __name__ == "__main__":
     demo.launch()

 import os
+import io
+from typing import Dict, Tuple, List
 import nltk
 import spacy
 import torch
 import matplotlib.pyplot as plt
+import torch.nn.functional as F
+import pandas as pd
 import gradio as gr
+from transformers import (
+    pipeline,
+    AutoTokenizer,
+    AutoModelForSequenceClassification,
+)
 from sentence_transformers import SentenceTransformer, util
+# =========================
+# 0) Lightweight setup
+# =========================
 def ensure_spacy():
     try:
         return spacy.load("en_core_web_sm")
     except LookupError:
         nltk.download("punkt")
 ensure_nltk()
 nlp = ensure_spacy()
+# =========================
+# 1) Models (cached)
+# =========================
 sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
 bert_sentiment = pipeline(
     "sentiment-analysis",
     model="distilbert-base-uncased-finetuned-sst-2-english"
 emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
 emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
+# =========================
+# 2) GNH definitions
+# =========================
 GNH_DOMAINS: Dict[str, str] = {
     "Mental Wellness": "mental health, emotional clarity, peace of mind",
     "Social Wellness": "relationships, community, friendship, social harmony",
     "Living Standards": "housing, wealth, basic needs, affordability",
     "Cultural Diversity": "tradition, language, cultural expression, heritage",
     "Political Wellness": "rights, law, free speech, civic participation",
+    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
 }
 GNH_COLORS: Dict[str, str] = {
     "Cultural Diversity": "#9370db",
 }
+# =========================
+# 3) Pathway data
+#    - Reads phrases from bottom of "la matrice plus.csv"
+#    - Maps sequence keys -> phrase & image path
+# =========================
+CSV_PATH = "la matrice plus.csv"
+# Aliases so your UI label → CSV row & image file
+SEQUENCE_ALIASES = {
+    "Auto (recommend)": "auto",
+    "Direct": "direct",
+    "Fem": "feminine",     # CSV row is 'feminine', image is 'fem pathway.png'
+    "Knot": "knot",
+    "Masc": "masc",
+    "Pain": "pain",
+    "Prayer": "prayer",
+    "Precise": "precise",
+    "Practical": "practical",
+    "Plot": "plot",
+    # add more later (e.g., "Spiritual", "Sad") if/when images are added
+}
+SEQUENCE_IMAGE_FILES = {
+    "direct": "direct pathway.png",
+    "feminine": "fem pathway.png",
+    "knot": "knot pathway.png",
+    "masc": "masc pathway.png",
+    "pain": "pain pathway.png",
+    "prayer": "prayer pathway.png",
+    "precise": "precise pathway.png",
+    "practical": "practical pathway.png",
+    "plot": "plot pathway.png",
+    # add "spiritual": "...png", "sad": "...png" when you drop them in
+}
def load_pathway_phrases(csv_path: str) -> Dict[str, str]:
    """Build the pathway phrase table from the sequence rows of a CSV sheet.

    A row is a sequence row when its ``color`` cell, stripped and
    lower-cased, is one of the keys of ``SEQUENCE_IMAGE_FILES`` or one of
    the extra keys ``"spiritual"`` / ``"sad"``. The phrase for a row is the
    space-joined text of every non-null cell from column index 4 onward
    (presumably ``matrice1`` through the last column, per the original
    author's note -- verify against the sheet), with runs of whitespace
    collapsed.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Mapping of sequence key to phrase. Empty when the file does not
        exist, so the app can still start and fall back to its defaults.

    Raises:
        KeyError: If the sheet has no ``color`` column.
    """
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        # The pathway data is optional at startup; callers treat an empty
        # table as "no phrases available".
        return {}

    valid_keys = set(SEQUENCE_IMAGE_FILES) | {"spiritual", "sad"}

    # Normalize once and use the same normalized value for both filtering
    # and keying, so cells with stray whitespace are not silently dropped.
    keys = df["color"].astype(str).str.strip().str.lower()
    mask = keys.isin(valid_keys)

    phrases: Dict[str, str] = {}
    for key, (_, row) in zip(keys[mask], df[mask].iterrows()):
        cells = [str(v) for v in row.iloc[4:].tolist() if pd.notna(v)]
        # split/join collapses duplicate spaces and trims both ends.
        phrases[key] = " ".join(" ".join(cells).split())
    return phrases
+PATHWAY_PHRASES = load_pathway_phrases(CSV_PATH)
def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the image filename registered for ``seq_key`` if it exists on disk.

    Looks the key up in ``SEQUENCE_IMAGE_FILES``. Returns ``None`` when the
    key has no registered file or the file is not present, so a missing
    image is treated as optional rather than as an error.
    """
    filename = SEQUENCE_IMAGE_FILES.get(seq_key)
    if not filename:
        return None
    return filename if os.path.exists(filename) else None
+# =========================
+# 4) Core scoring functions
+# =========================
 def classify_emotion(text: str) -> Tuple[str, float]:
     inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
     with torch.no_grad():
         logits = emotion_model(**inputs).logits
         probs = F.softmax(logits, dim=1).squeeze()
     labels = emotion_model.config.id2label
+    top_idx = int(torch.argmax(probs).item())
     return labels[top_idx], float(probs[top_idx].item())
 def score_sentiment(text: str) -> float:
     out = bert_sentiment(text[:512])[0]
     label, score = out["label"], out["score"]
+    scaled = 5 + 5 * score if label == "POSITIVE" else 1 + 4 * (1 - score)
+    return round(min(10, max(1, scaled)), 2)
 def score_accomplishment(text: str) -> float:
     doc = nlp(text)
     for token in doc:
         if token.text.lower() in key_phrases:
             score += 1.5
+        if token.tag_ in {"VBD", "VBN"}:
             score += 0.5
+    return round(min(10, max(1, score)), 2)
 def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
     text_vec = sbert_model.encode(text, convert_to_tensor=True)
     out: Dict[str, float] = {}
     for label, desc in GNH_DOMAINS.items():
         out[label] = round(blended, 3)
     return dict(sorted(out.items(), key=lambda kv: -kv[1]))
+# =========================
+# 5) Pathway selection logic
+# =========================
def suggest_sequence(text: str) -> Tuple[str, float]:
    """Pick the pathway whose phrase is most similar to ``text``.

    Encodes ``text`` and every non-empty phrase in ``PATHWAY_PHRASES`` with
    the SBERT model and compares them by cosine similarity. On ties the
    first pathway encountered wins.

    Returns:
        ``(sequence_key, similarity)``. When no phrase is available (empty
        table, or every phrase blank) the fallback ``("direct", 0.0)`` is
        returned, so the internal -1.0 search sentinel is never reported
        as if it were a measured similarity.
    """
    if not PATHWAY_PHRASES:
        return "direct", 0.0

    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    best_key, best_sim = None, -1.0
    for key, phrase in PATHWAY_PHRASES.items():
        if not phrase:
            continue
        phrase_vec = sbert_model.encode(phrase, convert_to_tensor=True)
        sim = float(util.cos_sim(text_vec, phrase_vec).item())
        if sim > best_sim:
            best_key, best_sim = key, sim

    if best_key is None:
        # Nothing was comparable; report the same neutral fallback as the
        # empty-table case instead of leaking the sentinel.
        return "direct", 0.0
    return best_key, best_sim
def pathway_payload(seq_key: str) -> Tuple[str, str | None]:
    """Look up the phrase and image path for a sequence key.

    The key is stripped and lower-cased before lookup. The phrase is ``""``
    when the key is not in ``PATHWAY_PHRASES``; the image path is whatever
    ``sequence_to_image_path`` returns for the normalized key.
    """
    normalized = seq_key.strip().lower()
    return PATHWAY_PHRASES.get(normalized, ""), sequence_to_image_path(normalized)
+# =========================
+# 6) Plot helper (GNH bars)
+# =========================
 def indicators_plot(indicators: Dict[str, float]):
     labels = list(indicators.keys())
     values = list(indicators.values())
     colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]
     fig = plt.figure(figsize=(8, 5))
     plt.barh(labels, values, color=colors)
     plt.gca().invert_yaxis()
     plt.tight_layout()
     return fig
+# =========================
+# 7) Gradio app
+# =========================
+SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
def analyze(text: str, seq_choice: str):
    """Score ``text`` and resolve the pathway to display.

    Args:
        text: Free-form input text.
        seq_choice: UI label of the selected sequence; labels that are not
            in ``SEQUENCE_ALIASES`` are treated as the automatic choice.

    Returns:
        A 10-tuple, in this order:
        sentiment, emotion text, accomplishment, pathway key, pathway
        phrase, top-5 indicator text, indicator figure, pathway image path,
        auto-selection key (or the UI label when a sequence was picked
        manually), auto-selection similarity.
        Blank input yields neutral placeholders with the same length and
        order, so callers that slice the tuple see a consistent shape.
    """
    if not text or not text.strip():
        return (
            5.0, "neutral (0.0)", 5.0,  # sentiment, emotion, accomplishment
            "—", "—",                   # pathway key, pathway phrase
            "{}", None, None,           # top-5 text, figure, image path
            "—", 0.0,                   # auto key, auto similarity
        )

    # 1) scores
    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    indicators = semantic_indicator_mapping(text, sentiment)
    fig = indicators_plot(indicators)

    # 2) pathway: recommend one in auto mode, otherwise honor the selection
    chosen_key = SEQUENCE_ALIASES.get(seq_choice, "auto")
    if chosen_key == "auto":
        final_key, auto_sim = suggest_sequence(text)
        reported_choice = final_key
    else:
        final_key, auto_sim = chosen_key, None
        reported_choice = seq_choice
    phrase, img_path = pathway_payload(final_key)

    # 3) outputs
    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
    return (
        sentiment,
        f"{emotion} ({emo_conf:.3f})",
        accomplishment,
        final_key,
        phrase or "—",
        top5_str,
        fig,
        img_path,
        reported_choice,
        float(auto_sim or 0.0),
    )
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    # Intro text shown at the top of the page.
    gr.Markdown(
        "## La Matriz Consulting, feat. BERT Emotion + GNH + Pathway\n"
        "Type a phrase. Choose a **Sequence** or keep **Auto** to recommend a pathway. "
        "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset."
    )

    # Inputs.
    with gr.Row():
        inp = gr.Textbox(
            lines=4,
            label="Input text",
            placeholder="e.g., I finally quit my toxic job and feel lighter.",
        )
    with gr.Row():
        seq = gr.Dropdown(
            choices=SEQ_CHOICES,
            value="Auto (recommend)",
            label="Sequence choice",
        )
    btn = gr.Button("Analyze", variant="primary")

    # Outputs, declared in the order they are wired to the click handler.
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        seq_used = gr.Text(label="Chosen pathway key")
        phrase_out = gr.Text(label="Pathway phrase")
    with gr.Row():
        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
        gnh_plot = gr.Plot(label="GNH Similarity")
    with gr.Row():
        pathway_img = gr.Image(label="Pathway image", type="filepath")
        auto_meta = gr.Text(label="Auto selection (key, similarity)")

    def _wrap_analyze(text, seq_choice):
        """Run ``analyze`` and fold its last two values into one summary string.

        The trailing (auto key, similarity) pair is replaced by a single
        text that is only filled in when the automatic choice was used.
        """
        *display_values, auto_key, auto_sim = analyze(text, seq_choice)
        if seq_choice == "Auto (recommend)":
            meta = f"{auto_key} (similarity={auto_sim:.3f})"
        else:
            meta = "—"
        return (*display_values, meta)

    btn.click(
        fn=_wrap_analyze,
        inputs=[inp, seq],
        outputs=[
            sent, emo, acc,
            seq_used, phrase_out,
            gnh_top, gnh_plot,
            pathway_img, auto_meta,
        ],
    )
 if __name__ == "__main__":
     demo.launch()