import os
import re
from typing import Dict, Tuple, List
import nltk
import spacy
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util
# =========================
# 0) Lightweight setup
# =========================
def ensure_spacy():
    try:
        return spacy.load("en_core_web_sm")
    except Exception:
        import spacy.cli
        spacy.cli.download("en_core_web_sm")
        return spacy.load("en_core_web_sm")

def ensure_nltk():
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")

ensure_nltk()
nlp = ensure_spacy()
# =========================
# 1) Models (cached)
# =========================
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
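
# Note: all three models above are fetched from the Hugging Face Hub on first
# run and cached locally afterwards; they are small enough for CPU inference,
# so no GPU configuration is assumed here.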
# =========================
# 2) GNH definitions
# =========================
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#ffc0cb",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#f5deb3",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#87ceeb",
    "Ecological Diversity": "#228B22",
    "Health": "#ff6347",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#9370db",
}
# =========================
# 3) Pathways (CSV + images)
# =========================
CSV_PATH = "la matrice plus.csv"
# UI label → internal key
SEQUENCE_ALIASES = {
    "Auto (recommend)": "auto",
    "Direct": "direct",
    "Fem": "feminine",
    "Knot": "knot",
    "Masc": "masculine",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masculine": "masc pathway.png",  # keyed by the internal key, not the "Masc" UI label
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png",
}
# ---- load pathway phrases + colors (many-to-many) ----
def load_pathway_info(csv_path: str):
    df = pd.read_csv(csv_path)
    keys_we_know = set(SEQUENCE_ALIASES.values()) - {"auto"}
    rows = df[df["color"].astype(str).str.lower().isin(keys_we_know)].copy()
    phrases: Dict[str, str] = {}
    seq_to_colors: Dict[str, List[str]] = {}
    color_to_seqs: Dict[str, List[str]] = {}
    # columns to stitch into a phrase (all except color/r/g/b)
    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]
    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()
        # parse the colors list from column 'r' (e.g., "red, orange")
        colors_field = str(row.get("r", "") or "")
        colors = [c.strip().lower() for c in re.split(r"[,\s]+", colors_field) if c.strip()]
        colors = list(dict.fromkeys(colors))  # dedupe, keep order
        seq_to_colors[key] = colors
        for c in colors:
            color_to_seqs.setdefault(c, [])
            if key not in color_to_seqs[c]:
                color_to_seqs[c].append(key)
        # phrase: join all non-null values from the other columns (keeps "let's ..." fragments etc.)
        vals = []
        for c in cols_for_phrase:
            v = row.get(c)
            if pd.notna(v):
                vs = str(v).strip()
                if vs and vs.lower() != "nan":
                    vals.append(vs)
        phrase = " ".join(vals)
        phrase = " ".join(phrase.split())
        phrases[key] = phrase
    # color vocab for parsing "red-pathway" in text
    color_vocab = sorted(color_to_seqs.keys())
    return phrases, seq_to_colors, color_to_seqs, color_vocab
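
# Illustrative sketch of the CSV shape this loader assumes (the real file may
# differ; column names beyond 'color' and 'r' are not fixed):
#   color, r,             word1, word2, ...
#   knot,  "red, orange", let's, tie,   ...
# 'color' carries the pathway key, 'r' a comma-separated list of colors, and
# every other non-null column is stitched into that pathway's phrase.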
PATHWAY_PHRASES, SEQ_TO_COLORS, COLOR_TO_SEQS, COLOR_VOCAB = load_pathway_info(CSV_PATH)
def sequence_to_image_path(seq_key: str) -> str | None:
    fname = SEQUENCE_IMAGE_FILES.get(seq_key)
    return fname if (fname and os.path.exists(fname)) else None
# =========================
# 4) Scoring
# =========================
def classify_emotion(text: str) -> Tuple[str, float]:
    inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    with torch.no_grad():
        logits = emotion_model(**inputs).logits
    probs = F.softmax(logits, dim=1).squeeze()
    labels = emotion_model.config.id2label
    idx = int(torch.argmax(probs).item())
    return labels[idx], float(probs[idx].item())
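
# Illustrative usage (confidence depends on the model weights, so the number
# below is an example, not a guaranteed output):
#   classify_emotion("I can't stop smiling today")  # -> ("joy", 0.97)
# This checkpoint distinguishes seven labels: anger, disgust, fear, joy,
# neutral, sadness, surprise.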
def score_sentiment(text: str) -> float:
    out = bert_sentiment(text[:512])[0]
    label, score = out["label"], out["score"]
    scaled = 5 + 5 * score if label == "POSITIVE" else 1 + 4 * (1 - score)
    return round(min(10, max(1, scaled)), 2)
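
# Worked example of the 1-10 mapping: a POSITIVE label with confidence 0.9
# yields 5 + 5*0.9 = 9.5, while a NEGATIVE label with confidence 0.9 yields
# 1 + 4*(1 - 0.9) = 1.4, so confident negatives land near 1 and confident
# positives near 10.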
def score_accomplishment(text: str) -> float:
    doc = nlp(text)
    score = 5.0
    # single-word cues are matched per token; multiword cues like "walked away"
    # can never equal a single token, so they are matched against the raw text
    key_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked", "returned", "return"}
    key_phrases = {"walked away"}
    lowered = text.lower()
    for phrase in key_phrases:
        if phrase in lowered:
            score += 1.5
    for token in doc:
        if token.text.lower() in key_words:
            score += 1.5
        if token.tag_ in {"VBD", "VBN"}:
            score += 0.5
    return round(min(10, max(1, score)), 2)
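
# Illustrative scoring (POS tags depend on the spaCy model): for
# "I finally quit.", "finally" and "quit" each add 1.5 and the past-tense
# tag on "quit" adds 0.5, giving 5.0 + 1.5 + 1.5 + 0.5 = 8.5.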
# =========================
# 5) Pathway-aware vector math
# =========================
def encode_text(t: str):
    return sbert_model.encode(t, convert_to_tensor=True)

def composite_vector(
    base_text: str,
    boost_terms: List[str],
    boost_seq_keys: List[str],
    limit_seq_keys: List[str],
    boost_w: float = 0.6,
    limit_w: float = 0.6,
):
    v = encode_text(base_text)
    for term in boost_terms:
        t = term.strip()
        if t:
            v = v + boost_w * encode_text(t)
    for key in boost_seq_keys:
        phrase = PATHWAY_PHRASES.get(key, "")
        if phrase:
            v = v + boost_w * encode_text(phrase)
    for key in limit_seq_keys:
        phrase = PATHWAY_PHRASES.get(key, "")
        if phrase:
            v = v - limit_w * encode_text(phrase)
    return v
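
# Sketch of the vector arithmetic, assuming one boost term "gratitude" and a
# limited "pain" pathway with the default weights:
#   v = enc(text) + 0.6 * enc("gratitude") - 0.6 * enc(pain_phrase)
# The result is deliberately left unnormalized; util.cos_sim normalizes both
# of its arguments, so only the direction of v matters downstream.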
def best_sequence_for_vector(vec) -> Tuple[str, float]:
    best_key, best_sim = None, -1.0
    for key, phrase in PATHWAY_PHRASES.items():
        if not phrase:
            continue
        sim = float(util.cos_sim(vec, encode_text(phrase)).item())
        if sim > best_sim:
            best_key, best_sim = key, sim
    return best_key or "direct", best_sim
def semantic_indicator_mapping_from_vec(vec, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    out: Dict[str, float] = {}
    for label, desc in GNH_DOMAINS.items():
        desc_vec = encode_text(desc)
        sim = float(util.cos_sim(vec, desc_vec).item())
        sim = max(0.0, min(1.0, sim))
        blended = (1 - sentiment_weight) * sim + sentiment_weight * (sentiment_score / 10.0)
        out[label] = round(blended, 3)
    return dict(sorted(out.items(), key=lambda kv: -kv[1]))
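
# Worked blend with the default sentiment_weight of 0.3: a clamped similarity
# of 0.42 and a sentiment score of 8.0 give 0.7*0.42 + 0.3*0.8 = 0.534.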
# =========================
# 6) Color cues from free text (many-to-many)
# =========================
# matches e.g. "red", "red pathway", or "red-pathway"; falls back to a
# never-matching pattern if the CSV yielded no colors
_COLOR_RE = re.compile(
    r"\b(" + ("|".join(map(re.escape, COLOR_VOCAB)) or r"(?!x)x") + r")\s*(?:-?\s*pathway)?\b",
    re.I,
)
_LIMIT_CUES = {"limit", "reduce", "lessen", "avoid", "diminish", "lower", "constrain", "suppress"}
def infer_color_directives(text: str) -> Tuple[List[str], List[str]]:
    """
    Parse '... limit ... red-pathway ...' → limit 'red';
    otherwise treat mentioned colors as boosts.
    Returns (boost_colors, limit_colors) as lists of color strings.
    """
    tokens = re.findall(r"\w+|\S", text.lower())
    idxs = []
    for m in _COLOR_RE.finditer(text):
        start = m.start()
        # find the token index closest to this character span
        char_count = 0
        tok_index = 0
        for i, tok in enumerate(tokens):
            char_count += len(tok) + 1  # crude offset estimate, but close enough for a small window
            if char_count > start:
                tok_index = i
                break
        idxs.append((tok_index, m.group(1).lower()))
    boost_colors, limit_colors = [], []
    for idx, col in idxs:
        # look back a small window for a limit cue
        window = tokens[max(0, idx - 4):idx]
        if any(w in _LIMIT_CUES for w in window):
            limit_colors.append(col)
        else:
            boost_colors.append(col)
    # dedupe, preserving order
    boost_colors = list(dict.fromkeys(boost_colors))
    limit_colors = list(dict.fromkeys(limit_colors))
    return boost_colors, limit_colors
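
# Illustrative parse, assuming 'red' and 'orange' both appear in COLOR_VOCAB:
#   infer_color_directives("lean on the orange-pathway but limit the red-pathway")
#   # -> (["orange"], ["red"])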
def colors_to_seq_keys(colors: List[str]) -> List[str]:
    keys: List[str] = []
    for c in colors:
        for k in COLOR_TO_SEQS.get(c, []):
            if k not in keys:
                keys.append(k)
    return keys
# =========================
# 7) Plot helper
# =========================
def indicators_plot(indicators: Dict[str, float]):
    labels = list(indicators.keys())
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]
    fig = plt.figure(figsize=(8, 5))
    plt.barh(labels, values, color=colors)
    plt.gca().invert_yaxis()
    plt.title("GNH Indicator Similarity (Pathway-weighted)")
    plt.xlabel("Score")
    plt.tight_layout()
    return fig
# =========================
# 8) Gradio app
# =========================
SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
SEQ_MULTI_CHOICES = [k for k in SEQUENCE_ALIASES.keys() if k != "Auto (recommend)"]
def normalize_seq_keys(ui_labels: List[str]) -> List[str]:
    keys = []
    for lab in ui_labels:
        k = SEQUENCE_ALIASES.get(lab, lab).lower()
        keys.append(k)
    return keys
def analyze(
    text: str,
    seq_choice: str,
    boost_terms_raw: str,
    boost_seq_labels: List[str],
    limit_seq_labels: List[str],
    boost_w: float,
    limit_w: float,
):
    if not text or not text.strip():
        return (5.0, "neutral (0.0)", 5.0, "—", "—", "—", None, None)
    # 1) scores
    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    # 2) UI selections
    boost_seqs_user = normalize_seq_keys(boost_seq_labels)
    limit_seqs_user = normalize_seq_keys(limit_seq_labels)
    # 3) parse boost terms
    boost_terms = [t.strip() for t in boost_terms_raw.split(",")] if boost_terms_raw else []
    # --- color cues from text (many-to-many) ---
    boost_colors, limit_colors = infer_color_directives(text)
    boost_seqs_from_colors = colors_to_seq_keys(boost_colors)
    limit_seqs_from_colors = colors_to_seq_keys(limit_colors)
    # combine lists (dedupe, preserving order)
    def _merge(a: List[str], b: List[str]) -> List[str]:
        out = list(a)
        for x in b:
            if x not in out:
                out.append(x)
        return out
    boost_seq_keys = _merge(boost_seqs_user, boost_seqs_from_colors)
    limit_seq_keys = _merge(limit_seqs_user, limit_seqs_from_colors)
    # 4) build context vector
    context_vec = composite_vector(
        base_text=text,
        boost_terms=boost_terms,
        boost_seq_keys=boost_seq_keys,
        limit_seq_keys=limit_seq_keys,
        boost_w=boost_w,
        limit_w=limit_w,
    )
    # 5) choose pathway (Auto or specific)
    chosen_key = SEQUENCE_ALIASES.get(seq_choice, "auto")
    if chosen_key == "auto":
        final_key, final_sim = best_sequence_for_vector(context_vec)
    else:
        final_key = chosen_key
        phrase_for_final = PATHWAY_PHRASES.get(final_key, "")
        final_sim = float(util.cos_sim(context_vec, encode_text(phrase_for_final)).item()) if phrase_for_final else 0.0
    # 6) outputs
    phrase = PATHWAY_PHRASES.get(final_key, "—")
    img_path = sequence_to_image_path(final_key)
    indicators = semantic_indicator_mapping_from_vec(context_vec, sentiment_score=sentiment)
    fig = indicators_plot(indicators)
    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
    # annotated meta
    emo_str = f"{emotion} ({emo_conf:.3f})"
    meta = f"{final_key} (relevance={final_sim:.3f})"
    # show how color cues mapped onto sequences
    if boost_colors or limit_colors:
        meta += f" | boost colors: {', '.join(boost_colors) or '—'} → {', '.join(boost_seqs_from_colors) or '—'}"
        meta += f" | limit colors: {', '.join(limit_colors) or '—'} → {', '.join(limit_seqs_from_colors) or '—'}"
    return (
        sentiment,       # number
        emo_str,         # text
        accomplishment,  # number
        meta,            # chosen pathway + relevance + color cue mapping
        phrase,          # pathway phrase
        top5_str,        # GNH top 5
        fig,             # plot
        img_path,        # image path (optional)
    )
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown(
        "## RGB Root Matriz Color Plotter\n"
        "Type a phrase. Choose a **Sequence** or keep **Auto** to recommend a pathway. "
        "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset."
    )
    with gr.Row():
        inp = gr.Textbox(
            lines=4,
            label="Input text",
            placeholder="e.g., use gratitude from a return and inspiration from clarity to limit from red-pathway the pain from orange-pathway.",
        )
    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value="Auto (recommend)", label="Primary Pathway")
    with gr.Row():
        boost_terms = gr.Textbox(label="Boost terms (comma-separated)", placeholder="gratitude, inspiration, clarity")
    with gr.Row():
        boost_seqs = gr.CheckboxGroup(choices=SEQ_MULTI_CHOICES, label="Boost sequences (optional)")
        limit_seqs = gr.CheckboxGroup(choices=SEQ_MULTI_CHOICES, label="Limit sequences (optional)")
    with gr.Row():
        boost_w = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Boost weight")
        limit_w = gr.Slider(0.0, 1.5, value=0.6, step=0.05, label="Limit weight")
    btn = gr.Button("Analyze", variant="primary")
    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo = gr.Text(label="Emotion")
        acc = gr.Number(label="Accomplishment (1–10)")
    with gr.Row():
        chosen = gr.Text(label="Chosen pathway (relevance + color mapping)")
        phrase_out = gr.Text(label="Pathway phrase")
    with gr.Row():
        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
        gnh_plot = gr.Plot(label="GNH Similarity (Pathway-weighted)")
    with gr.Row():
        pathway_img = gr.Image(label="Pathway image", type="filepath")
    btn.click(
        fn=analyze,
        inputs=[inp, seq, boost_terms, boost_seqs, limit_seqs, boost_w, limit_w],
        outputs=[sent, emo, acc, chosen, phrase_out, gnh_top, gnh_plot, pathway_img],
    )
if __name__ == "__main__":
    demo.launch()