Spaces:

daniellegauthier
/

RGB-Root-Matriz-Color-Plotter

Running

App Files Files Community

RGB-Root-Matriz-Color-Plotter / app.py

daniellegauthier

Update app.py

92b24cb verified 3 months ago

raw

history blame

16.2 kB

	import os, re
	from functools import lru_cache
	from typing import Dict, Tuple, List

	import nltk, spacy, torch, pandas as pd, matplotlib.pyplot as plt
	import torch.nn.functional as F
	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	from sentence_transformers import SentenceTransformer, util

	# ---------- lightweight setup ----------
	def ensure_spacy():
	    """Return the loaded ``en_core_web_sm`` spaCy pipeline, downloading it if missing.

	    Returns:
	        The object returned by ``spacy.load("en_core_web_sm")``.

	    Raises:
	        OSError: if the model still cannot be loaded after the download attempt.
	    """
	    model_name = "en_core_web_sm"
	    try:
	        return spacy.load(model_name)
	    except OSError:
	        # A missing model package surfaces as OSError from spacy.load.
	        # Import only the ``download`` callable: a function-level
	        # ``import spacy.cli`` would bind ``spacy`` as a *local* name, making the
	        # ``spacy.load`` call above fail with UnboundLocalError on every run
	        # (and silently forcing a re-download when caught by a broad except).
	        from spacy.cli import download as download_model

	        download_model(model_name)
	        return spacy.load(model_name)

	def ensure_nltk():
	    """Make sure the NLTK ``punkt`` tokenizer data is present, fetching it if not."""
	    try:
	        nltk.data.find("tokenizers/punkt")
	        return
	    except LookupError:
	        # Resource not found in the NLTK data path; fall through to download.
	        pass
	    nltk.download("punkt")

	# One-time resource checks, executed when the module is imported.
	ensure_nltk()
	nlp = ensure_spacy()  # spaCy pipeline; used by score_accomplishment for token tags

	# ---------- models ----------
	# All models are instantiated once at import time and shared by every request.
	# Sentence embedding model used by encode_text.
	sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
	# Sentiment pipeline used by score_sentiment (labels are compared against "POSITIVE" there).
	bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
	# Tokenizer + classifier pair used by classify_emotion.
	emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
	emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
	emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)

	# ---------- constants ----------
	# Both paths are relative, so they resolve against the process working directory.
	CSV_PATH_PLUS = "la matrice plus.csv" # pathways + colors + phrase parts (read by load_pathway_info)
	CSV_PATH_COLOR = "la matrice.csv" # color lexicon (read by load_color_lexicon)

	# Maps the pathway name shown in the UI dropdown to the lowercase key used to
	# look the pathway up in the CSV data and in SEQUENCE_IMAGE_FILES.
	# Insertion order is the order of the dropdown choices (see SEQ_CHOICES).
	SEQUENCE_ALIASES = {
	    "Direct": "direct",
	    "Fem": "feminine",
	    "Knot": "knot",
	    "Masc": "masc",
	    "Pain": "pain",
	    "Prayer": "prayer",
	    "Precise": "precise",
	    "Practical": "practical",
	    "Plot": "plot",
	    "Spiritual": "spiritual",
	    "Sad": "sad",
	}


	# Pathway key -> image file name. The names are relative paths; whether each
	# file actually exists is checked at lookup time by sequence_to_image_path.
	SEQUENCE_IMAGE_FILES = {
	    "direct": "direct pathway.png",
	    "feminine": "fem pathway.png",
	    "knot": "knot pathway.png",
	    "masc": "masc pathway.png",
	    "pain": "pain pathway.png",
	    "prayer": "prayer pathway.png",
	    "precise": "precise pathway.png",
	    "practical": "practical pathway.png",
	    "plot": "plot pathway.png",
	    "spiritual": "spiritual pathway.png",
	    "sad": "sad pathway.png"
	}

	# GNH dictionaries
	# Domain name -> short keyword description. The description text is what gets
	# embedded and compared (cosine similarity) against the input text in
	# semantic_indicator_mapping and nearest_gnh_domain_for_color.
	# Dict order is the tie-break order when scores are equal (the sort there is stable).
	GNH_DOMAINS: Dict[str, str] = {
	    "Mental Wellness": "mental health, emotional clarity, peace of mind",
	    "Social Wellness": "relationships, community, friendship, social harmony",
	    "Economic Wellness": "income, savings, financial stability, cost of living",
	    "Workplace Wellness": "career, work-life balance, promotion, productivity",
	    "Physical Wellness": "physical health, sleep, fitness, exercise",
	    "Environmental Wellness": "green space, nature, environmental care",
	    "Health": "healthcare, medical care, recovery, well-being",
	    "Education Value": "learning, education, school, knowledge, wisdom",
	    "Good Governance": "freedom, justice, fairness, democratic participation",
	    "Living Standards": "housing, wealth, basic needs, affordability",
	    "Cultural Diversity": "tradition, language, cultural expression, heritage",
	    "Political Wellness": "rights, law, free speech, civic participation",
	    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
	}

	# Domain name -> hex color. Used as the bar color in indicators_plot and as the
	# swatch color of GNH-mode chips in chip_html_for; both fall back to "#cccccc"
	# for a domain that is not listed here.
	# NOTE(review): "#ffffff" for Political Wellness is presumably invisible on a
	# white plot background — confirm this is intended.
	GNH_COLORS: Dict[str, str] = {
	    "Economic Wellness": "#808080",
	    "Mental Wellness": "#ffc0cb",
	    "Workplace Wellness": "#ffd700",
	    "Physical Wellness": "#f5deb3",
	    "Social Wellness": "#ffa500",
	    "Political Wellness": "#ffffff",
	    "Environmental Wellness": "#87ceeb",
	    "Ecological Diversity": "#228B22",
	    "Health": "#ff6347",
	    "Good Governance": "#000000",
	    "Education Value": "#8b4513",
	    "Living Standards": "#ffff00",
	    "Cultural Diversity": "#9370db",
	}

	# ---------- load pathway → colors & phrase ----------
	def load_pathway_info(csv_path_plus: str):
	df = pd.read_csv(csv_path_plus)
	keys = set(SEQUENCE_ALIASES.values())
	rows = df[df["color"].astype(str).str.lower().isin(keys)].copy()

	seq_to_colors: Dict[str, List[str]] = {}
	seq_phrase: Dict[str, str] = {}

	cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]
	for _, row in rows.iterrows():
	key = str(row["color"]).strip().lower()
	# colors in 'r' as comma/space separated list (supports 2–8)
	colors_field = str(row.get("r", "") or "")
	colors = [c.strip().lower() for c in re.split(r"[,\s]+", colors_field) if c.strip()]
	seq_to_colors[key] = list(dict.fromkeys(colors)) # dedupe

	vals = []
	for c in cols_for_phrase:
	v = row.get(c)
	if pd.notna(v):
	vs = str(v).strip()
	if vs and vs.lower() != "nan":
	vals.append(vs)
	phrase = " ".join(" ".join(vals).split())
	seq_phrase[key] = phrase

	return seq_to_colors, seq_phrase

	# Loaded once at import time: pathway key -> colors, and pathway key -> base phrase.
	SEQ_TO_COLORS, SEQ_PHRASE = load_pathway_info(CSV_PATH_PLUS)

	# ---------- load color lexicon ----------
	def _find_col(df: pd.DataFrame, candidates: List[str]) -> str \| None:
	names = {c.lower(): c for c in df.columns}
	for c in candidates:
	if c.lower() in names:
	return names[c.lower()]
	for want in candidates:
	for lc, orig in names.items():
	if want.replace(" ", "").replace("-", "") in lc.replace(" ", "").replace("-", ""):
	return orig
	return None

	def _split_words(s: str) -> List[str]:
	if not isinstance(s, str): return []
	parts = re.split(r"[,\;/\\|\s]+", s.strip())
	return [p for p in (w.strip().lower() for w in parts) if p]

	def load_color_lexicon(csv_path_color: str) -> Dict[str, Dict[str, List[str]]]:
	    """Build the per-color word lexicon from the color CSV.

	    Column names are located with ``_find_col`` so minor header variations
	    are tolerated.

	    Args:
	        csv_path_color: Path handed to ``pd.read_csv``.

	    Returns:
	        ``{color: {"matrice1": [...], "matrice": [...], "english": [...]}}``
	        keyed by the stripped, lowercased color name. Rows without a color
	        name are skipped; a column that is missing or an empty cell gives an
	        empty word list.
	    """
	    df = pd.read_csv(csv_path_color)
	    color_col = _find_col(df, ["color", "colour"])
	    m1_col = _find_col(df, ["matrice1", "matrice 1"])
	    m_col = _find_col(df, ["matrice"])
	    en_col = _find_col(df, ["english-words-code", "english words code", "english_words_code", "english"])

	    def cell_text(row, col) -> str:
	        # Empty CSV cells arrive as NaN; str(NaN) would otherwise inject the
	        # literal word "nan" into the lexicon (or create a "nan" color).
	        if col is None:
	            return ""
	        value = row.get(col)
	        return "" if pd.isna(value) else str(value)

	    lex: Dict[str, Dict[str, List[str]]] = {}
	    for _, row in df.iterrows():
	        cname = cell_text(row, color_col).strip().lower()
	        if not cname:
	            continue
	        lex[cname] = {
	            "matrice1": _split_words(cell_text(row, m1_col)),
	            "matrice": _split_words(cell_text(row, m_col)),
	            "english": _split_words(cell_text(row, en_col)),
	        }
	    return lex

	# Loaded once at import time: color name -> word lists per lexicon column.
	COLOR_LEX = load_color_lexicon(CSV_PATH_COLOR)

	def sequence_to_image_path(seq_key: str) -> str | None:
	    """Return the image file name registered for *seq_key* if that file exists.

	    Returns:
	        The entry from ``SEQUENCE_IMAGE_FILES``, or ``None`` when the key is
	        unknown or the path does not exist (resolved against the current
	        working directory, since the names are relative).
	    """
	    fname = SEQUENCE_IMAGE_FILES.get(seq_key)
	    if fname and os.path.exists(fname):
	        return fname
	    return None

	# ---------- core scoring ----------
	def encode_text(t: str):
	    """Embed *t* with the shared sentence-transformer model, returned as a tensor."""
	    embedding = sbert_model.encode(t, convert_to_tensor=True)
	    return embedding

	def classify_emotion(text: str) -> Tuple[str, float]:
	    """Return the most probable emotion label for *text* and its probability.

	    The input is tokenized with truncation, run through the emotion model
	    without gradient tracking, and the logits are turned into probabilities
	    with a softmax.
	    """
	    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
	    with torch.no_grad():
	        raw_scores = emotion_model(**encoded).logits
	    distribution = F.softmax(raw_scores, dim=1).squeeze()
	    top = int(torch.argmax(distribution).item())
	    top_label = emotion_model.config.id2label[top]
	    return top_label, float(distribution[top].item())

	def score_sentiment(text: str) -> float:
	    """Map the sentiment pipeline's verdict on *text* to a score in [1, 10].

	    Only the first 512 characters are classified. A POSITIVE label yields
	    ``5 + 5 * score``; any other label yields ``1 + 4 * (1 - score)``. The
	    result is clamped to [1, 10] and rounded to two decimals.
	    """
	    verdict = bert_sentiment(text[:512])[0]
	    confidence = verdict["score"]
	    if verdict["label"] == "POSITIVE":
	        scaled = 5 + 5 * confidence
	    else:
	        scaled = 1 + 4 * (1 - confidence)
	    clamped = min(10, max(1, scaled))
	    return round(clamped, 2)

	def score_accomplishment(text: str) -> float:
	    """Heuristic accomplishment score for *text*, in [1, 10].

	    Starts at 5.0, adds 1.5 for every token whose lowercased text is one of
	    the action words and 0.5 for every token tagged VBD or VBN, then clamps
	    to [1, 10] and rounds to two decimals.
	    """
	    # NOTE(review): "walked away" is compared against single tokens, so it
	    # presumably never matches on its own — confirm whether that is intended.
	    action_words = {"finally","told","decided","quit","refused","stood","walked","walked away","returned","return"}
	    past_tags = {"VBD","VBN"}

	    total = 5.0
	    for tok in nlp(text):
	        if tok.text.lower() in action_words:
	            total += 1.5
	        if tok.tag_ in past_tags:
	            total += 0.5
	    return round(min(10, max(1, total)), 2)

	def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
	    """Score *text* against every GNH domain, blended with the sentiment score.

	    For each domain the cosine similarity between the text embedding and the
	    domain-description embedding is clamped to [0, 1] and mixed with
	    ``sentiment_score / 10`` using *sentiment_weight*.

	    Returns:
	        Domain -> blended score (rounded to 3 decimals), highest score first.
	    """
	    text_vec = encode_text(text)
	    scores: Dict[str, float] = {}
	    for domain, description in GNH_DOMAINS.items():
	        similarity = float(util.cos_sim(text_vec, encode_text(description)).item())
	        similarity = max(0.0, min(1.0, similarity))
	        mixed = (1 - sentiment_weight) * similarity + sentiment_weight * (sentiment_score / 10.0)
	        scores[domain] = round(mixed, 3)
	    # Stable descending sort: equal scores keep GNH_DOMAINS order.
	    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
	    return dict(ranked)

	def indicators_plot(indicators: Dict[str, float]):
	    """Draw a horizontal bar chart of the GNH indicator scores.

	    Args:
	        indicators: Domain -> score, in the order the bars should appear
	            from top to bottom.

	    Returns:
	        The matplotlib ``Figure``. It is already detached from pyplot, so the
	        caller does not need to close it.
	    """
	    labels = list(indicators)
	    values = list(indicators.values())
	    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

	    # Draw through the Axes object instead of pyplot's implicit "current
	    # figure", so concurrent requests cannot draw onto each other's figure.
	    fig, ax = plt.subplots(figsize=(8, 5))
	    ax.barh(labels, values, color=colors)
	    ax.invert_yaxis()  # first entry at the top
	    ax.set_title("GNH Indicator Similarity")
	    ax.set_xlabel("Score")
	    fig.tight_layout()

	    # Unregister the figure from pyplot; otherwise every call leaves one more
	    # open figure behind for the lifetime of the process. The Figure object
	    # itself stays valid for rendering.
	    plt.close(fig)
	    return fig

	# ---------- chips / prompts ----------
	# Choices of the "Word Mode" radio. chip_html_for and labels_for_mode match them
	# case-insensitively; anything starting with "gnh" selects GNH-domain labels.
	WORD_MODES = ["Matrice1", "Matrice", "English", "GNH Indicators"]

	def join_lex_words(color: str) -> str:
	    """Return all lexicon words of *color* as one space-separated string.

	    Words are taken from the matrice1, matrice and english lists in that
	    order; duplicates are dropped, keeping the first occurrence. An unknown
	    color gives an empty string.
	    """
	    entry = COLOR_LEX.get(color.lower(), {})
	    seen = {}
	    for field in ("matrice1", "matrice", "english"):
	        for word in entry.get(field, []):
	            seen.setdefault(word, None)
	    return " ".join(seen)

	@lru_cache(maxsize=256)
	def nearest_gnh_domain_for_color(color: str) -> Tuple[str, float]:
	    """Return the GNH domain closest to *color*'s lexicon words and the similarity.

	    The color's lexicon words are embedded and compared (cosine similarity)
	    with each domain description; the first domain with the highest
	    similarity wins. A color without lexicon words maps to
	    ``("Mental Wellness", 0.0)``.

	    The result depends only on *color* and on data loaded at import time,
	    and the function is called for every color on every UI refresh, so the
	    result is memoized to avoid re-running the embedding model each time.
	    """
	    text = join_lex_words(color)
	    if not text:
	        return "Mental Wellness", 0.0

	    color_vec = encode_text(text)
	    best, best_sim = None, -1.0
	    for domain, description in GNH_DOMAINS.items():
	        sim = float(util.cos_sim(color_vec, encode_text(description)).item())
	        if sim > best_sim:
	            best, best_sim = domain, sim
	    return best or "Mental Wellness", best_sim

	def chip_html_for(color: str, mode: str, max_words: int = 4) -> str:
	    """Build the HTML "chip" row for one color: swatch dot, bold name, pills.

	    In a mode starting with "gnh" (case-insensitive) the swatch uses the
	    color of the nearest GNH domain and a single pill shows that domain with
	    its similarity. In every other mode the swatch uses *color* itself and
	    the pills are the first *max_words* lexicon words of the selected list
	    ("english", "matrice1", otherwise "matrice"). An empty *color* gives "".
	    """
	    if not color:
	        return ""

	    mode_key = mode.lower()
	    if mode_key.startswith("gnh"):
	        domain, sim = nearest_gnh_domain_for_color(color)
	        fill = GNH_COLORS.get(domain, "#cccccc")
	        captions = [f"{domain} · {sim:.2f}"]
	    else:
	        # lexicon modes
	        field = {"english": "english", "matrice1": "matrice1"}.get(mode_key, "matrice")
	        fill = color
	        captions = COLOR_LEX.get(color.lower(), {}).get(field, [])[:max_words]

	    dot = f"<span style='display:inline-block;width:12px;height:12px;border-radius:50%;background:{fill};margin-right:6px;border:1px solid #999;vertical-align:middle'></span>"
	    pills = "".join(
	        f"<span style='display:inline-block;margin:2px 6px;padding:2px 8px;border-radius:12px;background:#eee;font-size:12px'>{caption}</span>"
	        for caption in captions
	    )
	    return f"<div style='margin-bottom:6px'>{dot}<b>{color.capitalize()}</b>{pills}</div>"

	def colors_for_sequence(seq_key: str) -> List[str]:
	    """Return the color list stored for *seq_key*, or an empty list if unknown."""
	    if seq_key in SEQ_TO_COLORS:
	        return SEQ_TO_COLORS[seq_key]  # 2–8 colors
	    return []

	def labels_for_mode(colors: List[str], mode: str) -> List[str]:
	    """Return one display label per color for the given word mode.

	    In a mode starting with "gnh" (case-insensitive) the label is the
	    nearest GNH domain of the color; otherwise it is the capitalized color.
	    """
	    if not mode.lower().startswith("gnh"):
	        return [name.capitalize() for name in colors]
	    return [nearest_gnh_domain_for_color(name)[0] for name in colors]

	# ---------- dynamic prompt UI (2–8 inputs) ----------
	# Number of per-color textboxes created in the UI. update_prompt_ui only produces
	# updates for this many inputs, so colors beyond it get a chip but no textbox.
	MAX_COLORS = 8 # upper bound; raise if some pathways exceed this

	def update_prompt_ui(seq_choice: str, word_mode: str):
	    """Compute the chip HTML and the textbox updates for a pathway / word mode.

	    Returns:
	        A tuple ``(chips_html, update_1, ..., update_MAX_COLORS)``. The first
	        ``len(colors)`` updates show a textbox labelled for its color and
	        clear its value; the remaining ones hide and reset the textbox.
	    """
	    colors = colors_for_sequence(SEQUENCE_ALIASES.get(seq_choice))
	    labels = labels_for_mode(colors, word_mode)

	    chip_parts = [chip_html_for(name, word_mode) for name in colors]
	    chips = "".join(chip_parts) or "No prompts available for this pathway."

	    # One update per textbox slot (visibility, label, placeholder, cleared value).
	    slot_updates = []
	    for slot in range(MAX_COLORS):
	        if slot >= len(colors):
	            slot_updates.append(gr.update(visible=False, value="", label=f"Input {slot+1}", placeholder="—"))
	            continue
	        lab = labels[slot] if slot < len(labels) else f"Input {slot+1}"
	        ph = f"Describe {lab} meaning..." if lab else "—"
	        slot_updates.append(gr.update(visible=True, label=f"{lab} meaning", placeholder=ph, value=""))
	    return (chips, *slot_updates)

	# ---------- MAIN ANALYSIS ----------
	def analyze(text: str, seq_choice: str, word_mode: str, *color_inputs):
	    """Run the full analysis for one button click.

	    Steps:
	      1. Resolve the chosen pathway; bail out with placeholder values if it
	         has no phrase in the dataset.
	      2. Score sentiment, emotion and accomplishment on the user's text only.
	      3. Build the updated pathway phrase: the base phrase followed by one
	         ``"{label}: {input}"`` piece per non-empty color input.
	      4. Compute the GNH indicators on the user's text plus the updated phrase.

	    Args:
	        text: The user's situation text (may be empty or None).
	        seq_choice: Pathway name as shown in the dropdown.
	        word_mode: Selected word mode; decides the labels used in the phrase.
	        *color_inputs: Current values of the per-color textboxes.

	    Returns:
	        A tuple matching the click handler's outputs: sentiment, emotion
	        string, accomplishment, updated phrase, top-5 indicator text, plot
	        figure, image path (or None), meta text, followed by the chip HTML
	        and one update per color textbox.
	    """
	    key = SEQUENCE_ALIASES.get(seq_choice)
	    if key not in SEQ_PHRASE:
	        return (5.0, "neutral (0.0)", 5.0, "Please choose a valid pathway.", "{}", None, None,
	                f"{seq_choice} (unavailable)", *update_prompt_ui(seq_choice, word_mode))

	    source_text = text or ""
	    sentiment = score_sentiment(source_text)
	    emotion, emo_conf = classify_emotion(source_text)
	    accomplishment = score_accomplishment(source_text)

	    colors = colors_for_sequence(key)
	    labels = labels_for_mode(colors, word_mode)

	    # Updated phrase = base phrase + each "{Label}: {input}"
	    pieces = [SEQ_PHRASE.get(key, "")]
	    for lab, user_text in zip(labels, color_inputs[:len(colors)]):
	        if isinstance(user_text, str) and user_text.strip():
	            pieces.append(f"{lab}: {user_text.strip()}")
	    updated_phrase = " // ".join([p for p in pieces if p])

	    augmented_text = " ".join([t for t in [text, updated_phrase] if t and t.strip()])
	    indicators = semantic_indicator_mapping(augmented_text, sentiment_score=sentiment)
	    fig = indicators_plot(indicators)
	    top5_str = "\n".join(f"{k}: {v}" for k, v in list(indicators.items())[:5])

	    emo_str = f"{emotion} ({emo_conf:.3f})"
	    # Plain "|" separator: a backslash before it is an invalid escape sequence
	    # and would show up literally in the UI.
	    meta = f"{key} | colors: {', '.join(colors) if colors else '—'}"
	    img_path = sequence_to_image_path(key)

	    # keep UI prompts in sync after run
	    # NOTE(review): update_prompt_ui sets value="" on every textbox, so the
	    # meanings the user typed are cleared after each run — confirm intended.
	    chips_and_inputs = update_prompt_ui(seq_choice, word_mode)

	    return (
	        sentiment, emo_str, accomplishment,
	        updated_phrase, top5_str, fig, img_path, meta,
	        *chips_and_inputs
	    )

	# ---------- Gradio UI ----------
	# Dropdown choices are the display names, in SEQUENCE_ALIASES insertion order.
	SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
	# Preselect "Direct" when available, otherwise the first choice.
	DEFAULT_SEQ = "Direct" if "Direct" in SEQ_CHOICES else SEQ_CHOICES[0]

	# Build the interface. Components appear in the order they are created here.
	with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
	    # NOTE(review): the intro text mentions a "Sequence" / "Auto" option, but the
	    # dropdown below is labelled "Pathway" and only offers SEQ_CHOICES — confirm
	    # whether the copy or the choices should change.
	    gr.Markdown("## RGB Root Matriz Color Plotter\n"
	                "Type a phrase. Choose a Sequence or keep Auto to recommend a pathway. "
	                "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")

	    with gr.Row():
	        inp = gr.Textbox(lines=4, label="Your situation / obstacle", placeholder="Describe the situation...")

	    with gr.Row():
	        seq = gr.Dropdown(choices=SEQ_CHOICES, value=DEFAULT_SEQ, label="Pathway")
	        word_mode = gr.Radio(choices=WORD_MODES, value="Matrice1", label="Word Mode")

	    chips_block = gr.HTML() # chips for all colors

	    # up to MAX_COLORS inputs (shown/hidden dynamically)
	    # All start hidden; update_prompt_ui decides which ones to show and how to label them.
	    color_inputs = []
	    for i in range(MAX_COLORS):
	        tb = gr.Textbox(visible=False, label=f"Input {i+1}", placeholder="—")
	        color_inputs.append(tb)

	    run = gr.Button("Generate Pathway Analysis", variant="primary")

	    # outputs
	    with gr.Row():
	        sent = gr.Number(label="Sentiment (1–10)")
	        emo = gr.Text(label="Emotion")
	        acc = gr.Number(label="Accomplishment (1–10)")

	    with gr.Row():
	        phrase_out = gr.Text(label="Updated Pathway Phrase (with your meanings)")
	        gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")

	    gnh_plot = gr.Plot(label="GNH Similarity")
	    img_out = gr.Image(label="Pathway image", type="filepath")
	    meta_out = gr.Text(label="Chosen pathway / colors")

	    # events
	    # Thin wrapper around update_prompt_ui, shared by all three refresh triggers below.
	    def _update_ui(seq_choice, mode):
	        return update_prompt_ui(seq_choice, mode)

	    # Changing the pathway or the word mode rebuilds the chips and the textboxes.
	    seq.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[chips_block, *color_inputs])
	    word_mode.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[chips_block, *color_inputs])

	    # The output list must stay in the same order as the tuple returned by analyze.
	    run.click(
	        fn=analyze,
	        inputs=[inp, seq, word_mode, *color_inputs],
	        outputs=[sent, emo, acc, phrase_out, gnh_top, gnh_plot, img_out, meta_out, chips_block, *color_inputs],
	    )

	    # Initialize the prompts for the default pathway when the page loads.
	    demo.load(
	        fn=_update_ui,
	        inputs=[seq, word_mode],
	        outputs=[chips_block, *color_inputs]
	    )

	# Start the Gradio server only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()