Spaces:

daniellegauthier
/

RGB-Root-Matriz-Color-Plotter

Sleeping

App Files Files Community

RGB-Root-Matriz-Color-Plotter / app.py

daniellegauthier

Update app.py

dd6720a verified 2 months ago

raw

history blame

6.61 kB

	import os
	import nltk
	import spacy
	import torch
	import matplotlib.pyplot as plt
	import io
	from typing import Tuple, Dict

	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	from sentence_transformers import SentenceTransformer, util
	import torch.nn.functional as F

	# --------- lightweight setup helpers ---------
	def ensure_spacy():
	    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if missing.

	    Returns:
	        The pipeline object returned by ``spacy.load("en_core_web_sm")``.

	    Raises:
	        OSError: If the model still cannot be loaded after the download
	            attempt.
	    """
	    model_name = "en_core_web_sm"
	    try:
	        return spacy.load(model_name)
	    except OSError:
	        # spaCy signals a missing model package with OSError; only that
	        # case should trigger a download. Other failures must surface.
	        #
	        # Import just the symbol: a function-local ``import spacy.cli``
	        # would bind ``spacy`` as a local name for the whole function and
	        # make the ``spacy.load`` call above raise UnboundLocalError.
	        from spacy.cli import download as download_model

	        download_model(model_name)
	        return spacy.load(model_name)

	def ensure_nltk():
	    """Make sure the NLTK ``punkt`` tokenizer data is available locally.

	    Looks the resource up first and only calls ``nltk.download`` when the
	    lookup raises ``LookupError``.
	    """
	    try:
	        nltk.data.find("tokenizers/punkt")
	    except LookupError:
	        punkt_missing = True
	    else:
	        punkt_missing = False

	    if punkt_missing:
	        nltk.download("punkt")

	# --------- load resources once, at import time ---------
	# Everything in this section runs when the module is imported, so the
	# models are created a single time and shared by the scoring functions
	# defined further down (they read these names as module globals).
	ensure_nltk()
	nlp = ensure_spacy()  # spaCy pipeline used by score_accomplishment

	# Sentence-embedding model used by semantic_indicator_mapping.
	sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
	# Sentiment pipeline used by score_sentiment; that function compares the
	# returned label against "POSITIVE".
	bert_sentiment = pipeline(
	"sentiment-analysis",
	model="distilbert-base-uncased-finetuned-sst-2-english"
	)

	# Tokenizer/model pair used by classify_emotion, which applies softmax to
	# the logits itself and maps the arg-max index through config.id2label.
	emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
	emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
	emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)

	# --------- domain definitions & colors ---------
	# Domain name -> comma-separated description of that domain.
	# semantic_indicator_mapping embeds each description and compares it with
	# the embedded input text, so the wording here directly drives the scores.
	# Insertion order matters only for ties: results are sorted by score with
	# a stable sort, so equal scores keep the order listed here.
	GNH_DOMAINS: Dict[str, str] = {
	"Mental Wellness": "mental health, emotional clarity, peace of mind",
	"Social Wellness": "relationships, community, friendship, social harmony",
	"Economic Wellness": "income, savings, financial stability, cost of living",
	"Workplace Wellness": "career, work-life balance, promotion, productivity",
	"Physical Wellness": "physical health, sleep, fitness, exercise",
	"Environmental Wellness": "green space, nature, environmental care",
	"Health": "healthcare, medical care, recovery, well-being",
	"Education Value": "learning, education, school, knowledge, wisdom",
	"Good Governance": "freedom, justice, fairness, democratic participation",
	"Living Standards": "housing, wealth, basic needs, affordability",
	"Cultural Diversity": "tradition, language, cultural expression, heritage",
	"Political Wellness": "rights, law, free speech, civic participation",
	"Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
	}

	# Domain name -> hex colour of that domain's bar in indicators_plot.
	# Keys mirror GNH_DOMAINS; a label missing from this table is drawn with
	# the fallback "#cccccc" chosen in indicators_plot.
	# NOTE(review): "Political Wellness" is pure white, which may be invisible
	# on a white plot background - confirm this is intended.
	GNH_COLORS: Dict[str, str] = {
	"Economic Wellness": "#808080",
	"Mental Wellness": "#ffc0cb",
	"Workplace Wellness": "#ffd700",
	"Physical Wellness": "#f5deb3",
	"Social Wellness": "#ffa500",
	"Political Wellness": "#ffffff",
	"Environmental Wellness": "#87ceeb",
	"Ecological Diversity": "#228B22",
	"Health": "#ff6347",
	"Good Governance": "#000000",
	"Education Value": "#8b4513",
	"Living Standards": "#ffff00",
	"Cultural Diversity": "#9370db",
	}

	# --------- core scoring functions ---------
	def classify_emotion(text: str) -> Tuple[str, float]:
	    """Return the most probable emotion label for ``text`` and its probability.

	    The text is tokenized with truncation enabled, run through the emotion
	    model without gradient tracking, and the logits are turned into a
	    probability distribution with softmax.

	    Returns:
	        ``(label, probability)`` where ``label`` comes from the model's
	        ``config.id2label`` table and ``probability`` is a Python float.
	    """
	    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)

	    # Inference only: no autograd graph is needed.
	    with torch.no_grad():
	        raw_scores = emotion_model(**encoded).logits

	    # Softmax over the class axis, then drop the size-1 batch axis.
	    distribution = F.softmax(raw_scores, dim=1).squeeze()
	    best = torch.argmax(distribution).item()

	    id_to_label = emotion_model.config.id2label
	    confidence = float(distribution[best].item())
	    return id_to_label[best], confidence

	def score_sentiment(text: str) -> float:
	    """Convert the sentiment pipeline's verdict for ``text`` to a 1-10 score.

	    The pipeline yields a label plus that label's confidence ``score``:

	    * ``"POSITIVE"``  -> ``5 + 5 * score``
	    * any other label -> ``1 + 4 * (1 - score)``

	    A confidence of at least 0.5 (expected for the winning label of a
	    two-class model) therefore lands in roughly [7.5, 10] for positive
	    text and [1, 3] otherwise.

	    Returns:
	        The scaled value clamped to [1, 10] and rounded to two decimals.
	    """
	    # Truncate at the tokenizer level. Slicing the string to 512
	    # characters throws away usable text and can still exceed the model's
	    # token limit once special tokens are added.
	    result = bert_sentiment(text, truncation=True)[0]
	    label, score = result["label"], result["score"]

	    if label == "POSITIVE":
	        scaled = 5 + 5 * score
	    else:
	        scaled = 1 + 4 * (1 - score)

	    # Float bounds keep the return type a float even at the clamp limits.
	    return round(max(1.0, min(10.0, scaled)), 2)

	def score_accomplishment(text: str) -> float:
	    """Heuristic 1-10 "accomplishment" score based on word choice and tense.

	    Starting from 5.0, every token adds:

	    * 1.5 if its lower-cased text is one of the decisive-action words;
	    * 0.5 if its fine-grained tag is ``VBD`` or ``VBN`` (past tense or
	      past participle).

	    Both bonuses can apply to the same token.

	    Returns:
	        The total clamped to [1, 10] and rounded to two decimals.
	    """
	    # Single-word entries only: matching is per token, and tokens are split
	    # on whitespace, so the former "walked away" entry could never match.
	    # "walked" already covers that phrase, so scores are unchanged.
	    action_words = {"finally", "told", "decided", "quit", "refused", "stood", "walked"}
	    past_tags = {"VBD", "VBN"}

	    score = 5.0
	    for token in nlp(text):
	        if token.text.lower() in action_words:
	            score += 1.5
	        if token.tag_ in past_tags:
	            score += 0.5

	    # Float bounds keep the return type a float even at the clamp limits.
	    return round(max(1.0, min(10.0, score)), 2)

	def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
	    """Score ``text`` against every GNH domain, blended with sentiment.

	    For each domain the cosine similarity between the embedded text and
	    the embedded domain description is clamped to [0, 1] and combined as::

	        (1 - sentiment_weight) * similarity
	            + sentiment_weight * (sentiment_score / 10)

	    Args:
	        text: Input text to embed.
	        sentiment_score: Sentiment on the 1-10 scale; divided by 10 before
	            blending.
	        sentiment_weight: Share of the blended score that comes from
	            sentiment rather than semantic similarity.

	    Returns:
	        Domain name -> blended score rounded to three decimals, ordered
	        from highest to lowest score. Ties keep ``GNH_DOMAINS`` order.
	    """
	    if not GNH_DOMAINS:
	        return {}

	    domain_names = list(GNH_DOMAINS)
	    text_vec = sbert_model.encode(text, convert_to_tensor=True)

	    # Embed all descriptions in one batched call instead of one model
	    # invocation per domain, then compare them to the text in one shot.
	    desc_vecs = sbert_model.encode(
	        [GNH_DOMAINS[name] for name in domain_names],
	        convert_to_tensor=True,
	    )
	    similarities = util.cos_sim(text_vec, desc_vecs).squeeze(0).tolist()

	    # The sentiment contribution is identical for every domain.
	    sentiment_term = sentiment_weight * (sentiment_score / 10.0)

	    blended: Dict[str, float] = {}
	    for name, sim in zip(domain_names, similarities):
	        sim = max(0.0, min(1.0, sim))  # negative cosine counts as "unrelated"
	        blended[name] = round((1 - sentiment_weight) * sim + sentiment_term, 3)

	    # reverse=True keeps the sort stable, so ties stay in table order.
	    return dict(sorted(blended.items(), key=lambda kv: kv[1], reverse=True))

	# --------- plotting helper ---------
	def indicators_plot(indicators: Dict[str, float]):
	    """Draw a horizontal bar chart of the indicator scores.

	    Bars appear in the iteration order of ``indicators`` from top to
	    bottom and are coloured via ``GNH_COLORS`` (``"#cccccc"`` for labels
	    missing from that table).

	    Args:
	        indicators: Label -> score mapping to plot.

	    Returns:
	        A ``matplotlib.figure.Figure`` holding the chart.
	    """
	    # Build the figure directly instead of via pyplot. ``plt.figure``
	    # registers every figure with pyplot's global manager until it is
	    # explicitly closed, which leaks one figure per request in a
	    # long-running server.
	    from matplotlib.figure import Figure

	    labels = list(indicators)
	    values = list(indicators.values())
	    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

	    fig = Figure(figsize=(8, 5))
	    ax = fig.subplots()
	    # The thin outline keeps very light bars (e.g. "#ffffff") visible
	    # against the white axes background.
	    ax.barh(labels, values, color=colors, edgecolor="black", linewidth=0.5)
	    ax.invert_yaxis()  # first entry at the top
	    ax.set_title("GNH Indicator Similarity (Sentiment-weighted)")
	    ax.set_xlabel("Score")
	    fig.tight_layout()
	    return fig

	# --------- Gradio app ---------
	def analyze(text: str):
	    """Run every scorer on ``text`` and package the results for the UI.

	    Returns a 5-tuple in the order the click handler's outputs expect:
	    sentiment score, ``"<emotion> (<confidence>)"``, the top five
	    indicators as ``"name: score"`` lines, the indicator figure, and the
	    accomplishment score. Blank or missing input short-circuits to neutral
	    placeholder values without touching any model.
	    """
	    has_content = bool(text) and bool(text.strip())
	    if not has_content:
	        return 5.0, "neutral (0.0)", "[]", None, 5.0

	    sentiment_value = score_sentiment(text)
	    emotion_label, emotion_confidence = classify_emotion(text)
	    accomplishment_value = score_accomplishment(text)
	    domain_scores = semantic_indicator_mapping(text, sentiment_value)

	    # The mapping is already ordered best-first; keep the leading five.
	    leading = list(domain_scores.items())[:5]
	    summary_lines = [f"{name}: {value}" for name, value in leading]
	    summary = "\n".join(summary_lines)
	    figure = indicators_plot(domain_scores)

	    emotion_text = f"{emotion_label} ({emotion_confidence:.3f})"
	    return sentiment_value, emotion_text, summary, figure, accomplishment_value

	# UI layout, top to bottom: intro text, input box, action button, the
	# three scalar read-outs, the top-indicator list, and the bar chart.
	with gr.Blocks(title="La Matriz — GNH Analyzer") as demo:
	    gr.Markdown(
	        "# La Matriz — BERT + Emotion + GNH\n"
	        "Type a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains."
	    )

	    with gr.Row():
	        inp = gr.Textbox(
	            lines=4,
	            label="Input text",
	            placeholder="e.g., I finally quit my toxic job and feel lighter.",
	        )

	    with gr.Row():
	        btn = gr.Button("Analyze", variant="primary")

	    with gr.Row():
	        sent = gr.Number(label="Sentiment (1–10)")
	        emo = gr.Text(label="Emotion")
	        acc = gr.Number(label="Accomplishment (1–10)")

	    with gr.Row():
	        top = gr.Text(label="Top GNH Indicators")

	    with gr.Row():
	        plot = gr.Plot(label="GNH Similarity")

	    # Output order must match the tuple returned by analyze().
	    btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])

	# Start the web server only when run as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()