daniellegauthier committed on
Commit
617928b
·
verified ·
1 Parent(s): e180fb2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -38
app.py CHANGED
@@ -1,17 +1,26 @@
1
  import os
 
 
 
2
  import nltk
3
  import spacy
4
  import torch
5
  import matplotlib.pyplot as plt
6
- import io
7
- from typing import Tuple, Dict
8
-
9
  import gradio as gr
10
- from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
 
 
 
 
 
11
  from sentence_transformers import SentenceTransformer, util
12
- import torch.nn.functional as F
13
 
14
- # --------- lightweight setup helpers ---------
 
 
15
  def ensure_spacy():
16
  try:
17
  return spacy.load("en_core_web_sm")
@@ -26,11 +35,14 @@ def ensure_nltk():
26
  except LookupError:
27
  nltk.download("punkt")
28
 
29
- # --------- load resources once (cached) ---------
30
  ensure_nltk()
31
  nlp = ensure_spacy()
32
 
 
 
 
33
  sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
 
34
  bert_sentiment = pipeline(
35
  "sentiment-analysis",
36
  model="distilbert-base-uncased-finetuned-sst-2-english"
@@ -40,7 +52,9 @@ emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
40
  emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
41
  emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
42
 
43
- # --------- domain definitions & colors ---------
 
 
44
  GNH_DOMAINS: Dict[str, str] = {
45
  "Mental Wellness": "mental health, emotional clarity, peace of mind",
46
  "Social Wellness": "relationships, community, friendship, social harmony",
@@ -54,7 +68,7 @@ GNH_DOMAINS: Dict[str, str] = {
54
  "Living Standards": "housing, wealth, basic needs, affordability",
55
  "Cultural Diversity": "tradition, language, cultural expression, heritage",
56
  "Political Wellness": "rights, law, free speech, civic participation",
57
- "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife"
58
  }
59
 
60
  GNH_COLORS: Dict[str, str] = {
@@ -73,28 +87,89 @@ GNH_COLORS: Dict[str, str] = {
73
  "Cultural Diversity": "#9370db",
74
  }
75
 
76
- # --------- core scoring functions ---------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def classify_emotion(text: str) -> Tuple[str, float]:
78
  inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
79
  with torch.no_grad():
80
  logits = emotion_model(**inputs).logits
81
  probs = F.softmax(logits, dim=1).squeeze()
82
  labels = emotion_model.config.id2label
83
- top_idx = torch.argmax(probs).item()
84
  return labels[top_idx], float(probs[top_idx].item())
85
 
86
  def score_sentiment(text: str) -> float:
87
- """
88
- BERT sentiment → scale to [1..10]
89
- POSITIVE: ~[6..10]; NEGATIVE: ~[1..5]
90
- """
91
  out = bert_sentiment(text[:512])[0]
92
  label, score = out["label"], out["score"]
93
- if label == "POSITIVE":
94
- scaled = 5 + 5 * score
95
- else:
96
- scaled = 1 + 4 * (1 - score)
97
- return round(max(1, min(10, scaled)), 2)
98
 
99
  def score_accomplishment(text: str) -> float:
100
  doc = nlp(text)
@@ -103,14 +178,11 @@ def score_accomplishment(text: str) -> float:
103
  for token in doc:
104
  if token.text.lower() in key_phrases:
105
  score += 1.5
106
- if token.tag_ in {"VBD", "VBN"}: # past tense / participle
107
  score += 0.5
108
- return round(max(1, min(10, score)), 2)
109
 
110
  def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
111
- """
112
- SBERT cosine similarity to domain descriptions, then blend with sentiment_score.
113
- """
114
  text_vec = sbert_model.encode(text, convert_to_tensor=True)
115
  out: Dict[str, float] = {}
116
  for label, desc in GNH_DOMAINS.items():
@@ -121,12 +193,42 @@ def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weig
121
  out[label] = round(blended, 3)
122
  return dict(sorted(out.items(), key=lambda kv: -kv[1]))
123
 
124
- # --------- plotting helper ---------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  def indicators_plot(indicators: Dict[str, float]):
126
  labels = list(indicators.keys())
127
  values = list(indicators.values())
128
  colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]
129
-
130
  fig = plt.figure(figsize=(8, 5))
131
  plt.barh(labels, values, color=colors)
132
  plt.gca().invert_yaxis()
@@ -135,43 +237,92 @@ def indicators_plot(indicators: Dict[str, float]):
135
  plt.tight_layout()
136
  return fig
137
 
138
- # --------- Gradio app ---------
139
- def analyze(text: str):
 
 
 
 
140
  if not text or not text.strip():
141
- return 5.0, "neutral (0.0)", "[]", None, 5.0
 
 
 
 
 
 
142
  sentiment = score_sentiment(text)
143
  emotion, emo_conf = classify_emotion(text)
144
  accomplishment = score_accomplishment(text)
145
  indicators = semantic_indicator_mapping(text, sentiment)
 
 
 
 
 
 
 
 
146
 
 
147
  top5 = list(indicators.items())[:5]
148
  top5_str = "\n".join(f"{k}: {v}" for k, v in top5)
149
- fig = indicators_plot(indicators)
150
 
151
  return (
152
  sentiment,
153
  f"{emotion} ({emo_conf:.3f})",
 
 
 
154
  top5_str,
155
  fig,
156
- accomplishment,
 
 
157
  )
158
 
159
- with gr.Blocks(title="La Matriz GNH Analyzer") as demo:
160
- gr.Markdown("# La Matriz BERT + Emotion + GNH\nType a phrase. We’ll estimate sentiment (1–10), emotion, and show related GNH domains.")
 
 
 
161
  with gr.Row():
162
  inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
163
  with gr.Row():
164
- btn = gr.Button("Analyze", variant="primary")
 
 
 
165
  with gr.Row():
166
  sent = gr.Number(label="Sentiment (1–10)")
167
  emo = gr.Text(label="Emotion")
168
  acc = gr.Number(label="Accomplishment (1–10)")
 
 
 
 
 
169
  with gr.Row():
170
- top = gr.Text(label="Top GNH Indicators")
 
 
171
  with gr.Row():
172
- plot = gr.Plot(label="GNH Similarity")
 
 
 
 
 
 
 
 
 
173
 
174
- btn.click(fn=analyze, inputs=inp, outputs=[sent, emo, top, plot, acc])
 
 
 
 
175
 
176
  if __name__ == "__main__":
177
  demo.launch()
 
1
  import os
2
+ import io
3
+ from typing import Dict, Tuple, List
4
+
5
  import nltk
6
  import spacy
7
  import torch
8
  import matplotlib.pyplot as plt
9
+ import torch.nn.functional as F
10
+ import pandas as pd
 
11
  import gradio as gr
12
+
13
+ from transformers import (
14
+ pipeline,
15
+ AutoTokenizer,
16
+ AutoModelForSequenceClassification,
17
+ )
18
+
19
  from sentence_transformers import SentenceTransformer, util
 
20
 
21
+ # =========================
22
+ # 0) Lightweight setup
23
+ # =========================
24
  def ensure_spacy():
25
  try:
26
  return spacy.load("en_core_web_sm")
 
35
  except LookupError:
36
  nltk.download("punkt")
37
 
 
38
  ensure_nltk()
39
  nlp = ensure_spacy()
40
 
41
+ # =========================
42
+ # 1) Models (cached)
43
+ # =========================
44
  sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
45
+
46
  bert_sentiment = pipeline(
47
  "sentiment-analysis",
48
  model="distilbert-base-uncased-finetuned-sst-2-english"
 
52
  emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
53
  emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)
54
 
55
+ # =========================
56
+ # 2) GNH definitions
57
+ # =========================
58
  GNH_DOMAINS: Dict[str, str] = {
59
  "Mental Wellness": "mental health, emotional clarity, peace of mind",
60
  "Social Wellness": "relationships, community, friendship, social harmony",
 
68
  "Living Standards": "housing, wealth, basic needs, affordability",
69
  "Cultural Diversity": "tradition, language, cultural expression, heritage",
70
  "Political Wellness": "rights, law, free speech, civic participation",
71
+ "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
72
  }
73
 
74
  GNH_COLORS: Dict[str, str] = {
 
87
  "Cultural Diversity": "#9370db",
88
  }
89
 
90
+ # =========================
91
+ # 3) Pathway data
92
+ # - Reads phrases from bottom of "la matrice plus.csv"
93
+ # - Maps sequence keys -> phrase & image path
94
+ # =========================
95
+ CSV_PATH = "la matrice plus.csv"
96
+
97
+ # Map each UI dropdown label to the sequence key used for the CSV row and image lookups
98
+ SEQUENCE_ALIASES = {
99
+ "Auto (recommend)": "auto",
100
+ "Direct": "direct",
101
+ "Fem": "feminine", # CSV row is 'feminine', image is 'fem pathway.png'
102
+ "Knot": "knot",
103
+ "Masc": "masc",
104
+ "Pain": "pain",
105
+ "Prayer": "prayer",
106
+ "Precise": "precise",
107
+ "Practical": "practical",
108
+ "Plot": "plot",
109
+ # add more later (e.g., "Spiritual", "Sad") if/when images are added
110
+ }
111
+
112
+ SEQUENCE_IMAGE_FILES = {
113
+ "direct": "direct pathway.png",
114
+ "feminine": "fem pathway.png",
115
+ "knot": "knot pathway.png",
116
+ "masc": "masc pathway.png",
117
+ "pain": "pain pathway.png",
118
+ "prayer": "prayer pathway.png",
119
+ "precise": "precise pathway.png",
120
+ "practical": "practical pathway.png",
121
+ "plot": "plot pathway.png",
122
+ # add "spiritual": "...png", "sad": "...png" when you drop them in
123
+ }
124
+
125
def load_pathway_phrases(csv_path: str) -> Dict[str, str]:
    """
    Build the pathway phrase for every known sequence row in the CSV.

    A row is selected when its 'color' cell, trimmed and lower-cased, is one
    of the keys of SEQUENCE_IMAGE_FILES or one of 'spiritual' / 'sad'. The
    phrase for a row is the space-joined text of its non-null cells from
    column index 4 onward, with runs of whitespace collapsed to one space.
    If the same key appears on several rows, the last row wins.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Mapping of sequence key -> phrase. The mapping is empty when the
        file is missing, has no content, or has no 'color' column, so the
        caller can fall back to a default pathway instead of failing.
    """
    try:
        df = pd.read_csv(csv_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return {}
    if "color" not in df.columns:
        return {}

    valid_keys = set(SEQUENCE_IMAGE_FILES) | {"spiritual", "sad"}
    # Normalize once so the row filter and the dictionary key agree; filtering
    # on the un-stripped value would drop rows such as " Direct ".
    keys = df["color"].astype(str).str.strip().str.lower()
    selected = keys.isin(valid_keys)

    phrases: Dict[str, str] = {}
    for key, (_, row) in zip(keys[selected], df[selected].iterrows()):
        cells = (str(v) for v in row.iloc[4:].tolist() if pd.notna(v))
        # split() + join() trims the ends and collapses duplicate spaces
        phrases[key] = " ".join(" ".join(cells).split())
    return phrases
147
+
148
+ PATHWAY_PHRASES = load_pathway_phrases(CSV_PATH)
149
+
150
+ def sequence_to_image_path(seq_key: str) -> str | None:
151
+ fname = SEQUENCE_IMAGE_FILES.get(seq_key)
152
+ if fname and os.path.exists(fname):
153
+ return fname
154
+ return None # image optional—app will handle gracefully
155
+
156
+ # =========================
157
+ # 4) Core scoring functions
158
+ # =========================
159
  def classify_emotion(text: str) -> Tuple[str, float]:
160
  inputs = emotion_tokenizer(text, return_tensors="pt", truncation=True)
161
  with torch.no_grad():
162
  logits = emotion_model(**inputs).logits
163
  probs = F.softmax(logits, dim=1).squeeze()
164
  labels = emotion_model.config.id2label
165
+ top_idx = int(torch.argmax(probs).item())
166
  return labels[top_idx], float(probs[top_idx].item())
167
 
168
  def score_sentiment(text: str) -> float:
 
 
 
 
169
  out = bert_sentiment(text[:512])[0]
170
  label, score = out["label"], out["score"]
171
+ scaled = 5 + 5 * score if label == "POSITIVE" else 1 + 4 * (1 - score)
172
+ return round(min(10, max(1, scaled)), 2)
 
 
 
173
 
174
  def score_accomplishment(text: str) -> float:
175
  doc = nlp(text)
 
178
  for token in doc:
179
  if token.text.lower() in key_phrases:
180
  score += 1.5
181
+ if token.tag_ in {"VBD", "VBN"}:
182
  score += 0.5
183
+ return round(min(10, max(1, score)), 2)
184
 
185
  def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
 
 
 
186
  text_vec = sbert_model.encode(text, convert_to_tensor=True)
187
  out: Dict[str, float] = {}
188
  for label, desc in GNH_DOMAINS.items():
 
193
  out[label] = round(blended, 3)
194
  return dict(sorted(out.items(), key=lambda kv: -kv[1]))
195
 
196
+ # =========================
197
+ # 5) Pathway selection logic
198
+ # =========================
199
def suggest_sequence(text: str) -> Tuple[str, float]:
    """
    Choose the best pathway by SBERT similarity between the input text
    and each non-empty pathway phrase in PATHWAY_PHRASES.

    Args:
        text: The user's input text.

    Returns:
        (sequence_key, similarity_score). When there is no non-empty phrase
        to compare against, the fallback ("direct", 0.0) is returned. On a
        tie the earliest phrase in PATHWAY_PHRASES wins.
    """
    candidates = [(key, phrase) for key, phrase in PATHWAY_PHRASES.items() if phrase]
    if not candidates:
        # Covers both an empty table and a table holding only blank phrases,
        # so the internal "-1.0" starting value never leaks to the caller.
        return "direct", 0.0

    keys = [key for key, _ in candidates]
    phrases = [phrase for _, phrase in candidates]

    text_vec = sbert_model.encode(text, convert_to_tensor=True)
    # Encode all phrases in one batch instead of one model call per phrase.
    phrase_vecs = sbert_model.encode(phrases, convert_to_tensor=True)
    # cos_sim of one vector against N vectors yields a 1 x N matrix.
    sims = util.cos_sim(text_vec, phrase_vecs)[0]
    best_idx = int(torch.argmax(sims).item())
    return keys[best_idx], float(sims[best_idx].item())
217
+
218
def pathway_payload(seq_key: str) -> Tuple[str, str | None]:
    """
    Return (phrase, image_path) for a given sequence key.

    The key is trimmed and lower-cased before lookup. The phrase is an empty
    string when PATHWAY_PHRASES has no entry for the key; the image path is
    whatever sequence_to_image_path returns for it.
    """
    normalized = seq_key.strip().lower()
    return PATHWAY_PHRASES.get(normalized, ""), sequence_to_image_path(normalized)
224
+
225
+ # =========================
226
+ # 6) Plot helper (GNH bars)
227
+ # =========================
228
  def indicators_plot(indicators: Dict[str, float]):
229
  labels = list(indicators.keys())
230
  values = list(indicators.values())
231
  colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]
 
232
  fig = plt.figure(figsize=(8, 5))
233
  plt.barh(labels, values, color=colors)
234
  plt.gca().invert_yaxis()
 
237
  plt.tight_layout()
238
  return fig
239
 
240
+ # =========================
241
+ # 7) Gradio app
242
+ # =========================
243
+ SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
244
+
245
def analyze(text: str, seq_choice: str):
    """
    Score the input text and resolve the pathway to show for it.

    Args:
        text: The user's input text.
        seq_choice: A UI label from SEQUENCE_ALIASES; unknown labels are
            treated as the automatic recommendation.

    Returns:
        A 10-tuple, in this order:
        (sentiment, emotion text, accomplishment, pathway key,
         pathway phrase, top-5 indicator text, indicator figure,
         pathway image path, reported selection, similarity).
        Blank input returns placeholders with the same arity and order.
    """
    if not text or not text.strip():
        # Must have the same 10 slots as the full result below: the image
        # slot was previously missing, which shifted later values and left
        # the UI callback one output short.
        return (
            5.0, "neutral (0.0)", 5.0,
            "—", None,
            "{}", None, None,
            "—", 0.0,
        )

    # 1) scores
    sentiment = score_sentiment(text)
    emotion, emo_conf = classify_emotion(text)
    accomplishment = score_accomplishment(text)
    indicators = semantic_indicator_mapping(text, sentiment)
    fig = indicators_plot(indicators)

    # 2) pathway: recommend one only when the user left the choice on auto
    chosen_key = SEQUENCE_ALIASES.get(seq_choice, "auto")
    if chosen_key == "auto":
        final_key, auto_sim = suggest_sequence(text)
        reported_choice = final_key
    else:
        final_key, auto_sim = chosen_key, None
        reported_choice = seq_choice

    phrase, img_path = pathway_payload(final_key)

    # 3) outputs
    top5 = list(indicators.items())[:5]
    top5_str = "\n".join(f"{k}: {v}" for k, v in top5)

    return (
        sentiment,
        f"{emotion} ({emo_conf:.3f})",
        accomplishment,
        final_key,            # selected sequence key
        phrase or "—",
        top5_str,
        fig,
        img_path,             # pathway image (optional)
        reported_choice,
        float(auto_sim or 0.0),
    )
283
 
284
+ with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
285
+ gr.Markdown("## La Matriz Consulting, feat. BERT Emotion + GNH + Pathway\n"
286
+ "Type a phrase. Choose a **Sequence** or keep **Auto** to recommend a pathway. "
287
+ "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")
288
+
289
  with gr.Row():
290
  inp = gr.Textbox(lines=4, label="Input text", placeholder="e.g., I finally quit my toxic job and feel lighter.")
291
  with gr.Row():
292
+ seq = gr.Dropdown(choices=SEQ_CHOICES, value="Auto (recommend)", label="Sequence choice")
293
+
294
+ btn = gr.Button("Analyze", variant="primary")
295
+
296
  with gr.Row():
297
  sent = gr.Number(label="Sentiment (1–10)")
298
  emo = gr.Text(label="Emotion")
299
  acc = gr.Number(label="Accomplishment (1–10)")
300
+
301
+ with gr.Row():
302
+ seq_used = gr.Text(label="Chosen pathway key")
303
+ phrase_out = gr.Text(label="Pathway phrase")
304
+
305
  with gr.Row():
306
+ gnh_top = gr.Text(label="Top GNH Indicators (Top 5)")
307
+ gnh_plot = gr.Plot(label="GNH Similarity")
308
+
309
  with gr.Row():
310
+ pathway_img = gr.Image(label="Pathway image", type="filepath")
311
+ auto_meta = gr.Text(label="Auto selection (key, similarity)")
312
+
313
def _wrap_analyze(text, seq_choice):
    """
    Adapt analyze() for the UI: replace its last two values (auto key and
    similarity) with a single text summary, leaving the rest unchanged.

    The summary is only filled in when the dropdown was left on
    "Auto (recommend)"; otherwise a dash is shown.
    """
    *visible, auto_key, auto_sim = analyze(text, seq_choice)
    if seq_choice == "Auto (recommend)":
        meta = f"{auto_key} (similarity={auto_sim:.3f})"
    else:
        meta = "—"
    return (*visible, meta)
320
 
321
+ btn.click(
322
+ fn=_wrap_analyze,
323
+ inputs=[inp, seq],
324
+ outputs=[sent, emo, acc, seq_used, phrase_out, gnh_top, gnh_plot, pathway_img, auto_meta]
325
+ )
326
 
327
  if __name__ == "__main__":
328
  demo.launch()