import os, re
from typing import Dict, Tuple, List

import nltk, spacy, torch, pandas as pd, matplotlib.pyplot as plt
import torch.nn.functional as F
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util

# -------------------- setup --------------------
def ensure_spacy():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if it is missing.

    Returns:
        The loaded spaCy pipeline object.

    Raises:
        OSError: if the model still cannot be loaded after the download attempt.
    """
    model_name = "en_core_web_sm"
    try:
        return spacy.load(model_name)
    except OSError:
        # spaCy reports a missing model package as OSError.
        # Import only the function: a function-level ``import spacy.cli`` would
        # bind ``spacy`` as a *local* name for the whole function, which makes
        # the ``spacy.load`` call above fail with UnboundLocalError and forces
        # a download on every start.
        from spacy.cli import download

        download(model_name)
        return spacy.load(model_name)

def ensure_nltk():
    """Make sure the NLTK ``punkt`` tokenizer data is available locally.

    Looks the resource up in NLTK's data path and downloads it only when the
    lookup fails.
    """
    punkt_missing = False
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        punkt_missing = True

    if punkt_missing:
        nltk.download("punkt")

# Everything below runs at import time: importing this module checks/downloads
# the NLTK and spaCy resources and instantiates every model used by the app.
ensure_nltk()
nlp = ensure_spacy()

# Sentence encoder used for all cosine-similarity comparisons (see encode_text).
sbert_model = SentenceTransformer("all-MiniLM-L6-v2")
# Sentiment pipeline; score_sentiment reads its "label" and "score" fields.
bert_sentiment = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")

# Emotion classifier: tokenizer + sequence-classification head, used by classify_emotion.
emotion_model_name = "j-hartmann/emotion-english-distilroberta-base"
emotion_tokenizer = AutoTokenizer.from_pretrained(emotion_model_name)
emotion_model = AutoModelForSequenceClassification.from_pretrained(emotion_model_name)

# -------------------- constants --------------------
# CSV paths are relative, so they are resolved against the current working directory.
CSV_PATH_PLUS  = "la matrice plus.csv"   # pathways + colors + template words (read by load_pathway_info)
CSV_PATH_COLOR = "la matrice.csv"        # color lexicon (read by load_color_lexicon)

# Display name shown in the "Pathway" dropdown -> internal pathway key.
# The keys on the right are matched against the lower-cased 'color' column of
# CSV_PATH_PLUS and are also the keys of SEQUENCE_IMAGE_FILES.
SEQUENCE_ALIASES = {
    "Direct": "direct",
    "Feminine": "feminine",
    "Knot": "knot",
    "Masculine": "masc",
    "Pain": "pain",
    "Prayer": "prayer",
    "Precise": "precise",
    "Practical": "practical",
    "Plot": "plot",
    "Spiritual": "spiritual",
    "Sad": "sad",
}

# Internal pathway key -> image file name. sequence_to_image_path only returns
# a name when the file actually exists on disk.
SEQUENCE_IMAGE_FILES = {
    "direct": "direct pathway.png",
    "feminine": "fem pathway.png",
    "knot": "knot pathway.png",
    "masc": "masc pathway.png",
    "pain": "pain pathway.png",
    "prayer": "prayer pathway.png",
    "precise": "precise pathway.png",
    "practical": "practical pathway.png",
    "plot": "plot pathway.png",
    "spiritual": "spiritual pathway.png",
    "sad": "sad pathway.png"
}

# GNH domain name -> keyword description. The description text (not the name)
# is what gets embedded and compared against user text / color vocabulary.
GNH_DOMAINS: Dict[str, str] = {
    "Mental Wellness": "mental health, emotional clarity, peace of mind",
    "Social Wellness": "relationships, community, friendship, social harmony",
    "Economic Wellness": "income, savings, financial stability, cost of living",
    "Workplace Wellness": "career, work-life balance, promotion, productivity",
    "Physical Wellness": "physical health, sleep, fitness, exercise",
    "Environmental Wellness": "green space, nature, environmental care",
    "Health": "healthcare, medical care, recovery, well-being",
    "Education Value": "learning, education, school, knowledge, wisdom",
    "Good Governance": "freedom, justice, fairness, democratic participation",
    "Living Standards": "housing, wealth, basic needs, affordability",
    "Cultural Diversity": "tradition, language, cultural expression, heritage",
    "Political Wellness": "rights, law, free speech, civic participation",
    "Ecological Diversity": "biodiversity, forest, ecosystem, wildlife",
}

# GNH domain name -> bar color (hex) used by indicators_plot; domains missing
# from this map are drawn in "#cccccc" there.
GNH_COLORS: Dict[str, str] = {
    "Economic Wellness": "#808080",
    "Mental Wellness": "#FA005A",
    "Workplace Wellness": "#ffd700",
    "Physical Wellness": "#FAB478",
    "Social Wellness": "#ffa500",
    "Political Wellness": "#ffffff",
    "Environmental Wellness": "#0000FF",
    "Ecological Diversity": "#00FF00",
    "Health": "#FF0000",
    "Good Governance": "#000000",
    "Education Value": "#8b4513",
    "Living Standards": "#ffff00",
    "Cultural Diversity": "#B432FF",
}

# Choices of the "Word Mode" radio. Modes starting with "gnh" (case-insensitive)
# switch labels/placeholders to GNH domains; the others select a lexicon column.
WORD_MODES = ["Matrice1", "Matrice", "English", "GNH Indicators"]
# Number of per-color textboxes created in the UI; pathways with more colors
# than this only get inputs for the first MAX_COLORS.
MAX_COLORS = 8

# -------------------- loaders --------------------
def _find_col(df: pd.DataFrame, candidates: List[str]) -> str | None:
    names = {c.lower(): c for c in df.columns}
    for c in candidates:
        if c.lower() in names: return names[c.lower()]
    for want in candidates:
        ww = want.replace(" ", "").replace("-", "")
        for lc, orig in names.items():
            if ww in lc.replace(" ", "").replace("-", ""):
                return orig
    return None

def load_pathway_info(csv_path_plus: str):
    """Read the pathway CSV and extract, per pathway, its colors and template phrase.

    Only rows whose ``color`` cell (trimmed, lower-cased) equals one of the
    values of ``SEQUENCE_ALIASES`` are used. For such a row:

    * the ``r`` cell is split on commas/whitespace into a de-duplicated,
      lower-cased color list (an empty cell yields an empty list);
    * every other column except ``color``/``r``/``g``/``b`` is concatenated,
      whitespace-normalised, into the template phrase.

    Args:
        csv_path_plus: path of the CSV file to read.

    Returns:
        ``(seq_to_colors, seq_phrase)``: two dicts keyed by pathway key. When
        a key appears on several rows, the last row wins.

    Raises:
        KeyError: if the CSV has no ``color`` column.
    """
    df = pd.read_csv(csv_path_plus)
    keys = set(SEQUENCE_ALIASES.values())
    # Trim as well as lower-case, so the filter agrees with how `key` is built
    # below (a padded cell such as " knot" must not be dropped silently).
    normalized = df["color"].astype(str).str.strip().str.lower()
    rows = df[normalized.isin(keys)].copy()

    seq_to_colors: Dict[str, List[str]] = {}
    seq_phrase: Dict[str, str] = {}

    # colors live in 'r' (list), template = concat of the other fields
    cols_for_phrase = [c for c in df.columns if c not in ("color", "r", "g", "b")]
    for _, row in rows.iterrows():
        key = str(row["color"]).strip().lower()

        # An empty CSV cell is NaN, which is truthy and stringifies to "nan";
        # map it to "" explicitly so it does not become a color called "nan".
        raw_colors = row.get("r")
        color_list = "" if pd.isna(raw_colors) else str(raw_colors)
        colors = [c.strip().lower() for c in re.split(r"[,\s]+", color_list) if c.strip()]
        seq_to_colors[key] = list(dict.fromkeys(colors))  # de-dup, keep order

        vals = []
        for c in cols_for_phrase:
            v = row.get(c)
            if pd.notna(v):
                s = str(v).strip()
                if s and s.lower() != "nan":
                    vals.append(s)
        # Join the fields, then collapse any run of whitespace to one space.
        seq_phrase[key] = " ".join(" ".join(vals).split())

    return seq_to_colors, seq_phrase

def _split_words(s: str) -> List[str]:
    """Split a delimited cell into lower-cased words.

    Commas, semicolons, slashes, pipes and whitespace all act as separators.
    Non-string input yields an empty list; empty fragments are dropped.
    """
    if not isinstance(s, str):
        return []

    words: List[str] = []
    for fragment in re.split(r"[,\;/\|\s]+", s.strip()):
        cleaned = fragment.strip().lower()
        if cleaned:
            words.append(cleaned)
    return words

def load_color_lexicon(csv_path_color: str):
    """Read the color lexicon CSV into ``{color: {column_key: [words]}}``.

    The color, "matrice1", "matrice" and English-word columns are located with
    ``_find_col``; a column that cannot be found contributes an empty word
    list. Rows without a color name are skipped.

    Args:
        csv_path_color: path of the CSV file to read.

    Returns:
        Dict keyed by lower-cased color name; each value has the keys
        ``"matrice1"``, ``"matrice"`` and ``"english"`` mapping to word lists
        produced by ``_split_words``.
    """
    df = pd.read_csv(csv_path_color)
    color_col = _find_col(df, ["color", "colour"])
    m1_col = _find_col(df, ["matrice1", "matrice 1"])
    m_col  = _find_col(df, ["matrice"])
    en_col = _find_col(df, ["english-words-code", "english words code", "english_words_code", "english"])

    def _cell(row, col) -> str:
        # Empty CSV cells are NaN; str(NaN) == "nan" would otherwise leak into
        # the lexicon as a word (or as a color name).
        if col is None:
            return ""
        value = row.get(col)
        return "" if pd.isna(value) else str(value)

    lex: Dict[str, Dict[str, List[str]]] = {}
    for _, row in df.iterrows():
        cname = _cell(row, color_col).strip().lower()
        if not cname:
            continue
        lex[cname] = {
            "matrice1": _split_words(_cell(row, m1_col)),
            "matrice":  _split_words(_cell(row, m_col)),
            "english":  _split_words(_cell(row, en_col)),
        }
    return lex

# Loaded once at import time; both CSV files must be readable from the paths
# above or the import fails.
# SEQ_TO_COLORS: pathway key -> list of color names; SEQ_PHRASE: pathway key -> template phrase.
SEQ_TO_COLORS, SEQ_PHRASE = load_pathway_info(CSV_PATH_PLUS)
# COLOR_LEX: color name -> {"matrice1" | "matrice" | "english": [words]}.
COLOR_LEX = load_color_lexicon(CSV_PATH_COLOR)

def sequence_to_image_path(seq_key: str) -> str | None:
    """Return the image file name for a pathway key, or ``None``.

    ``None`` is returned both when the key has no entry in
    ``SEQUENCE_IMAGE_FILES`` and when the mapped file does not exist on disk.
    """
    image_name = SEQUENCE_IMAGE_FILES.get(seq_key)
    if not image_name:
        return None
    if not os.path.exists(image_name):
        return None
    return image_name

# -------------------- NLP helpers --------------------
def encode_text(t: str):
    """Embed ``t`` with the module-level sentence encoder and return the tensor."""
    embedding = sbert_model.encode(t, convert_to_tensor=True)
    return embedding

def classify_emotion(text: str) -> Tuple[str, float]:
    """Classify ``text`` with the emotion model.

    Returns:
        ``(label, probability)`` for the highest-scoring class, where the label
        comes from the model config's ``id2label`` mapping.
    """
    encoded = emotion_tokenizer(text, return_tensors="pt", truncation=True)
    # Inference only: no gradients needed.
    with torch.no_grad():
        raw_scores = emotion_model(**encoded).logits
        distribution = F.softmax(raw_scores, dim=1).squeeze()
    winner = int(torch.argmax(distribution).item())
    confidence = float(distribution[winner].item())
    return emotion_model.config.id2label[winner], confidence

def score_sentiment(text: str) -> float:
    """Map the sentiment pipeline's verdict on ``text`` to a 1-10 score.

    Only the first 512 characters of ``text`` are sent to the pipeline. A
    "POSITIVE" label is scaled as ``5 + 5 * score``; any other label as
    ``1 + 4 * (1 - score)``. The result is clamped to [1, 10] and rounded to
    two decimals.
    """
    verdict = bert_sentiment(text[:512])[0]
    confidence = verdict["score"]
    if verdict["label"] == "POSITIVE":
        scaled = 5 + 5 * confidence
    else:
        scaled = 1 + 4 * (1 - confidence)
    clamped = min(10, max(1, scaled))
    return round(clamped, 2)

def score_accomplishment(text: str) -> float:
    """Heuristic 1-10 "accomplishment" score based on spaCy tokens.

    Starts at 5.0, adds 1.5 for every token whose lower-cased text is one of
    the cue words and 0.5 for every token tagged VBD or VBN, then clamps to
    [1, 10] and rounds to two decimals.
    """
    # NOTE(review): cues are compared against single tokens, so the two-word
    # entry "walked away" looks like it can never match on its own.
    cue_words = {"finally","told","decided","quit","refused","stood","walked","walked away","returned","return"}
    past_tags = {"VBD","VBN"}

    total = 5.0
    for tok in nlp(text):
        if tok.text.lower() in cue_words:
            total += 1.5
        if tok.tag_ in past_tags:
            total += 0.5

    clamped = min(10, max(1, total))
    return round(clamped, 2)

def semantic_indicator_mapping(text: str, sentiment_score: float, sentiment_weight: float = 0.3) -> Dict[str, float]:
    """Score ``text`` against every GNH domain.

    For each domain the cosine similarity between the text embedding and the
    domain-description embedding is clamped to [0, 1] and blended with the
    sentiment score (scaled to 0-1) using ``sentiment_weight``.

    Returns:
        Dict of domain -> blended score (3 decimals), ordered from highest to
        lowest score.
    """
    text_vec = encode_text(text)
    scores: Dict[str, float] = {}
    for domain, description in GNH_DOMAINS.items():
        similarity = float(util.cos_sim(text_vec, encode_text(description)).item())
        similarity = max(0.0, min(1.0, similarity))
        blended = (1 - sentiment_weight) * similarity + sentiment_weight * (sentiment_score / 10.0)
        scores[domain] = round(blended, 3)

    # Stable descending sort: ties keep GNH_DOMAINS order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)

def indicators_plot(indicators: Dict[str, float]):
    """Draw a horizontal bar chart of GNH indicator scores.

    Args:
        indicators: domain name -> score, in the order the bars should appear
            from top to bottom.

    Returns:
        A matplotlib ``Figure``. Bars are colored via ``GNH_COLORS``, with
        ``"#cccccc"`` for domains that have no configured color.
    """
    labels = list(indicators)
    values = list(indicators.values())
    colors = [GNH_COLORS.get(label, "#cccccc") for label in labels]

    # Build the figure directly instead of through plt.figure(): pyplot keeps a
    # global reference to every figure it creates until plt.close() is called,
    # so a long-running app would accumulate one open figure per request.
    fig = plt.Figure(figsize=(8, 5))
    ax = fig.subplots()
    ax.barh(labels, values, color=colors)
    ax.invert_yaxis()  # first entry at the top
    ax.set_title("GNH Indicator Similarity")
    ax.set_xlabel("Score")
    fig.tight_layout()
    return fig

# -------------------- prompt building (legible placeholders) --------------------
def join_all_words(color: str) -> List[str]:
    """Return every lexicon word for ``color``, de-duplicated in first-seen order.

    Words are taken from the "matrice1", "matrice" and "english" lists, in that
    order. An unknown color yields an empty list.
    """
    entry = COLOR_LEX.get(color.lower(), {})
    ordered: Dict[str, None] = {}
    for column in ("matrice1", "matrice", "english"):
        for word in entry.get(column, []):
            ordered.setdefault(word, None)
    return list(ordered)

def nearest_gnh_domain_for_color(color: str) -> Tuple[str, float]:
    """Find the GNH domain whose description is most similar to a color's words.

    Returns:
        ``(domain, similarity)``. When the color has no lexicon words the
        result is ``("Mental Wellness", 0.0)``; "Mental Wellness" is also the
        fallback when no domain beats the initial similarity of -1.0.
    """
    vocabulary = " ".join(join_all_words(color))
    if not vocabulary:
        return "Mental Wellness", 0.0

    color_vec = encode_text(vocabulary)
    top_domain = None
    top_score = -1.0
    for domain, description in GNH_DOMAINS.items():
        score = float(util.cos_sim(color_vec, encode_text(description)).item())
        if score > top_score:
            top_domain = domain
            top_score = score

    return top_domain or "Mental Wellness", top_score

def labels_for_mode(colors: List[str], mode: str) -> List[str]:
    """Return one display label per color for the given word mode.

    In a mode whose name starts with "gnh" (case-insensitive) the label is the
    nearest GNH domain of the color; otherwise it is the capitalized color name.
    """
    if not mode.lower().startswith("gnh"):
        return [name.capitalize() for name in colors]

    domain_labels: List[str] = []
    for name in colors:
        domain, _similarity = nearest_gnh_domain_for_color(name)
        domain_labels.append(domain)
    return domain_labels

def placeholder_for(color: str, mode: str) -> str:
    """
    Build the placeholder text for a color's input box in the given word mode.

    Resolution order:
      1. GNH mode: "<domain>: <domain description>" of the nearest GNH domain.
      2. Up to 12 words from the lexicon column selected by the mode
         (unknown modes fall back to the "matrice" column).
      3. Up to 12 words from the first other non-empty lexicon column,
         prefixed with "(from <column>) ".
      4. The nearest GNH domain, prefixed with "(mapped GNH) ".
    """
    color_lc = color.lower()
    mode_lc = mode.lower()

    if mode_lc.startswith("gnh"):
        domain, _ = nearest_gnh_domain_for_color(color_lc)
        return f"{domain}: {GNH_DOMAINS.get(domain, '')}"

    lexicon_columns = ("matrice1", "matrice", "english")
    chosen_column = mode_lc if mode_lc in lexicon_columns else "matrice"

    entry = COLOR_LEX.get(color_lc, {})
    chosen_words = entry.get(chosen_column, [])
    if chosen_words:
        return ", ".join(chosen_words[:12])

    # Chosen column is empty: borrow from the remaining columns, in fixed order.
    display_names = {"matrice1": "Matrice1", "matrice": "Matrice", "english": "English"}
    for column in lexicon_columns:
        if column == chosen_column:
            continue
        borrowed = entry.get(column, [])
        if borrowed:
            return f"(from {display_names[column]}) " + ", ".join(borrowed[:12])

    # Nothing in the lexicon at all: describe the mapped GNH domain instead.
    domain, _ = nearest_gnh_domain_for_color(color_lc)
    return f"(mapped GNH) {domain}: {GNH_DOMAINS.get(domain, '')}"


def simple_color_legend(colors: List[str]) -> str:
    """Render an HTML legend with one colored dot and name per color.

    Args:
        colors: color names; each is used both as the CSS background of the
            dot and, capitalized, as the visible caption.

    Returns:
        An HTML fragment, or a plain message when ``colors`` is empty.
    """
    # Local import keeps this block self-contained.
    import html

    if not colors:
        return "No prompts available for this pathway."

    parts = []
    for c in colors:
        # Color names come from a CSV file; escape them so characters such as
        # quotes or angle brackets cannot break out of the attribute or inject
        # markup into the page.
        css_color = html.escape(c, quote=True)
        caption = html.escape(c.capitalize(), quote=True)
        dot = f"<span style='display:inline-block;width:10px;height:10px;border-radius:50%;background:{css_color};margin-right:8px;border:1px solid #999;vertical-align:middle'></span>"
        parts.append(f"<div style='margin:4px 0'>{dot}<b>{caption}</b></div>")
    return "<div>" + "".join(parts) + "</div>"

def colors_for_sequence(seq_key: str) -> List[str]:
    """Return the color list loaded for a pathway key, or ``[]`` if unknown."""
    if seq_key in SEQ_TO_COLORS:
        return SEQ_TO_COLORS[seq_key]
    return []

def update_prompt_ui(seq_choice: str, word_mode: str):
    """Compute the legend HTML and the state of all per-color input boxes.

    Returns:
        A tuple of ``legend_html`` followed by exactly ``MAX_COLORS`` Gradio
        updates. Boxes that correspond to a color of the chosen pathway are
        shown with a mode-specific label and placeholder; the rest are hidden.
        Every box has its value reset to the empty string.
    """
    colors = colors_for_sequence(SEQUENCE_ALIASES.get(seq_choice))
    labels = labels_for_mode(colors, word_mode)
    legend_html = simple_color_legend(colors)

    box_updates = []
    for slot in range(MAX_COLORS):
        generic_label = f"Input {slot+1}"
        if slot >= len(colors):
            # No color for this slot: hide the box and restore its defaults.
            box_updates.append(gr.update(visible=False, value="", label=generic_label, placeholder="—"))
            continue

        caption = labels[slot] if slot < len(labels) else generic_label
        hint = placeholder_for(colors[slot], word_mode)
        box_updates.append(gr.update(visible=True, label=f"{caption} meaning", placeholder=hint, value=""))

    return (legend_html, *box_updates)

# -------------------- template replacement --------------------
def render_phrase_template(base_phrase: str, colors: List[str], labels: List[str], inputs: List[str]) -> str:
    """
    Fill a pathway template with the user's meanings.

    Every occurrence of '<color>-pathway' (case-insensitive; 'brown-pathway',
    'brown pathway', 'Brown - Pathway', ...) is replaced with the text the user
    typed for that color. If the user left it empty, or no input was supplied
    for that color at all, the label (color name or mapped GNH indicator) is
    used instead. Finally a compact legend ' // Label: input' is appended for
    every non-empty input.

    Args:
        base_phrase: template text; ``None``/empty is treated as "".
        colors: color names, positionally aligned with ``labels`` and ``inputs``.
        labels: display label for each color.
        inputs: user text for each color; may be shorter than ``colors``.

    Returns:
        The rendered phrase.
    """
    text = base_phrase or ""

    # Pad so that colors without a supplied input still fall back to their label.
    padded_inputs = list(inputs) + [""] * max(0, len(colors) - len(inputs))
    triples = list(zip(colors, labels, padded_inputs))

    # build replacement map color -> replacement text
    rep: Dict[str, str] = {}
    for color, label, user in triples:
        typed = user.strip() if isinstance(user, str) else ""
        if color:  # a blank color name would match a bare "-pathway"
            rep[color.lower()] = typed or label

    if rep and text:
        # One alternation, longest name first, applied in a single pass: text
        # inserted for one color is never re-scanned for another color's token.
        alternatives = "|".join(re.escape(name) for name in sorted(rep, key=len, reverse=True))
        pattern = re.compile(rf"\b({alternatives})(?:\s*-\s*|\s+)pathway\b", re.IGNORECASE)
        # A function replacement inserts the text literally; passing user text
        # as a template string would interpret backslashes and group references.
        text = pattern.sub(lambda m: rep.get(m.group(1).lower(), m.group(0)), text)

    # Append the meanings the user actually typed, whether or not the template
    # contained a token for them.
    suffix_parts = [
        f"{label}: {user.strip()}"
        for _color, label, user in triples
        if isinstance(user, str) and user.strip()
    ]
    if suffix_parts:
        text = (text + " // " + " // ".join(suffix_parts)).strip()

    return text

# -------------------- main analysis --------------------
def analyze(text: str, seq_choice: str, word_mode: str, *color_inputs):
    """Run the full analysis for the "Generate" button.

    Args:
        text: the user's free-text description.
        seq_choice: pathway display name (a key of ``SEQUENCE_ALIASES``).
        word_mode: selected word mode.
        *color_inputs: current values of the per-color textboxes.

    Returns:
        A tuple matching the click handler's outputs: sentiment, emotion
        string, accomplishment, rendered phrase, top-5 indicator text, plot
        figure, image path, metadata string, followed by everything
        ``update_prompt_ui`` returns (legend + one update per textbox).
        For an unknown pathway, neutral placeholder values are returned.
    """
    key = SEQUENCE_ALIASES.get(seq_choice)
    if key not in SEQ_PHRASE:
        placeholders = (
            5.0, "neutral (0.0)", 5.0, "Choose a valid pathway.", "{}",
            None, None, f"{seq_choice} (unavailable)",
        )
        return placeholders + tuple(update_prompt_ui(seq_choice, word_mode))

    colors = colors_for_sequence(key)
    labels = labels_for_mode(colors, word_mode)

    # Fill the pathway template with the meanings typed for this pathway's colors.
    typed_meanings = list(color_inputs)[:len(colors)]
    updated_phrase = render_phrase_template(SEQ_PHRASE.get(key, ""), colors, labels, typed_meanings)

    # The models see the user's text and the rendered phrase together.
    non_blank = [part for part in (text, updated_phrase) if part and part.strip()]
    combined_text = " ".join(non_blank)

    sentiment = score_sentiment(combined_text)
    emotion, emo_conf = classify_emotion(combined_text)
    accomplishment = score_accomplishment(combined_text)

    indicators = semantic_indicator_mapping(combined_text, sentiment_score=sentiment)
    fig = indicators_plot(indicators)
    # indicators is already ordered best-first, so the first five are the top five.
    top5_str = "\n".join(f"{k}: {v}" for k, v in list(indicators.items())[:5])

    img_path = sequence_to_image_path(key)
    meta = f"{key} | colors: {', '.join(colors) if colors else '—'}"
    emo_str = f"{emotion} ({emo_conf:.3f})"

    # Keep the prompt area in sync with the current pathway / mode.
    # NOTE(review): update_prompt_ui resets every box value to "", so the
    # meanings the user typed are cleared after each run — confirm intended.
    prompt_updates = update_prompt_ui(seq_choice, word_mode)

    results = (sentiment, emo_str, accomplishment, updated_phrase, top5_str, fig, img_path, meta)
    return results + tuple(prompt_updates)

# -------------------- UI --------------------
# Dropdown choices are the display names; "Knot" is preselected when present.
SEQ_CHOICES = list(SEQUENCE_ALIASES.keys())
DEFAULT_SEQ = "Knot" if "Knot" in SEQ_CHOICES else SEQ_CHOICES[0]

# Components are created in the order they appear in the page.
with gr.Blocks(title="RGB Root Matriz Color Plotter") as demo:
    gr.Markdown("## RGB Root Matriz Color Plotter\n"
                "Type a phrase. Choose a **Sequence**. "
                "You’ll get sentiment, emotion, accomplishment, GNH bars, and the pathway phrase + image from the dataset.")

    with gr.Row():
        inp = gr.Textbox(lines=4, label="Your situation / obstacle", placeholder="Describe the situation...")

    with gr.Row():
        seq = gr.Dropdown(choices=SEQ_CHOICES, value=DEFAULT_SEQ, label="Pathway")
        word_mode = gr.Radio(choices=WORD_MODES, value="Matrice", label="Word Mode")

    # Filled by update_prompt_ui with one colored dot + name per pathway color.
    legend = gr.HTML()

    # A fixed pool of MAX_COLORS textboxes, all hidden initially;
    # update_prompt_ui shows and labels as many as the pathway has colors.
    color_boxes: List[gr.Textbox] = []
    for i in range(MAX_COLORS):
        color_boxes.append(gr.Textbox(visible=False, label=f"Input {i+1}", placeholder="—"))

    run = gr.Button("Generate Pathway Analysis", variant="primary")

    with gr.Row():
        sent = gr.Number(label="Sentiment (1–10)")
        emo  = gr.Text(label="Emotion")
        acc  = gr.Number(label="Accomplishment (1–10)")

    with gr.Row():
        phrase_out = gr.Text(label="Updated Pathway Phrase (template with your meanings)")
        gnh_top    = gr.Text(label="Top GNH Indicators (Top 5)")

    gnh_plot = gr.Plot(label="GNH Similarity")
    img_out  = gr.Image(label="Pathway image", type="filepath")
    meta_out = gr.Text(label="Chosen pathway / colors")

    # Thin wrapper so all three events below share one callback.
    def _update_ui(seq_choice, mode):
        return update_prompt_ui(seq_choice, mode)

    # Changing the pathway or the word mode re-labels (and clears) the color boxes.
    seq.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])
    word_mode.change(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

    # The output list must stay in the same order as the tuple returned by analyze:
    # eight result fields, then the legend, then one update per color box.
    run.click(
        fn=analyze,
        inputs=[inp, seq, word_mode, *color_boxes],
        outputs=[sent, emo, acc, phrase_out, gnh_top, gnh_plot, img_out, meta_out, legend, *color_boxes],
    )

    # Populate legend and color boxes for the default selection on page load.
    demo.load(fn=_update_ui, inputs=[seq, word_mode], outputs=[legend, *color_boxes])

if __name__ == "__main__":
    demo.launch()