import os
os.environ['HF_HOME'] = '/tmp'

import time
import json
import re
import logging
from collections import Counter
from typing import Optional

from flask import Flask, request, jsonify, render_template
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# -------------------------
# App + logging
# -------------------------
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("summarizer")

# -------------------------
# Device selection
# -------------------------
USE_GPU = torch.cuda.is_available()
DEVICE = 0 if USE_GPU else -1
logger.info("CUDA available: %s. Using device: %s", USE_GPU, DEVICE)

# -------------------------
# Model names (we'll load summarizers lazily)
# -------------------------
PEGASUS_MODEL = "google/pegasus-large"
LED_MODEL = "allenai/led-large-16384"
PARAM_MODEL = "google/flan-t5-small"   # instruction model for parameter generation

# caches for lazy-loaded pipelines
_SUMMARIZER_CACHE = {}

# load the small param-generator right away (keeps it small)
logger.info("Loading parameter generator model: %s", PARAM_MODEL)
param_tokenizer = AutoTokenizer.from_pretrained(PARAM_MODEL)
param_model = AutoModelForSeq2SeqLM.from_pretrained(PARAM_MODEL)
param_generator = pipeline("text2text-generation", model=param_model, tokenizer=param_tokenizer, device=DEVICE)

# -------------------------
# Presets & utils
# -------------------------
LENGTH_PRESETS = {
    "short": {"min_length": 20, "max_length": 60},
    "medium": {"min_length": 60, "max_length": 130},
    "long": {"min_length": 130, "max_length": 300},
}

_STOPWORDS = {
    "the","and","is","in","to","of","a","that","it","on","for","as","are","with","was","be","by","this","an","or","from","at","which","we","has","have"
}

def tokenize_sentences(text):
    sents = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s.strip() for s in sents if s.strip()]

def extractive_prefilter(text, top_k=12):
    sents = tokenize_sentences(text)
    if len(sents) <= top_k:
        return text
    words = re.findall(r"\w+", text.lower())
    freqs = Counter(w for w in words if w not in _STOPWORDS)
    scored = []
    for i, s in enumerate(sents):
        ws = re.findall(r"\w+", s.lower())
        score = sum(freqs.get(w, 0) for w in ws)
        scored.append((score, i, s))
    scored.sort(reverse=True)
    chosen = [s for _, _, s in sorted(scored[:top_k], key=lambda t: t[1])]
    return " ".join(chosen)

def chunk_text_by_chars(text, max_chars=1500, overlap=200):
    if len(text) <= max_chars:
        return [text]
    parts = []
    start = 0
    while start < len(text):
        end = min(len(text), start + max_chars)
        chunk = text[start:end]
        nl = chunk.rfind('\n')
        if nl > max_chars * 0.6:
            end = start + nl
            chunk = text[start:end]
        parts.append(chunk.strip())
        start = max(end - overlap, end)
    return parts

def _first_int_from_text(s, fallback=None):
    m = re.search(r"\d{1,4}", s)
    return int(m.group()) if m else fallback

# -------------------------
# Lazy summarizer loader
# -------------------------
def get_summarizer(model_key: str):
    """
    Returns a pipeline summarizer for 'pegasus' or 'led', loading it lazily.
    model_key: "pegasus" or "led"
    """
    model_key = model_key.lower()
    if model_key in _SUMMARIZER_CACHE:
        return _SUMMARIZER_CACHE[model_key]

    if model_key == "pegasus":
        model_name = PEGASUS_MODEL
    elif model_key == "led":
        model_name = LED_MODEL
    else:
        raise ValueError("Unknown model_key: " + str(model_key))

    logger.info("Loading summarizer model '%s' (%s) on device %s ...", model_key, model_name, DEVICE)
    tok = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    pipe = pipeline("summarization", model=model, tokenizer=tok, device=DEVICE)
    _SUMMARIZER_CACHE[model_key] = pipe
    logger.info("Loaded summarizer '%s' successfully.", model_key)
    return pipe

# -------------------------
# Prompt and decision logic
# -------------------------
def apply_tone_instruction(text, tone, target_sentences=None):
    tone = (tone or "neutral").lower()
    if tone == "bullet":
        instr = "Produce concise bullet points. Each bullet short (<=20 words). No extra commentary."
    elif tone == "short":
        ts = target_sentences or 1
        instr = f"Summarize the content in {ts} sentence{'s' if ts>1 else ''}. Be highly abstractive and avoid copying sentences verbatim."
    elif tone == "formal":
        instr = "Summarize in a formal, professional tone in 2-4 sentences. Keep it precise and well-structured."
    elif tone == "casual":
        instr = "Summarize in a casual, conversational tone in 1-3 sentences. Use plain, friendly language."
    elif tone == "long":
        instr = "Provide a clear, structured summary in 4-8 sentences covering key points and context."
    else:
        instr = "Summarize the content in 2-3 sentences. Be clear and concise."
    instr += " Do not repeat information; prefer rephrasing."
    return f"{instr}\n\nText:\n{text}"

def generate_summarization_config(text):
    """
    Ask small instruction model for settings; fallback to heuristic.
    Returns dict with keys: length, min_length, max_length, tone
    """
    prompt = (
        "You are an assistant that recommends summarization settings.\n"
        "Given the text, respond ONLY with single-line JSON EXACTLY like:\n"
        '{"length":"short|medium|long","min_words":MIN,"max_words":MAX,"tone":"neutral|formal|casual|bullet"}\n\n'
        "Text:\n'''"
        + text[:4000] + "'''"
    )
    try:
        out = param_generator(
            prompt,
            max_new_tokens=64,
            num_beams=1,
            do_sample=False,
            early_stopping=True
        )[0].get("generated_text","").strip()
        cfg = None
        try:
            cfg = json.loads(out)
        except Exception:
            j = re.search(r"\{.*\}", out, re.DOTALL)
            if j:
                raw = j.group().replace("'", '"')
                cfg = json.loads(raw)
        if not cfg:
            raise ValueError("Unparseable param-generator output")
        length = cfg.get("length","").lower()
        tone = cfg.get("tone","").lower()
        min_w = cfg.get("min_words")
        max_w = cfg.get("max_words")
        if length not in ("short","medium","long"):
            words = len(text.split())
            length = "short" if words < 150 else ("medium" if words < 800 else "long")
        if tone not in ("neutral","formal","casual","bullet"):
            tone = "neutral"
        if not isinstance(min_w,int):
            min_w = _first_int_from_text(out, fallback=None)
        if not isinstance(max_w,int):
            max_w = _first_int_from_text(out[::-1], fallback=None)
        defaults = {"short":(15,50),"medium":(50,130),"long":(130,300)}
        dmin,dmax = defaults.get(length,(50,130))
        min_len = int(min_w) if isinstance(min_w,int) else dmin
        max_len = int(max_w) if isinstance(max_w,int) else dmax
        min_len = max(5, min(min_len, 2000))
        max_len = max(min_len+5, min(max_len, 4000))
        logger.info("Param-generator chose length=%s tone=%s min=%s max=%s", length, tone, min_len, max_len)
        return {"length":length,"min_length":min_len,"max_length":max_len,"tone":tone}
    except Exception as e:
        logger.exception("Param-generator failed: %s", e)
        words = len(text.split())
        length = "short" if words < 150 else ("medium" if words < 800 else "long")
        fallback = {"short":(15,50),"medium":(50,130),"long":(130,300)}
        mn,mx = fallback[length]
        return {"length":length,"min_length":mn,"max_length":mx,"tone":"neutral"}

# -------------------------
# Two-stage summarization (chunk -> chunk summaries -> refine)
# -------------------------
def refine_and_combine(summaries_list, tone, final_target_sentences=None, summarizer_pipe=None):
    combined = "\n\n".join(summaries_list)
    if len(combined.split()) > 2000:
        combined = extractive_prefilter(combined, top_k=20)
    prompt = apply_tone_instruction(combined, tone, target_sentences=final_target_sentences)
    tgt_sent = final_target_sentences or 3
    gen_kwargs = {
        "min_length": max(20, int(tgt_sent * 8)),
        "max_length": max(60, int(tgt_sent * 30)),
        "num_beams": 6,
        "early_stopping": True,
        "no_repeat_ngram_size": 3,
        "do_sample": False,
    }
    try:
        if summarizer_pipe is None:
            # fallback to pegasus by default (if pipe not provided)
            summarizer_pipe = get_summarizer("pegasus")
        out = summarizer_pipe(prompt, **gen_kwargs)[0]["summary_text"].strip()
        return out
    except Exception as e:
        logger.exception("Refine failed: %s", e)
        return " ".join(summaries_list[:3])

# -------------------------
# Model-specific generation helper
# -------------------------
def summarize_with_model(pipe, text_prompt, short_target=False):
    """
    Use model pipeline with conservative and model-appropriate generation settings.
    short_target: if True use shorter min/max suitable for concise outputs
    """
    # heuristics: if pipe is LED (model name in tied tokenizer), allow larger max_length
    model_name = getattr(pipe.model.config, "name_or_path", "") or ""
    is_led = "led" in model_name or "longformer" in model_name or "allenai" in model_name and "led" in model_name
    if short_target:
        min_l = 12
        max_l = 60
    else:
        min_l = 24
        max_l = 140 if not is_led else 400  # LED can handle longer outputs
    gen_kwargs = {
        "min_length": min_l,
        "max_length": max_l,
        "num_beams": 5 if not is_led else 4,
        "early_stopping": True,
        "no_repeat_ngram_size": 3,
        "do_sample": False,
    }
    return pipe(text_prompt, **gen_kwargs)[0]["summary_text"].strip()

# -------------------------
# Routes
# -------------------------
@app.route("/")
def home():
    return render_template("index.html")

@app.route("/summarize", methods=["POST"])
def summarize_route():
    t0 = time.time()
    data = request.get_json(force=True) or {}
    text = (data.get("text") or "")[:90000]
    user_model_pref = (data.get("model") or "auto").lower()   # 'pegasus' | 'led' | 'auto'
    requested_length = (data.get("length") or "auto").lower()  # short|medium|long|auto
    requested_tone = (data.get("tone") or "auto").lower()      # neutral|formal|casual|bullet|auto

    if not text or len(text.split()) < 5:
        return jsonify({"error":"Input too short."}), 400

    # 1) Decide settings (AI or explicit)
    if requested_length in ("auto","ai") or requested_tone in ("auto","ai"):
        cfg = generate_summarization_config(text)
        length_choice = cfg.get("length","medium")
        tone_choice = cfg.get("tone","neutral")
        preset_min = cfg.get("min_length")
        preset_max = cfg.get("max_length")
    else:
        length_choice = requested_length if requested_length in ("short","medium","long") else "medium"
        tone_choice = requested_tone if requested_tone in ("neutral","formal","casual","bullet") else "neutral"
        preset_min = LENGTH_PRESETS.get(length_choice, LENGTH_PRESETS["medium"])["min_length"]
        preset_max = LENGTH_PRESETS.get(length_choice, LENGTH_PRESETS["medium"])["max_length"]

    # 2) Model selection (user preference or auto)
    # auto rules: if user specifically asked 'led' or param-generator picked long / input is very long -> led
    words_len = len(text.split())
    prefer_led = False
    if user_model_pref == "led":
        prefer_led = True
    elif user_model_pref == "pegasus":
        prefer_led = False
    else:  # auto
        if length_choice == "long" or words_len > 3000:
            prefer_led = True
        else:
            prefer_led = False

    model_key = "led" if prefer_led else "pegasus"
    # get the pipeline (lazy load)
    try:
        summarizer_pipe = get_summarizer(model_key)
    except Exception as e:
        logger.exception("Failed to load summarizer '%s': %s", model_key, e)
        # fallback to pegasus if led fails
        summarizer_pipe = get_summarizer("pegasus")
        model_key = "pegasus"

    # 3) Prefilter very long inputs (if not using LED)
    if not prefer_led and words_len > 2500:
        text_for_chunks = extractive_prefilter(text, top_k=40)
    else:
        text_for_chunks = text

    # 4) Chunking: choose chunk size depending on model
    if model_key == "led":
        chunk_max_chars = 8000   # LED can handle larger chunks
        chunk_overlap = 400
    else:
        chunk_max_chars = 1400
        chunk_overlap = 200
    chunks = chunk_text_by_chars(text_for_chunks, max_chars=chunk_max_chars, overlap=chunk_overlap)

    # 5) Summarize each chunk
    chunk_summaries = []
    for chunk in chunks:
        chunk_target = 1 if length_choice == "short" else 2
        chunk_tone = tone_choice if tone_choice in ("formal","casual","bullet") else "neutral"
        prompt = apply_tone_instruction(chunk, chunk_tone, target_sentences=chunk_target)
        try:
            # choose short_target True for tiny chunk summaries
            out = summarize_with_model(summarizer_pipe, prompt, short_target=(chunk_target==1))
        except Exception as e:
            logger.exception("Chunk summarization failed, using extractive fallback: %s", e)
            out = extractive_prefilter(chunk, top_k=3)
        chunk_summaries.append(out)

    # 6) Combine + refine using the same model for consistency (or prefer Pegasus for elegant refinement)
    refine_model_key = model_key if model_key == "led" else "pegasus"
    refine_pipe = get_summarizer(refine_model_key)
    final_target_sentences = {"short":1,"medium":3,"long":6}.get(length_choice,3)
    final = refine_and_combine(chunk_summaries, tone_choice, final_target_sentences, summarizer_pipe=refine_pipe)

    # 7) Post-process bullet tone
    if tone_choice == "bullet":
        parts = re.split(r'[\n\r]+|(?:\.\s+)|(?:;\s+)', final)
        bullets = [f"- {p.strip().rstrip('.')}" for p in parts if p.strip()]
        final = "\n".join(bullets[:20])

    elapsed = time.time() - t0
    meta = {
        "length_choice": length_choice,
        "tone": tone_choice,
        "model_used": model_key,
        "refine_model": refine_model_key,
        "chunks": len(chunks),
        "input_words": words_len,
        "time_seconds": round(elapsed, 2),
        "device": ("gpu" if USE_GPU else "cpu")
    }
    return jsonify({"summary": final, "meta": meta})

# -------------------------
# Run
# -------------------------
if __name__ == "__main__":
    # debug=False for production; use Gunicorn in deployment
    app.run(host="0.0.0.0", port=7860, debug=False)