import os
os.environ['HF_HOME'] = '/tmp'

import time
import json
import re
import logging
from collections import Counter
from typing import Optional

from flask import Flask, request, jsonify, render_template
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# -------------------------
# App + logging
# -------------------------
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("summarizer")

# -------------------------
# Device selection
# -------------------------
USE_GPU = torch.cuda.is_available()
DEVICE = 0 if USE_GPU else -1
logger.info("CUDA available: %s. Using device: %s", USE_GPU, DEVICE)

# -------------------------
# Model names (we'll load summarizers lazily)
# -------------------------
PEGASUS_MODEL = "google/pegasus-large"
LED_MODEL = "allenai/led-large-16384"
PARAM_MODEL = "google/flan-t5-small"   # instruction model for parameter generation

# caches for lazy-loaded pipelines
_SUMMARIZER_CACHE = {}

# load the small param-generator eagerly; it is lightweight enough to keep resident
logger.info("Loading parameter generator model: %s", PARAM_MODEL)
param_tokenizer = AutoTokenizer.from_pretrained(PARAM_MODEL)
param_model = AutoModelForSeq2SeqLM.from_pretrained(PARAM_MODEL)
param_generator = pipeline("text2text-generation", model=param_model, tokenizer=param_tokenizer, device=DEVICE)

# -------------------------
# Presets & utils
# -------------------------
LENGTH_PRESETS = {
    "short": {"min_length": 20, "max_length": 60},
    "medium": {"min_length": 60, "max_length": 130},
    "long": {"min_length": 130, "max_length": 300},
}

_STOPWORDS = {
    "the","and","is","in","to","of","a","that","it","on","for","as","are","with","was","be","by","this","an","or","from","at","which","we","has","have"
}

def tokenize_sentences(text):
    sents = re.split(r'(?<=[.!?])\s+', text.strip())
    return [s.strip() for s in sents if s.strip()]

def extractive_prefilter(text, top_k=12):
    sents = tokenize_sentences(text)
    if len(sents) <= top_k:
        return text
    words = re.findall(r"\w+", text.lower())
    freqs = Counter(w for w in words if w not in _STOPWORDS)
    scored = []
    for i, s in enumerate(sents):
        ws = re.findall(r"\w+", s.lower())
        score = sum(freqs.get(w, 0) for w in ws)
        scored.append((score, i, s))
    scored.sort(reverse=True)
    chosen = [s for _, _, s in sorted(scored[:top_k], key=lambda t: t[1])]
    return " ".join(chosen)

def chunk_text_by_chars(text, max_chars=1500, overlap=200):
    if len(text) <= max_chars:
        return [text]
    parts = []
    start = 0
    while start < len(text):
        end = min(len(text), start + max_chars)
        chunk = text[start:end]
        nl = chunk.rfind('\n')
        if nl > max_chars * 0.6:
            end = start + nl
            chunk = text[start:end]
        parts.append(chunk.strip())
        # step back by `overlap` so consecutive chunks share some context
        start = end - overlap if end < len(text) else end
    return parts
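
# Example (illustrative): chunk_text_by_chars("abcdefghijklmno", max_chars=10, overlap=3)
# yields ["abcdefghij", "hijklmno"] -- the second chunk re-reads the last `overlap`
# characters of the first so content cut at a boundary is not lost.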

def _first_int_from_text(s, fallback=None):
    m = re.search(r"\d{1,4}", s)
    return int(m.group()) if m else fallback

# -------------------------
# Lazy summarizer loader
# -------------------------
def get_summarizer(model_key: str):
    """
    Returns a pipeline summarizer for 'pegasus' or 'led', loading it lazily.
    model_key: "pegasus" or "led"
    """
    model_key = model_key.lower()
    if model_key in _SUMMARIZER_CACHE:
        return _SUMMARIZER_CACHE[model_key]

    if model_key == "pegasus":
        model_name = PEGASUS_MODEL
    elif model_key == "led":
        model_name = LED_MODEL
    else:
        raise ValueError("Unknown model_key: " + str(model_key))

    logger.info("Loading summarizer model '%s' (%s) on device %s ...", model_key, model_name, DEVICE)
    tok = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    pipe = pipeline("summarization", model=model, tokenizer=tok, device=DEVICE)
    _SUMMARIZER_CACHE[model_key] = pipe
    logger.info("Loaded summarizer '%s' successfully.", model_key)
    return pipe
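
# Illustrative note on the caching behavior above: the first get_summarizer("pegasus")
# call loads google/pegasus-large into _SUMMARIZER_CACHE; subsequent calls return the
# cached pipeline, so repeated requests pay the load cost only once.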

# -------------------------
# Prompt and decision logic
# -------------------------
def apply_tone_instruction(text, tone, target_sentences=None):
    tone = (tone or "neutral").lower()
    if tone == "bullet":
        instr = "Produce concise bullet points. Each bullet short (<=20 words). No extra commentary."
    elif tone == "short":
        ts = target_sentences or 1
        instr = f"Summarize the content in {ts} sentence{'s' if ts>1 else ''}. Be highly abstractive and avoid copying sentences verbatim."
    elif tone == "formal":
        instr = "Summarize in a formal, professional tone in 2-4 sentences. Keep it precise and well-structured."
    elif tone == "casual":
        instr = "Summarize in a casual, conversational tone in 1-3 sentences. Use plain, friendly language."
    elif tone == "long":
        instr = "Provide a clear, structured summary in 4-8 sentences covering key points and context."
    else:
        instr = "Summarize the content in 2-3 sentences. Be clear and concise."
    instr += " Do not repeat information; prefer rephrasing."
    return f"{instr}\n\nText:\n{text}"

def generate_summarization_config(text):
    """
    Ask small instruction model for settings; fallback to heuristic.
    Returns dict with keys: length, min_length, max_length, tone
    """
    prompt = (
        "You are an assistant that recommends summarization settings.\n"
        "Given the text, respond ONLY with single-line JSON EXACTLY like:\n"
        '{"length":"short|medium|long","min_words":MIN,"max_words":MAX,"tone":"neutral|formal|casual|bullet"}\n\n'
        "Text:\n'''"
        + text[:4000] + "'''"
    )
    try:
        out = param_generator(
            prompt,
            max_new_tokens=64,
            num_beams=1,
            do_sample=False,
            early_stopping=True
        )[0].get("generated_text","").strip()
        cfg = None
        try:
            cfg = json.loads(out)
        except Exception:
            j = re.search(r"\{.*\}", out, re.DOTALL)
            if j:
                raw = j.group().replace("'", '"')
                cfg = json.loads(raw)
        if not cfg:
            raise ValueError("Unparseable param-generator output")
        length = cfg.get("length","").lower()
        tone = cfg.get("tone","").lower()
        min_w = cfg.get("min_words")
        max_w = cfg.get("max_words")
        if length not in ("short","medium","long"):
            words = len(text.split())
            length = "short" if words < 150 else ("medium" if words < 800 else "long")
        if tone not in ("neutral","formal","casual","bullet"):
            tone = "neutral"
        if not isinstance(min_w,int):
            min_w = _first_int_from_text(out, fallback=None)
        if not isinstance(max_w,int):
            # take the last integer in the output (searching a reversed string would reverse the digits)
            nums = re.findall(r"\d{1,4}", out)
            max_w = int(nums[-1]) if nums else None
        defaults = {"short":(15,50),"medium":(50,130),"long":(130,300)}
        dmin,dmax = defaults.get(length,(50,130))
        min_len = int(min_w) if isinstance(min_w,int) else dmin
        max_len = int(max_w) if isinstance(max_w,int) else dmax
        min_len = max(5, min(min_len, 2000))
        max_len = max(min_len+5, min(max_len, 4000))
        logger.info("Param-generator chose length=%s tone=%s min=%s max=%s", length, tone, min_len, max_len)
        return {"length":length,"min_length":min_len,"max_length":max_len,"tone":tone}
    except Exception as e:
        logger.exception("Param-generator failed: %s", e)
        words = len(text.split())
        length = "short" if words < 150 else ("medium" if words < 800 else "long")
        fallback = {"short":(15,50),"medium":(50,130),"long":(130,300)}
        mn,mx = fallback[length]
        return {"length":length,"min_length":mn,"max_length":mx,"tone":"neutral"}

# -------------------------
# Two-stage summarization (chunk -> chunk summaries -> refine)
# -------------------------
def refine_and_combine(summaries_list, tone, final_target_sentences=None, summarizer_pipe=None):
    combined = "\n\n".join(summaries_list)
    if len(combined.split()) > 2000:
        combined = extractive_prefilter(combined, top_k=20)
    prompt = apply_tone_instruction(combined, tone, target_sentences=final_target_sentences)
    tgt_sent = final_target_sentences or 3
    gen_kwargs = {
        "min_length": max(20, int(tgt_sent * 8)),
        "max_length": max(60, int(tgt_sent * 30)),
        "num_beams": 6,
        "early_stopping": True,
        "no_repeat_ngram_size": 3,
        "do_sample": False,
    }
    try:
        if summarizer_pipe is None:
            # fallback to pegasus by default (if pipe not provided)
            summarizer_pipe = get_summarizer("pegasus")
        out = summarizer_pipe(prompt, **gen_kwargs)[0]["summary_text"].strip()
        return out
    except Exception as e:
        logger.exception("Refine failed: %s", e)
        return " ".join(summaries_list[:3])

# -------------------------
# Model-specific generation helper
# -------------------------
def summarize_with_model(pipe, text_prompt, short_target=False):
    """
    Use model pipeline with conservative and model-appropriate generation settings.
    short_target: if True use shorter min/max suitable for concise outputs
    """
    # heuristic: if the pipeline wraps LED (judging by the model's name_or_path), allow longer outputs
    model_name = getattr(pipe.model.config, "name_or_path", "") or ""
    is_led = "led" in model_name.lower() or "longformer" in model_name.lower()
    if short_target:
        min_l = 12
        max_l = 60
    else:
        min_l = 24
        max_l = 140 if not is_led else 400  # LED can handle longer outputs
    gen_kwargs = {
        "min_length": min_l,
        "max_length": max_l,
        "num_beams": 5 if not is_led else 4,
        "early_stopping": True,
        "no_repeat_ngram_size": 3,
        "do_sample": False,
    }
    return pipe(text_prompt, **gen_kwargs)[0]["summary_text"].strip()

# -------------------------
# Routes
# -------------------------
@app.route("/")
def home():
    return render_template("index.html")

@app.route("/summarize", methods=["POST"])
def summarize_route():
    t0 = time.time()
    data = request.get_json(force=True) or {}
    text = (data.get("text") or "")[:90000]
    user_model_pref = (data.get("model") or "auto").lower()   # 'pegasus' | 'led' | 'auto'
    requested_length = (data.get("length") or "auto").lower()  # short|medium|long|auto
    requested_tone = (data.get("tone") or "auto").lower()      # neutral|formal|casual|bullet|auto

    if not text or len(text.split()) < 5:
        return jsonify({"error":"Input too short."}), 400

    # 1) Decide settings (AI or explicit)
    if requested_length in ("auto","ai") or requested_tone in ("auto","ai"):
        cfg = generate_summarization_config(text)
        # honor an explicitly requested value; use the AI suggestion only for fields left on "auto"
        length_choice = requested_length if requested_length in ("short","medium","long") else cfg.get("length","medium")
        tone_choice = requested_tone if requested_tone in ("neutral","formal","casual","bullet") else cfg.get("tone","neutral")
        preset_min = cfg.get("min_length")
        preset_max = cfg.get("max_length")
    else:
        length_choice = requested_length if requested_length in ("short","medium","long") else "medium"
        tone_choice = requested_tone if requested_tone in ("neutral","formal","casual","bullet") else "neutral"
        preset_min = LENGTH_PRESETS.get(length_choice, LENGTH_PRESETS["medium"])["min_length"]
        preset_max = LENGTH_PRESETS.get(length_choice, LENGTH_PRESETS["medium"])["max_length"]

    # 2) Model selection (user preference or auto)
    # auto rules: if user specifically asked 'led' or param-generator picked long / input is very long -> led
    words_len = len(text.split())
    prefer_led = False
    if user_model_pref == "led":
        prefer_led = True
    elif user_model_pref == "pegasus":
        prefer_led = False
    else:  # auto
        if length_choice == "long" or words_len > 3000:
            prefer_led = True
        else:
            prefer_led = False

    model_key = "led" if prefer_led else "pegasus"
    # get the pipeline (lazy load)
    try:
        summarizer_pipe = get_summarizer(model_key)
    except Exception as e:
        logger.exception("Failed to load summarizer '%s': %s", model_key, e)
        # fallback to pegasus if led fails
        summarizer_pipe = get_summarizer("pegasus")
        model_key = "pegasus"

    # 3) Prefilter very long inputs (if not using LED)
    if not prefer_led and words_len > 2500:
        text_for_chunks = extractive_prefilter(text, top_k=40)
    else:
        text_for_chunks = text

    # 4) Chunking: choose chunk size depending on model
    if model_key == "led":
        chunk_max_chars = 8000   # LED can handle larger chunks
        chunk_overlap = 400
    else:
        chunk_max_chars = 1400
        chunk_overlap = 200
    chunks = chunk_text_by_chars(text_for_chunks, max_chars=chunk_max_chars, overlap=chunk_overlap)

    # 5) Summarize each chunk
    chunk_summaries = []
    for chunk in chunks:
        chunk_target = 1 if length_choice == "short" else 2
        chunk_tone = tone_choice if tone_choice in ("formal","casual","bullet") else "neutral"
        prompt = apply_tone_instruction(chunk, chunk_tone, target_sentences=chunk_target)
        try:
            # choose short_target True for tiny chunk summaries
            out = summarize_with_model(summarizer_pipe, prompt, short_target=(chunk_target==1))
        except Exception as e:
            logger.exception("Chunk summarization failed, using extractive fallback: %s", e)
            out = extractive_prefilter(chunk, top_k=3)
        chunk_summaries.append(out)

    # 6) Combine + refine using the same model for consistency (or prefer Pegasus for elegant refinement)
    refine_model_key = model_key if model_key == "led" else "pegasus"
    refine_pipe = get_summarizer(refine_model_key)
    final_target_sentences = {"short":1,"medium":3,"long":6}.get(length_choice,3)
    final = refine_and_combine(chunk_summaries, tone_choice, final_target_sentences, summarizer_pipe=refine_pipe)

    # 7) Post-process bullet tone
    if tone_choice == "bullet":
        parts = re.split(r'[\n\r]+|(?:\.\s+)|(?:;\s+)', final)
        bullets = [f"- {p.strip().rstrip('.')}" for p in parts if p.strip()]
        final = "\n".join(bullets[:20])

    elapsed = time.time() - t0
    meta = {
        "length_choice": length_choice,
        "tone": tone_choice,
        "model_used": model_key,
        "refine_model": refine_model_key,
        "chunks": len(chunks),
        "input_words": words_len,
        "time_seconds": round(elapsed, 2),
        "device": ("gpu" if USE_GPU else "cpu")
    }
    return jsonify({"summary": final, "meta": meta})
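
# Example request (a sketch; assumes the app below is reachable on localhost:7860 and
# posts the JSON fields this route reads: text, model, length, tone):
#   curl -X POST http://localhost:7860/summarize \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Long article text ...", "model": "auto", "length": "auto", "tone": "auto"}'
# The JSON response contains "summary" plus a "meta" object (model used, tone, chunk
# count, timing, device).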

# -------------------------
# Run
# -------------------------
if __name__ == "__main__":
    # debug=False for production; use Gunicorn in deployment
    app.run(host="0.0.0.0", port=7860, debug=False)