sayanAIAI committed on
Commit 743f7ef · verified · 1 Parent(s): 08b9e9f

Update main.py

Files changed (1)
  1. main.py +213 -99
main.py CHANGED
@@ -1,32 +1,52 @@
  import os
  os.environ['HF_HOME'] = '/tmp'

- from flask import Flask, request, jsonify, render_template
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
- import json, re, time
  import logging
- # ... keep your existing imports

- logger = logging.getLogger(__name__)
  app = Flask(__name__)

  # -------------------------
- # Models (CPU as requested)
  # -------------------------
- # Primary summarizer: higher-quality model
- MODEL_NAME = "facebook/bart-large-cnn"
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
- summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=-1)  # CPU

- # Small instruction model to choose length/tone when "auto" is requested
- PARAM_MODEL_NAME = "google/flan-t5-small"
- param_tokenizer = AutoTokenizer.from_pretrained(PARAM_MODEL_NAME)
- param_model = AutoModelForSeq2SeqLM.from_pretrained(PARAM_MODEL_NAME)
- param_generator = pipeline("text2text-generation", model=param_model, tokenizer=param_tokenizer, device=-1)  # CPU

  # -------------------------
- # Presets & helpers
  # -------------------------
  LENGTH_PRESETS = {
      "short": {"min_length": 20, "max_length": 60},
@@ -34,6 +54,34 @@ LENGTH_PRESETS = {
      "long": {"min_length": 130, "max_length": 300},
  }

  def chunk_text_by_chars(text, max_chars=1500, overlap=200):
      if len(text) <= max_chars:
          return [text]
@@ -47,168 +95,234 @@ def chunk_text_by_chars(text, max_chars=1500, overlap=200):
          end = start + nl
          chunk = text[start:end]
          parts.append(chunk.strip())
-         start = end - overlap
      return parts

- def apply_tone_instruction(text, tone):
      tone = (tone or "neutral").lower()
-     if tone == "formal":
-         instr = "Summarize in a formal, professional tone:"
      elif tone == "casual":
-         instr = "Summarize in a casual, conversational tone:"
-     elif tone == "bullet":
-         instr = "Summarize into short bullet points:"
      else:
-         instr = "Summarize:"
-     return f"{instr}\n\n{text}"

- # small regex int extractor
- def _first_int_from_text(s, fallback=None):
-     m = re.search(r"\d{1,5}", s)
-     return int(m.group()) if m else fallback

  def generate_summarization_config(text):
      """
-     Fast, robust parameter generator that prefers quick generation settings to avoid worker timeouts.
-     If the param model fails or is slow, we fall back to heuristics.
      """
-     # short prompt (keeps prompt length bounded)
      prompt = (
-         "You are a helpful assistant that recommends summarization settings.\n"
-         "Given the following source text, pick a summary LENGTH category (short/medium/long), "
-         "an estimated MIN and MAX length in words for the summary, and a TONE (neutral/formal/casual/bullet).\n"
-         "Respond ONLY in compact JSON (single line):\n"
          '{"length":"short|medium|long","min_words":MIN,"max_words":MAX,"tone":"neutral|formal|casual|bullet"}\n\n'
          "Text:\n'''"
-         + (text[:3000])  # limit prompt size so generation is fast
-         + "'''"
      )

      try:
-         # IMPORTANT: use max_new_tokens (not max_length), small beam or sampling off,
-         # and a small token limit to keep latency low on CPU.
          gen = param_generator(
              prompt,
-             max_new_tokens=64,  # keep short
-             num_beams=1,        # avoid expensive beam search
-             do_sample=False     # deterministic and typically faster for small models
          )
          out = gen[0].get("generated_text", "").strip()
-         # try to extract JSON substring
          cfg = None
          try:
              cfg = json.loads(out)
          except Exception:
-             jmatch = re.search(r"\{.*\}", out, re.DOTALL)
-             if jmatch:
-                 raw = jmatch.group().replace("'", '"')
                  cfg = json.loads(raw)
          if not cfg:
-             raise ValueError("Failed to parse param-generator output")

          length = cfg.get("length", "").lower()
          tone = cfg.get("tone", "").lower()
          min_w = cfg.get("min_words")
          max_w = cfg.get("max_words")

-         # normalize & fallback rules
          if length not in ("short", "medium", "long"):
              words = len(text.split())
              length = "short" if words < 150 else ("medium" if words < 800 else "long")
          if tone not in ("neutral", "formal", "casual", "bullet"):
              tone = "neutral"

          defaults = {"short": (15, 50), "medium": (50, 130), "long": (130, 300)}
-         dmin, dmax = defaults[length]
          min_len = int(min_w) if isinstance(min_w, int) else dmin
          max_len = int(max_w) if isinstance(max_w, int) else dmax

-         # clamp to sane bounds
          min_len = max(5, min(min_len, 2000))
          max_len = max(min_len + 5, min(max_len, 4000))

          return {"length": length, "min_length": min_len, "max_length": max_len, "tone": tone}
      except Exception as e:
-         # log the error and fall back to a quick heuristic
-         logger.exception("param-generator failed or timed out, falling back to heuristic: %s", str(e))
          words = len(text.split())
          length = "short" if words < 150 else ("medium" if words < 800 else "long")
          fallback = {"short": (15, 50), "medium": (50, 130), "long": (130, 300)}
          mn, mx = fallback[length]
          return {"length": length, "min_length": mn, "max_length": mx, "tone": "neutral"}

  # -------------------------
  # Routes
  # -------------------------
  @app.route("/")
  def home():
-     # expects templates/index.html to exist (your frontend)
      return render_template("index.html")

  @app.route("/summarize", methods=["POST"])
  def summarize_route():
-     start_time = time.time()
      data = request.get_json(force=True)
-     text = data.get("text", "")[:20000]  # cap input
      requested_length = (data.get("length") or "medium").lower()
      requested_tone = (data.get("tone") or "neutral").lower()

      if not text or len(text.split()) < 5:
          return jsonify({"error": "Input too short."}), 400

-     # If the user asks the AI to choose settings
      if requested_length in ("auto", "ai") or requested_tone in ("auto", "ai"):
          cfg = generate_summarization_config(text)
-         length = cfg.get("length", "medium")
-         tone = cfg.get("tone", "neutral")
          preset_min = cfg.get("min_length")
          preset_max = cfg.get("max_length")
-         preset = LENGTH_PRESETS.get(length, LENGTH_PRESETS["medium"])
      else:
-         length = requested_length if requested_length in LENGTH_PRESETS else "medium"
-         tone = requested_tone if requested_tone in ("neutral", "formal", "casual", "bullet") else "neutral"
-         preset = LENGTH_PRESETS.get(length, LENGTH_PRESETS["medium"])
-         preset_min = preset["min_length"]
-         preset_max = preset["max_length"]

-     # chunk input for long texts
-     chunks = chunk_text_by_chars(text, max_chars=1500, overlap=200)
-     summaries = []

-     for chunk in chunks:
-         prompted = apply_tone_instruction(chunk, tone)
-         min_l = int(preset_min) if preset_min is not None else preset["min_length"]
-         max_l = int(preset_max) if preset_max is not None else preset["max_length"]
-
-         out = summarizer(
-             prompted,
-             min_length=min_l,
-             max_length=max_l,
-             truncation=True
-         )[0]["summary_text"]
-         summaries.append(out.strip())
-
-     if len(summaries) == 1:
-         final = summaries[0]
      else:
-         combined = "\n\n".join(summaries)
-         prompted = apply_tone_instruction(combined, tone)
-         final = summarizer(
-             prompted,
-             min_length=preset["min_length"],
-             max_length=preset["max_length"],
-             truncation=True
-         )[0]["summary_text"]

-     if tone == "bullet":
-         lines = [l.strip() for s in final.splitlines() for l in s.split(". ") if l.strip()]
-         final = "\n".join(f"- {l.rstrip('.')}" for l in lines[:20])

-     elapsed = time.time() - start_time
-     return jsonify({"summary": final, "meta": {"length_choice": length, "tone": tone, "time_seconds": round(elapsed, 2)}})

  if __name__ == "__main__":
-     # keep debug off in production; using CPU as requested
-     app.run(host="0.0.0.0", port=7860, debug=True)
 
  import os
  os.environ['HF_HOME'] = '/tmp'

+ import time
+ import json
+ import re
  import logging
+ from collections import Counter
+
+ from flask import Flask, request, jsonify, render_template
+ import torch
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForSeq2SeqLM,
+     pipeline
+ )

+ # -------------------------
+ # Basic app + logging
+ # -------------------------
  app = Flask(__name__)
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger("summarizer")
+
+ # -------------------------
+ # Device selection (GPU if available)
+ # -------------------------
+ USE_GPU = torch.cuda.is_available()
+ DEVICE = 0 if USE_GPU else -1
+ logger.info("CUDA available: %s. Using device: %s", USE_GPU, DEVICE)

  # -------------------------
+ # Models (quality-first)
  # -------------------------
+ # Primary summarizer (higher-quality model)
+ SUMMARIZER_MODEL = "facebook/bart-large-cnn"  # quality-focused
+ summ_tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_MODEL)
+ summ_model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARIZER_MODEL)
+ summarizer = pipeline("summarization", model=summ_model, tokenizer=summ_tokenizer, device=DEVICE)

+ # Parameter generator (small instruction model to "think" and choose settings).
+ # We keep this compact but capable. If you later want stronger reasoning, swap to flan-t5-base.
+ PARAM_MODEL = "google/flan-t5-small"
+ param_tokenizer = AutoTokenizer.from_pretrained(PARAM_MODEL)
+ param_model = AutoModelForSeq2SeqLM.from_pretrained(PARAM_MODEL)
+ param_generator = pipeline("text2text-generation", model=param_model, tokenizer=param_tokenizer, device=DEVICE)

  # -------------------------
+ # Presets & utilities
  # -------------------------
  LENGTH_PRESETS = {
      "short": {"min_length": 20, "max_length": 60},

      "long": {"min_length": 130, "max_length": 300},
  }
 
+ # Simple sentence splitter and extractive prefilter (helps focus the abstractive model)
+ _STOPWORDS = {
+     "the","and","is","in","to","of","a","that","it","on","for","as","are","with","was","be","by","this","an","or","from","at","which","we","has","have"
+ }
+
+ def tokenize_sentences(text):
+     sents = re.split(r'(?<=[.!?])\s+', text.strip())
+     return [s.strip() for s in sents if s.strip()]
+
+ def extractive_prefilter(text, top_k=12):
+     """
+     Rank sentences by (non-stopword) word frequency and return the top_k
+     sentences, joined in their original order. Useful for very long inputs.
+     """
+     sents = tokenize_sentences(text)
+     if len(sents) <= top_k:
+         return text
+     words = re.findall(r"\w+", text.lower())
+     freqs = Counter(w for w in words if w not in _STOPWORDS)
+     scores = []
+     for i, s in enumerate(sents):
+         ws = re.findall(r"\w+", s.lower())
+         score = sum(freqs.get(w, 0) for w in ws)
+         scores.append((score, i, s))
+     scores.sort(reverse=True)
+     chosen = [s for _, _, s in sorted(scores[:top_k], key=lambda t: t[1])]
+     return " ".join(chosen)
+
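A quick sketch of the prefilter's behavior on a hypothetical input (not part of the commit):

sample = (
    "Transformers drive modern NLP. The cafeteria reopened today. "
    "Attention lets transformers weigh context. The weather was mild."
)
print(extractive_prefilter(sample, top_k=2))
# Likely keeps the two transformer-related sentences (highest word-frequency
# scores), joined in their original order.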
  def chunk_text_by_chars(text, max_chars=1500, overlap=200):
      if len(text) <= max_chars:
          return [text]

          end = start + nl
          chunk = text[start:end]
          parts.append(chunk.strip())
+         start = max(end - overlap, start + 1)  # step back by `overlap` chars, but always move forward
      return parts
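A brief usage sketch of the chunker (the loop body above is partially collapsed in the diff, so sizes are approximate):

# Hypothetical usage:
long_text = "word " * 1200  # roughly 6000 characters
chunks = chunk_text_by_chars(long_text, max_chars=1400, overlap=200)
# Each chunk should be at most ~1400 characters, with ~200 characters repeated
# between consecutive chunks so content at the boundaries is not lost.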
 
+ def apply_tone_instruction(text, tone, target_sentences=None):
+     """
+     Build a clear instruction prompt for the summarizer based on tone/length.
+     """
      tone = (tone or "neutral").lower()
+     if tone == "bullet":
+         instr = "Produce concise bullet points. Each bullet should be short (<=20 words) and focused. No extra commentary."
+     elif tone == "short":
+         ts = target_sentences or 1
+         instr = f"Summarize the content in {ts} sentence{'s' if ts > 1 else ''}. Be highly abstractive and avoid copying sentences verbatim."
+     elif tone == "formal":
+         instr = "Summarize in a formal, professional tone in 2-4 sentences. Keep it precise and well-structured."
      elif tone == "casual":
+         instr = "Summarize in a casual, conversational tone in 1-3 sentences. Use plain, friendly language."
+     elif tone == "long":
+         instr = "Provide a clear, structured summary in 4-8 sentences, covering key points and relevant context."
      else:
+         instr = "Summarize the content in 2-3 sentences. Be clear and concise."
+
+     instr += " Do not repeat the same information. Prefer rephrasing over copying."
+
+     return f"{instr}\n\nText:\n{text}"

+ # helper: extract the first integer in a string
+ def _first_int_from_text(s, fallback=None):
+     m = re.search(r"\d{1,4}", s)
+     return int(m.group()) if m else fallback
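For reference, a sketch of the prompt this builder emits for the bullet tone:

# Hypothetical check of the prompt shape:
p = apply_tone_instruction("Some source text.", "bullet")
# p == ("Produce concise bullet points. Each bullet should be short (<=20 words) and focused. "
#       "No extra commentary. Do not repeat the same information. Prefer rephrasing over copying."
#       "\n\nText:\nSome source text.")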
 
+ # -------------------------
+ # Parameter generator (AI "thinking" module)
+ # -------------------------
  def generate_summarization_config(text):
      """
+     Use the instruction model to recommend: length(short|medium|long), min_words, max_words, tone.
+     Falls back to heuristics on failure.
      """
      prompt = (
+         "You are an assistant that recommends optimal summarization settings.\n"
+         "Given the text, respond ONLY with single-line JSON EXACTLY like:\n"
          '{"length":"short|medium|long","min_words":MIN,"max_words":MAX,"tone":"neutral|formal|casual|bullet"}\n\n'
          "Text:\n'''"
+         + text[:4000] +
+         "'''"
      )

      try:
+         # keep generation short and deterministic; use max_new_tokens (avoid max_length)
          gen = param_generator(
              prompt,
+             max_new_tokens=64,
+             num_beams=1,
+             do_sample=False,
+             early_stopping=True
          )
          out = gen[0].get("generated_text", "").strip()
+         # attempt JSON parse
          cfg = None
          try:
              cfg = json.loads(out)
          except Exception:
+             j = re.search(r"\{.*\}", out, re.DOTALL)
+             if j:
+                 raw = j.group().replace("'", '"')
                  cfg = json.loads(raw)
          if not cfg:
+             raise ValueError("Param-generator output not parseable")

          length = cfg.get("length", "").lower()
          tone = cfg.get("tone", "").lower()
          min_w = cfg.get("min_words")
          max_w = cfg.get("max_words")

          if length not in ("short", "medium", "long"):
              words = len(text.split())
              length = "short" if words < 150 else ("medium" if words < 800 else "long")
          if tone not in ("neutral", "formal", "casual", "bullet"):
              tone = "neutral"

+         if not isinstance(min_w, int):
+             min_w = _first_int_from_text(out, fallback=None)
+         if not isinstance(max_w, int):
+             # take the LAST integer in the output (reversing the string would also reverse the digits)
+             nums = re.findall(r"\d{1,4}", out)
+             max_w = int(nums[-1]) if nums else None
+
          defaults = {"short": (15, 50), "medium": (50, 130), "long": (130, 300)}
+         dmin, dmax = defaults.get(length, (50, 130))
          min_len = int(min_w) if isinstance(min_w, int) else dmin
          max_len = int(max_w) if isinstance(max_w, int) else dmax

          min_len = max(5, min(min_len, 2000))
          max_len = max(min_len + 5, min(max_len, 4000))

+         logger.info("Param-generator chose: length=%s tone=%s min=%s max=%s", length, tone, min_len, max_len)
          return {"length": length, "min_length": min_len, "max_length": max_len, "tone": tone}
      except Exception as e:
+         logger.exception("Param-generator failed; falling back to heuristic: %s", str(e))
          words = len(text.split())
          length = "short" if words < 150 else ("medium" if words < 800 else "long")
          fallback = {"short": (15, 50), "medium": (50, 130), "long": (130, 300)}
          mn, mx = fallback[length]
          return {"length": length, "min_length": mn, "max_length": mx, "tone": "neutral"}
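To make the JSON contract concrete, a sketch of a well-formed param-model reply and the config it maps to (values hypothetical):

# Hypothetical round trip through generate_summarization_config:
#   model output: {"length":"medium","min_words":50,"max_words":120,"tone":"formal"}
#   returned cfg: {"length": "medium", "min_length": 50, "max_length": 120, "tone": "formal"}
# Malformed or missing replies fall back to the word-count heuristic and a neutral tone.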
 
+ # -------------------------
+ # Two-stage summarization helpers
+ # -------------------------
+ def refine_and_combine(summaries_list, tone, final_target_sentences=None):
+     """
+     Combine chunk summaries and run a refinement pass to produce a cohesive final summary.
+     """
+     combined = "\n\n".join(summaries_list)
+     if len(combined.split()) > 2000:
+         combined = extractive_prefilter(combined, top_k=20)
+
+     prompt = apply_tone_instruction(combined, tone, target_sentences=final_target_sentences)
+
+     # heuristics for min/max generation lengths
+     tgt_sent = final_target_sentences or 3
+     gen_kwargs = {
+         "min_length": max(20, int(tgt_sent * 8)),
+         "max_length": max(60, int(tgt_sent * 30)),
+         "num_beams": 6,
+         "early_stopping": True,
+         "no_repeat_ngram_size": 3,
+         "do_sample": False,
+         "truncation": True,  # guard against prompts longer than the model's context window
+     }
+
+     try:
+         out = summarizer(prompt, **gen_kwargs)[0]["summary_text"].strip()
+         return out
+     except Exception as e:
+         logger.exception("Refine step failed: %s", e)
+         return " ".join(summaries_list[:3])
+
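A sketch of the refinement pass on hypothetical chunk summaries:

parts = ["The paper introduces method X.", "Experiments show X outperforms Y on benchmark Z."]
final = refine_and_combine(parts, "formal", final_target_sentences=2)
# The joined chunk summaries are re-summarized with beam search (num_beams=6),
# so the result reads as one cohesive paragraph rather than stitched fragments.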
  # -------------------------
  # Routes
  # -------------------------
  @app.route("/")
  def home():
+     # Ensure you have templates/index.html in place
      return render_template("index.html")

  @app.route("/summarize", methods=["POST"])
  def summarize_route():
+     t0 = time.time()
      data = request.get_json(force=True)
+     text = (data.get("text") or "")[:60000]  # cap input to a reasonable size
      requested_length = (data.get("length") or "medium").lower()
      requested_tone = (data.get("tone") or "neutral").lower()

      if not text or len(text.split()) < 5:
          return jsonify({"error": "Input too short."}), 400

+     # 1) Decide settings (AI or explicit)
      if requested_length in ("auto", "ai") or requested_tone in ("auto", "ai"):
          cfg = generate_summarization_config(text)
+         length_choice = cfg.get("length", "medium")
+         tone_choice = cfg.get("tone", "neutral")
          preset_min = cfg.get("min_length")
          preset_max = cfg.get("max_length")
      else:
+         length_choice = requested_length if requested_length in ("short", "medium", "long") else "medium"
+         tone_choice = requested_tone if requested_tone in ("neutral", "formal", "casual", "bullet", "short", "long") else "neutral"
+         preset_min = LENGTH_PRESETS.get(length_choice, LENGTH_PRESETS["medium"])["min_length"]
+         preset_max = LENGTH_PRESETS.get(length_choice, LENGTH_PRESETS["medium"])["max_length"]
+     # Note: preset_min/preset_max are collected but not yet wired into generation below;
+     # per-chunk and refine-pass lengths are derived from sentence targets instead.

+     # Map the chosen length to a target number of final sentences
+     sentence_map = {"short": 1, "medium": 3, "long": 6}
+     final_target_sentences = sentence_map.get(length_choice, 3)

+     # 2) Prefilter if extremely long
+     words_len = len(text.split())
+     if words_len > 3500:
+         text_for_chunks = extractive_prefilter(text, top_k=40)
      else:
+         text_for_chunks = text

+     # 3) Chunking
+     chunks = chunk_text_by_chars(text_for_chunks, max_chars=1400, overlap=200)
+     chunk_summaries = []
+
+     # 4) Summarize each chunk
+     for chunk in chunks:
+         chunk_target = 1 if length_choice == "short" else 2
+         chunk_tone = tone_choice if tone_choice in ("formal", "casual", "bullet") else "neutral"
+         prompt = apply_tone_instruction(chunk, chunk_tone, target_sentences=chunk_target)
+
+         gen_kwargs = {
+             "min_length": 12 if chunk_target == 1 else 24,
+             "max_length": 60 if chunk_target == 1 else 120,
+             "num_beams": 5,
+             "early_stopping": True,
+             "no_repeat_ngram_size": 3,
+             "do_sample": False,
+         }
+
+         try:
+             out = summarizer(prompt, **gen_kwargs)[0]["summary_text"].strip()
+         except Exception as e:
+             logger.exception("Chunk summarization failed, using extractive fallback: %s", e)
+             out = extractive_prefilter(chunk, top_k=3)
+         chunk_summaries.append(out)

+     # 5) Combine & refine
+     final = refine_and_combine(chunk_summaries, tone_choice, final_target_sentences=final_target_sentences)

+     # 6) Post-process for bullet tone
+     if tone_choice == "bullet":
+         parts = re.split(r'[\n\r]+|(?:\.\s+)|(?:;\s+)', final)
+         bullets = [f"- {p.strip().rstrip('.')}" for p in parts if p.strip()]
+         final = "\n".join(bullets[:20])
+
+     elapsed = time.time() - t0
+     meta = {
+         "length_choice": length_choice,
+         "tone": tone_choice,
+         "chunks": len(chunks),
+         "input_words": words_len,
+         "time_seconds": round(elapsed, 2),
+         "device": ("gpu" if USE_GPU else "cpu")
+     }
+
+     return jsonify({"summary": final, "meta": meta})
+ # -------------------------
+ # Run
+ # -------------------------
  if __name__ == "__main__":
+     # In production use Gunicorn; keep debug=False and enable it only for local testing
+     app.run(host="0.0.0.0", port=7860, debug=False)
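Once running, the endpoint can be exercised with a minimal client sketch (assumes the server is on localhost:7860 and `requests` is installed; payload values are hypothetical):

import requests  # hypothetical client, not part of main.py

resp = requests.post(
    "http://localhost:7860/summarize",
    json={"text": "<long article text here>", "length": "auto", "tone": "bullet"},
)
print(resp.json())
# Expected shape: {"summary": "- ...", "meta": {"length_choice": ..., "tone": ..., ...}}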