# AIprojects / main.py
# sayanAIAI's picture
# Update main.py
# a323f1e verified
# raw
# history blame
# 3.16 kB
import os
# Point the Hugging Face cache directory at /tmp. This is done before
# transformers is imported, presumably so the setting is already in place when
# the library resolves its cache paths, and because /tmp is writable in the
# deployment environment — TODO confirm against the hosting setup.
os.environ['HF_HOME'] = '/tmp'
from flask import Flask, request, jsonify, render_template
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import math, textwrap
app = Flask(__name__)
# Checkpoint name used for both the tokenizer and the seq2seq model below.
MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
# The tokenizer, model and pipeline are built once at import time and the
# resulting `summarizer` object is reused by the request handlers.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=-1) # device=-1 runs on CPU; pass a GPU ordinal (e.g. 0) to run on CUDA
# Preset name -> generation bounds. The values are forwarded unchanged as the
# ``min_length`` / ``max_length`` keyword arguments of the summarizer call.
LENGTH_PRESETS = {
    preset_name: {"min_length": lower, "max_length": upper}
    for preset_name, lower, upper in (
        ("short", 20, 60),
        ("medium", 60, 130),
        ("long", 130, 300),
    )
}
def chunk_text_by_chars(text, max_chars=1500, overlap=200):
    """Split *text* into overlapping chunks of at most *max_chars* characters.

    Text that already fits in ``max_chars`` is returned unchanged as a
    single-element list. Otherwise a window of ``max_chars`` characters slides
    over the text; when a window contains a newline past 60% of ``max_chars``
    the chunk is cut at that newline so chunks tend to end on a line boundary.
    Consecutive chunks share up to ``overlap`` characters of context.

    Args:
        text: The string to split.
        max_chars: Maximum chunk size in characters; must be positive when
            the text does not already fit.
        overlap: Number of trailing characters of one chunk that are repeated
            at the start of the next one.

    Returns:
        A list of chunks with surrounding whitespace stripped. Chunks that are
        empty after stripping are omitted on the multi-chunk path.

    Raises:
        ValueError: If ``max_chars`` is not positive and the text is non-empty.
    """
    if len(text) <= max_chars:
        return [text]
    if max_chars <= 0:
        raise ValueError("max_chars must be a positive integer")

    text_len = len(text)
    parts = []
    start = 0
    while True:
        end = min(text_len, start + max_chars)
        chunk = text[start:end]
        # Only look for a line boundary while more text remains; the final
        # window is emitted whole since there is nothing left to align with.
        if end < text_len:
            newline_at = chunk.rfind("\n")
            if newline_at > max_chars * 0.6:
                end = start + newline_at
                chunk = text[start:end]
        piece = chunk.strip()
        if piece:
            parts.append(piece)
        # Stop once the window has consumed the tail. Stepping back by
        # `overlap` from the very end would otherwise re-enter the loop forever.
        if end >= text_len:
            break
        # Always advance by at least one character, even if `overlap` is as
        # large as the chunk that was just emitted.
        start = max(end - overlap, start + 1)
    return parts
def apply_tone_instruction(text, tone):
    """Prefix *text* with the instruction line that matches *tone*.

    The tone is matched case-insensitively. A falsy tone is treated as
    "neutral", and any tone other than "formal", "casual" or "bullet" gets the
    plain "Summarize:" instruction. The instruction and the text are separated
    by a blank line.
    """
    instructions_by_tone = {
        "formal": "Summarize in a formal, professional tone:",
        "casual": "Summarize in a casual, conversational tone:",
        "bullet": "Summarize into short bullet points:",
    }
    key = tone.lower() if tone else "neutral"
    header = instructions_by_tone.get(key, "Summarize:")
    return "{}\n\n{}".format(header, text)
# Root route: serves the front-end page so that "/" does not return a 404.
@app.route("/")
def home():
    """Render the ``index.html`` template for the root URL."""
    page = render_template("index.html")
    return page
def _summarize_with_tone(text, tone, preset):
    """Run one summarizer pass over *text* with the tone instruction prepended."""
    prompted = apply_tone_instruction(text, tone)
    result = summarizer(
        prompted,
        min_length=preset["min_length"],
        max_length=preset["max_length"],
        truncation=True,
    )
    return result[0]["summary_text"].strip()


@app.route("/summarize", methods=["POST"])
def summarize_route():
    """Summarize the text posted as JSON and return the summary as JSON.

    Expected request body: a JSON object with

    * ``text``   - the text to summarize (required, truncated to 20000 chars),
    * ``length`` - a key of ``LENGTH_PRESETS`` (optional, default "medium"),
    * ``tone``   - "formal", "casual", "bullet" or anything else for neutral
      (optional, matched case-insensitively).

    The text is split into chunks, each chunk is summarized, and when there is
    more than one chunk the partial summaries are joined and summarized once
    more. For the "bullet" tone the result is reformatted as at most 20
    "- " prefixed lines.

    Returns:
        ``{"summary": ...}`` on success, or ``{"error": ...}`` with status 400
        when the body is not a JSON object, ``text`` is not a string, or the
        text has fewer than five whitespace-separated words.
    """
    # silent=True turns malformed JSON into None, so every bad payload takes
    # the same JSON error path instead of raising inside the handler.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    text = data.get("text", "")
    if not isinstance(text, str):
        return jsonify({"error": "Field 'text' must be a string."}), 400
    text = text[:20000]
    if len(text.split()) < 5:
        return jsonify({"error": "Input too short."}), 400

    # Unknown or non-string presets fall back to "medium" rather than failing
    # (a non-hashable value would otherwise raise inside dict.get).
    length = data.get("length", "medium")
    preset = LENGTH_PRESETS.get(length) if isinstance(length, str) else None
    if preset is None:
        preset = LENGTH_PRESETS["medium"]

    # Normalize the tone once so the instruction lookup and the bullet
    # post-processing below agree on case ("Bullet" == "bullet").
    tone = data.get("tone", "neutral")
    tone = tone.lower() if isinstance(tone, str) and tone else "neutral"

    chunks = chunk_text_by_chars(text, max_chars=1500, overlap=200)
    summaries = [_summarize_with_tone(chunk, tone, preset) for chunk in chunks]

    if len(summaries) == 1:
        final = summaries[0]
    else:
        # Second pass: condense the per-chunk summaries into one summary.
        final = _summarize_with_tone("\n\n".join(summaries), tone, preset)

    if tone == "bullet":
        # Split on line breaks and sentence boundaries, drop empty pieces,
        # and cap the list at 20 bullets.
        sentences = [
            piece.strip()
            for row in final.splitlines()
            for piece in row.split(". ")
            if piece.strip()
        ]
        final = "\n".join(f"- {s.rstrip('.')}" for s in sentences[:20])

    return jsonify({"summary": final})
if __name__ == "__main__":
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary code. Keep it off unless it
    # is explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, port=7860)