Spaces:
Runtime error
Runtime error
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
os.environ['HF_HOME'] = '/tmp'
|
| 3 |
-
|
| 4 |
-
from flask import Flask, request, jsonify
|
| 5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 6 |
import math, textwrap
|
| 7 |
|
|
@@ -14,23 +14,21 @@ summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=
|
|
| 14 |
|
| 15 |
# Simple mapping of presets to generation lengths
|
| 16 |
LENGTH_PRESETS = {
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
}
|
| 21 |
|
| 22 |
def chunk_text_by_chars(text, max_chars=1500, overlap=200):
|
| 23 |
-
if len(text) <= max_chars:
|
| 24 |
return [text]
|
| 25 |
parts = []
|
| 26 |
start = 0
|
| 27 |
while start < len(text):
|
| 28 |
end = min(len(text), start + max_chars)
|
| 29 |
-
# try to break at newline or sentence boundary for nicer chunking
|
| 30 |
chunk = text[start:end]
|
| 31 |
-
# extend to nearest newline if possible (avoid cutting sentences)
|
| 32 |
nl = chunk.rfind('\n')
|
| 33 |
-
if nl > max_chars*0.6:
|
| 34 |
end = start + nl
|
| 35 |
chunk = text[start:end]
|
| 36 |
parts.append(chunk.strip())
|
|
@@ -49,10 +47,15 @@ def apply_tone_instruction(text, tone):
|
|
| 49 |
instr = "Summarize:"
|
| 50 |
return f"{instr}\n\n{text}"
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
@app.route("/summarize", methods=["POST"])
|
| 53 |
def summarize_route():
|
| 54 |
data = request.get_json(force=True)
|
| 55 |
-
text = data.get("text", "")[:20000]
|
| 56 |
length = data.get("length", "medium")
|
| 57 |
tone = data.get("tone", "neutral")
|
| 58 |
|
|
@@ -65,25 +68,26 @@ def summarize_route():
|
|
| 65 |
|
| 66 |
for chunk in chunks:
|
| 67 |
prompted = apply_tone_instruction(chunk, tone)
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
| 73 |
summaries.append(out.strip())
|
| 74 |
|
| 75 |
-
# If multiple chunk summaries, join and compress once more
|
| 76 |
if len(summaries) == 1:
|
| 77 |
final = summaries[0]
|
| 78 |
else:
|
| 79 |
combined = "\n\n".join(summaries)
|
| 80 |
prompted = apply_tone_instruction(combined, tone)
|
| 81 |
-
final = summarizer(
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
# if bullet tone, post-process
|
| 87 |
if tone == "bullet":
|
| 88 |
lines = [l.strip() for s in final.splitlines() for l in s.split(". ") if l.strip()]
|
| 89 |
final = "\n".join(f"- {l.rstrip('.')}" for l in lines[:20])
|
|
@@ -91,5 +95,4 @@ def summarize_route():
|
|
| 91 |
return jsonify({"summary": final})
|
| 92 |
|
| 93 |
if __name__ == "__main__":
|
| 94 |
-
app.run(debug=True,port=7860)
|
| 95 |
-
|
|
|
|
| 1 |
import os
|
| 2 |
os.environ['HF_HOME'] = '/tmp'
|
| 3 |
+
|
| 4 |
+
from flask import Flask, request, jsonify, render_template
|
| 5 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
| 6 |
import math, textwrap
|
| 7 |
|
|
|
|
| 14 |
|
| 15 |
# Simple mapping of presets to generation lengths
|
| 16 |
LENGTH_PRESETS = {
|
| 17 |
+
"short": {"min_length": 20, "max_length": 60},
|
| 18 |
+
"medium": {"min_length": 60, "max_length": 130},
|
| 19 |
+
"long": {"min_length": 130, "max_length": 300},
|
| 20 |
}
|
| 21 |
|
| 22 |
def chunk_text_by_chars(text, max_chars=1500, overlap=200):
|
| 23 |
+
if len(text) <= max_chars:
|
| 24 |
return [text]
|
| 25 |
parts = []
|
| 26 |
start = 0
|
| 27 |
while start < len(text):
|
| 28 |
end = min(len(text), start + max_chars)
|
|
|
|
| 29 |
chunk = text[start:end]
|
|
|
|
| 30 |
nl = chunk.rfind('\n')
|
| 31 |
+
if nl > max_chars * 0.6:
|
| 32 |
end = start + nl
|
| 33 |
chunk = text[start:end]
|
| 34 |
parts.append(chunk.strip())
|
|
|
|
| 47 |
instr = "Summarize:"
|
| 48 |
return f"{instr}\n\n{text}"
|
| 49 |
|
| 50 |
+
# NEW: Route for "/" that renders index.html (fixes the 404 on the root URL)
|
| 51 |
+
@app.route("/")
|
| 52 |
+
def home():
|
| 53 |
+
return render_template("index.html")
|
| 54 |
+
|
| 55 |
@app.route("/summarize", methods=["POST"])
|
| 56 |
def summarize_route():
|
| 57 |
data = request.get_json(force=True)
|
| 58 |
+
text = data.get("text", "")[:20000]
|
| 59 |
length = data.get("length", "medium")
|
| 60 |
tone = data.get("tone", "neutral")
|
| 61 |
|
|
|
|
| 68 |
|
| 69 |
for chunk in chunks:
|
| 70 |
prompted = apply_tone_instruction(chunk, tone)
|
| 71 |
+
out = summarizer(
|
| 72 |
+
prompted,
|
| 73 |
+
min_length=preset["min_length"],
|
| 74 |
+
max_length=preset["max_length"],
|
| 75 |
+
truncation=True
|
| 76 |
+
)[0]["summary_text"]
|
| 77 |
summaries.append(out.strip())
|
| 78 |
|
|
|
|
| 79 |
if len(summaries) == 1:
|
| 80 |
final = summaries[0]
|
| 81 |
else:
|
| 82 |
combined = "\n\n".join(summaries)
|
| 83 |
prompted = apply_tone_instruction(combined, tone)
|
| 84 |
+
final = summarizer(
|
| 85 |
+
prompted,
|
| 86 |
+
min_length=preset["min_length"],
|
| 87 |
+
max_length=preset["max_length"],
|
| 88 |
+
truncation=True
|
| 89 |
+
)[0]["summary_text"]
|
| 90 |
|
|
|
|
| 91 |
if tone == "bullet":
|
| 92 |
lines = [l.strip() for s in final.splitlines() for l in s.split(". ") if l.strip()]
|
| 93 |
final = "\n".join(f"- {l.rstrip('.')}" for l in lines[:20])
|
|
|
|
| 95 |
return jsonify({"summary": final})
|
| 96 |
|
| 97 |
if __name__ == "__main__":
|
| 98 |
+
app.run(debug=True, port=7860)
|
|
|