sayanAIAI committed on
Commit a323f1e · verified · 1 Parent(s): fd8623d

Update main.py

Files changed (1):
  main.py  +26 -23
main.py CHANGED
@@ -1,7 +1,7 @@
 import os
 os.environ['HF_HOME'] = '/tmp'
-# main.py (excerpt)
-from flask import Flask, request, jsonify
+
+from flask import Flask, request, jsonify, render_template
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import math, textwrap
 
@@ -14,23 +14,21 @@ summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=
 
 # Simple mapping of presets to generation lengths
 LENGTH_PRESETS = {
-    "short": {"min_length": 20, "max_length": 60},
-    "medium": {"min_length": 60, "max_length": 130},
-    "long": {"min_length": 130, "max_length": 300},
+    "short": {"min_length": 20, "max_length": 60},
+    "medium": {"min_length": 60, "max_length": 130},
+    "long": {"min_length": 130, "max_length": 300},
 }
 
 def chunk_text_by_chars(text, max_chars=1500, overlap=200):
-    if len(text) <= max_chars:
+    if len(text) <= max_chars:
         return [text]
     parts = []
     start = 0
     while start < len(text):
         end = min(len(text), start + max_chars)
-        # try to break at newline or sentence boundary for nicer chunking
         chunk = text[start:end]
-        # extend to nearest newline if possible (avoid cutting sentences)
         nl = chunk.rfind('\n')
-        if nl > max_chars*0.6:
+        if nl > max_chars * 0.6:
             end = start + nl
             chunk = text[start:end]
         parts.append(chunk.strip())
@@ -49,10 +47,15 @@ def apply_tone_instruction(text, tone):
         instr = "Summarize:"
     return f"{instr}\n\n{text}"
 
+# NEW: Route to show summarizer.html (fixes 404)
+@app.route("/")
+def home():
+    return render_template("index.html")
+
 @app.route("/summarize", methods=["POST"])
 def summarize_route():
     data = request.get_json(force=True)
-    text = data.get("text", "")[:20000] # safe cap
+    text = data.get("text", "")[:20000]
     length = data.get("length", "medium")
     tone = data.get("tone", "neutral")
 
@@ -65,25 +68,26 @@ def summarize_route():
 
     for chunk in chunks:
         prompted = apply_tone_instruction(chunk, tone)
-        # call summarizer with min/max lengths
-        out = summarizer(prompted,
-                         min_length=preset["min_length"],
-                         max_length=preset["max_length"],
-                         truncation=True)[0]["summary_text"]
+        out = summarizer(
+            prompted,
+            min_length=preset["min_length"],
+            max_length=preset["max_length"],
+            truncation=True
+        )[0]["summary_text"]
         summaries.append(out.strip())
 
-    # If multiple chunk summaries, join and compress once more
     if len(summaries) == 1:
         final = summaries[0]
     else:
         combined = "\n\n".join(summaries)
         prompted = apply_tone_instruction(combined, tone)
-        final = summarizer(prompted,
-                           min_length=preset["min_length"],
-                           max_length=preset["max_length"],
-                           truncation=True)[0]["summary_text"]
+        final = summarizer(
+            prompted,
+            min_length=preset["min_length"],
+            max_length=preset["max_length"],
+            truncation=True
+        )[0]["summary_text"]
 
-    # if bullet tone, post-process
     if tone == "bullet":
         lines = [l.strip() for s in final.splitlines() for l in s.split(". ") if l.strip()]
         final = "\n".join(f"- {l.rstrip('.')}" for l in lines[:20])
@@ -91,5 +95,4 @@ def summarize_route():
     return jsonify({"summary": final})
 
 if __name__ == "__main__":
-    app.run(debug=True,port=7860)
-
+    app.run(debug=True, port=7860)
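
As a quick sanity check of the /summarize route after this change, a minimal client sketch (not part of the commit; it assumes the app is running locally on port 7860 as configured in app.run, and the payload values are placeholders):

# Hypothetical smoke test for the /summarize endpoint (illustration only).
# Assumes the Flask app from main.py is already running on localhost:7860.
import requests

payload = {
    "text": "Long article text goes here ...",  # server caps input at 20000 chars
    "length": "short",                          # one of the LENGTH_PRESETS keys
    "tone": "bullet",                           # "bullet" triggers the post-processing step
}
resp = requests.post("http://localhost:7860/summarize", json=payload, timeout=120)
print(resp.json()["summary"])

The "length" field selects an entry from LENGTH_PRESETS, "tone" is passed through apply_tone_instruction, and with "bullet" the final summary is reformatted into at most 20 "- " lines.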
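
The new "/" route calls render_template("index.html"), so Flask expects a templates/index.html file next to main.py; that template is not part of this single-file commit. If it does not already exist in the repo, a placeholder (hypothetical content, shown only so the route does not raise TemplateNotFound) could be created like this:

# Hypothetical helper (not in the commit): write a placeholder template for
# the new "/" route. Flask's default template folder is "templates/".
import os

os.makedirs("templates", exist_ok=True)
if not os.path.exists("templates/index.html"):
    with open("templates/index.html", "w", encoding="utf-8") as f:
        f.write("<h1>Summarizer</h1>\n<p>POST JSON to /summarize</p>\n")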