JaceWei committed on
Commit
e8b0b7e
·
1 Parent(s): bcced15
Files changed (1) hide show
  1. posterbuilder/convert.py +101 -11
posterbuilder/convert.py CHANGED
@@ -77,7 +77,92 @@ GREEK_OR_MATH_MACROS = (
77
  r"Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega"
78
  )
79
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  # ===================== 基础工具 =====================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def normalize_textit_math(s: str) -> str:
83
  """
@@ -153,29 +238,25 @@ def fix_latex_escaped_commands(s: str) -> str:
153
  s = s.replace("\\}", "}")
154
  return s
155
 
156
- import re
157
 
158
  def escape_text(s: str) -> str:
159
  if not s:
160
  return ""
161
 
162
- # --- 1) 捕获 math: $...$ 或 \( ... \) ----
163
  math_blocks = []
164
  def store_math(m):
165
  math_blocks.append(m.group(0))
166
  return f"\0{len(math_blocks)-1}\0"
167
 
168
- # 识别 $...$ 和 \( ... \)
169
- s = re.sub(r"\${1,2}.*?\${1,2}|\\\(.+?\\\)", store_math, s)
170
 
171
  # --- 2) 转义文本字符(不碰 math) ----
172
  rep = {
173
  "&": r"\&", "%": r"\%", "$": r"\$", "#": r"\#",
174
  "_": r"\_", "{": r"\{", "}": r"\}",
175
- "~": r"~{}", # 保持 LaTeX 不换行空格
176
- "^": r"\^{}",
177
  }
178
-
179
  for k, v in rep.items():
180
  s = s.replace(k, v)
181
 
@@ -187,6 +268,7 @@ def escape_text(s: str) -> str:
187
 
188
 
189
 
 
190
  def soft_wrap_title_for_logo(title: str, first_limit=68, next_limit=72) -> str:
191
  if not title or len(title) <= first_limit: return title
192
  def break_at(s: str, limit: int):
@@ -230,17 +312,25 @@ def format_content_to_latex(content: str) -> str:
230
  """格式化正文内容,自动修复 LaTeX 命令"""
231
  if not content:
232
  return ""
233
- # 🔧 新增:先修复由于 \t 被误解析的 LaTeX 命令
 
234
  content = fix_latex_escaped_commands(content)
235
- # NEW: 规范 \textit{...} 中的“伪数学”写法
 
236
  content = normalize_textit_math(content)
237
 
 
 
 
 
238
  lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
239
  if lines and all(ln.startswith(("-", "•")) for ln in lines):
240
  items = [escape_text(ln.lstrip("-• ").strip()) for ln in lines]
241
  return "\n".join(["\\begin{itemize}"] + [f"\\item {it}" for it in items] + ["\\end{itemize}"])
 
242
  return escape_text(" ".join(lines))
243
 
 
244
  def make_block(title: str, content: str, figures_tex: str = "") -> str:
245
  body = format_content_to_latex(content or "")
246
  if figures_tex: body = (body + "\n\n" if body else "") + figures_tex
@@ -581,8 +671,8 @@ def build():
581
  cleaned_tex = cleaned_tex.replace(r"\{", "{")
582
  cleaned_tex = cleaned_tex.replace(r"\}", "}")
583
  # 注意:要先处理上面的大括号再处理反斜杠,否则会提前破坏结构
584
- cleaned_tex = cleaned_tex.replace(r"\\\\", r"\\") # 避免双转义干扰
585
- cleaned_tex = cleaned_tex.replace(r"\\", "\\") # 最终将 \\ → \
586
  cleaned_tex = cleaned_tex.replace(r"\t\t", "\\t")
587
  cleaned_tex = strip_stray_t(cleaned_tex)
588
 
 
77
  r"Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega"
78
  )
79
 
80
# Names (without the leading backslash) of macros that are only valid in math
# mode: the GREEK_OR_MATH_MACROS alternatives plus common operators/symbols.
MATH_INLINE_MACROS = (
    GREEK_OR_MATH_MACROS
    + r"|partial|nabla|infty|cdot|times|pm|leq|geq|ldots|dots"
)

# Matches one bare math macro, optionally followed by a single-letter variable
# on the same line (e.g. "\delta c").
#   * (?![A-Za-z]) right after the name stops a listed macro from matching as
#     a prefix of a longer command (\pm in \pmatrix, \leq in \leqslant).
#   * The trailing variable must be a standalone letter; otherwise the first
#     letter of the next word would be pulled in ("\tau is" -> "$\tau i$s").
#   * Only spaces/tabs may separate macro and variable, so a match never
#     crosses a line break.
# Group 1 is the macro itself (backslash included).
_MACRO_OUTSIDE_MATH_RE = re.compile(
    rf"(\\(?:{MATH_INLINE_MACROS}))(?![A-Za-z])"
    r"(?:[ \t]+[A-Za-z](?![A-Za-z]))?"
)
89
+
90
+ _BULLET_RE = re.compile(r"•")
91
+
92
  # ===================== 基础工具 =====================
93
+ # 覆盖的数学块(全局已有 MATH_BLOCK_RE,可以复用)
94
def wrap_math_macros_outside_math(s: str) -> str:
    r"""Wrap math macros that appear outside math mode in ``$...$``.

    Examples::

        \delta c  ->  $\delta c$
        \tau      ->  $\tau$

    Spans matched by the module-level ``MATH_BLOCK_RE`` are swapped for
    placeholders before the macros are wrapped and restored afterwards, so
    text that is already inside a math block is never wrapped a second time.

    Args:
        s: Text to process. Falsy input (``""`` or ``None``) is returned
            unchanged.

    Returns:
        The text with every match of ``_MACRO_OUTSIDE_MATH_RE`` outside the
        stashed math blocks surrounded by ``$``.
    """
    if not s:
        return s

    # 1) Stash existing math blocks behind NUL-delimited placeholders.
    stash = []

    def _hide(m):
        stash.append(m.group(0))
        return f"\x00M{len(stash) - 1}\x00"

    hidden = MATH_BLOCK_RE.sub(_hide, s)

    # 2) Wrap every bare macro that is left in the non-math text.
    hidden = _MACRO_OUTSIDE_MATH_RE.sub(lambda m: f"${m.group(0)}$", hidden)

    # 3) Restore the stashed blocks in one pass. A placeholder-looking
    #    sequence whose index was never stashed is left untouched.
    def _restore(m):
        idx = int(m.group(1))
        return stash[idx] if idx < len(stash) else m.group(0)

    return re.sub(r"\x00M(\d+)\x00", _restore, hidden)
150
+
151
+
152
_BULLET_RE = re.compile(r"•")
# A \textbullet{} directly followed / directly preceded by a non-space char.
_BULLET_GLUED_AFTER_RE = re.compile(r"\\textbullet\{\}(?=\S)")
_BULLET_GLUED_BEFORE_RE = re.compile(r"(?<=\S)\\textbullet\{\}")


def normalize_inline_bullets(s: str) -> str:
    r"""Replace the Unicode bullet with ``\textbullet{}`` and un-glue it.

    Every ``•`` becomes ``\textbullet{}``. A space is then inserted on any
    side where the command touches a non-space character, so it never sticks
    to neighbouring text. No space is added at the very start or end of the
    string.

    Two passes (space after, then space before) cover every case. A separate
    "both sides" pass is not needed and would emit a doubled space between
    adjacent bullets.

    Args:
        s: Text to normalize. Falsy input (``""`` or ``None``) is returned
            unchanged.

    Returns:
        The text with bullets replaced and spaced.
    """
    if not s:
        return s
    s = _BULLET_RE.sub(r"\\textbullet{}", s)
    s = _BULLET_GLUED_AFTER_RE.sub(r"\\textbullet{} ", s)
    return _BULLET_GLUED_BEFORE_RE.sub(r" \\textbullet{}", s)
166
 
167
  def normalize_textit_math(s: str) -> str:
168
  """
 
238
  s = s.replace("\\}", "}")
239
  return s
240
 
 
241
 
242
  def escape_text(s: str) -> str:
243
  if not s:
244
  return ""
245
 
246
+ # --- 1) 捕获所有数学块(沿用全局 MATH_BLOCK_RE)----
247
  math_blocks = []
248
  def store_math(m):
249
  math_blocks.append(m.group(0))
250
  return f"\0{len(math_blocks)-1}\0"
251
 
252
+ s = MATH_BLOCK_RE.sub(store_math, s)
 
253
 
254
  # --- 2) 转义文本字符(不碰 math) ----
255
  rep = {
256
  "&": r"\&", "%": r"\%", "$": r"\$", "#": r"\#",
257
  "_": r"\_", "{": r"\{", "}": r"\}",
258
+ "~": r"~{}", "^": r"\^{}",
 
259
  }
 
260
  for k, v in rep.items():
261
  s = s.replace(k, v)
262
 
 
268
 
269
 
270
 
271
+
272
  def soft_wrap_title_for_logo(title: str, first_limit=68, next_limit=72) -> str:
273
  if not title or len(title) <= first_limit: return title
274
  def break_at(s: str, limit: int):
 
312
  """格式化正文内容,自动修复 LaTeX 命令"""
313
  if not content:
314
  return ""
315
+
316
+ # 1) 先修复 \t 造成的命令断头
317
  content = fix_latex_escaped_commands(content)
318
+
319
+ # 2) 规范 \textit{...} 里的“伪数学”
320
  content = normalize_textit_math(content)
321
 
322
+ # 3) **把非数学环境的数学宏包进 $...$** ← NEW(修正 \delta c)
323
+ content = wrap_math_macros_outside_math(content)
324
+
325
+ # 之后再进行 itemize 的识别与转义
326
  lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
327
  if lines and all(ln.startswith(("-", "•")) for ln in lines):
328
  items = [escape_text(ln.lstrip("-• ").strip()) for ln in lines]
329
  return "\n".join(["\\begin{itemize}"] + [f"\\item {it}" for it in items] + ["\\end{itemize}"])
330
+
331
  return escape_text(" ".join(lines))
332
 
333
+
334
  def make_block(title: str, content: str, figures_tex: str = "") -> str:
335
  body = format_content_to_latex(content or "")
336
  if figures_tex: body = (body + "\n\n" if body else "") + figures_tex
 
671
  cleaned_tex = cleaned_tex.replace(r"\{", "{")
672
  cleaned_tex = cleaned_tex.replace(r"\}", "}")
673
  # 注意:要先处理上面的大括号再处理反斜杠,否则会提前破坏结构
674
+ # cleaned_tex = cleaned_tex.replace(r"\\\\", r"\\") # 避免双转义干扰
675
+ # cleaned_tex = cleaned_tex.replace(r"\\", "\\") # 最终将 \\ → \
676
  cleaned_tex = cleaned_tex.replace(r"\t\t", "\\t")
677
  cleaned_tex = strip_stray_t(cleaned_tex)
678