fix
Browse files- posterbuilder/convert.py +101 -11
posterbuilder/convert.py
CHANGED
|
@@ -77,7 +77,92 @@ GREEK_OR_MATH_MACROS = (
|
|
| 77 |
r"Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega"
|
| 78 |
)
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
# ===================== 基础工具 =====================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
def normalize_textit_math(s: str) -> str:
|
| 83 |
"""
|
|
@@ -153,29 +238,25 @@ def fix_latex_escaped_commands(s: str) -> str:
|
|
| 153 |
s = s.replace("\\}", "}")
|
| 154 |
return s
|
| 155 |
|
| 156 |
-
import re
|
| 157 |
|
| 158 |
def escape_text(s: str) -> str:
|
| 159 |
if not s:
|
| 160 |
return ""
|
| 161 |
|
| 162 |
-
# --- 1)
|
| 163 |
math_blocks = []
|
| 164 |
def store_math(m):
|
| 165 |
math_blocks.append(m.group(0))
|
| 166 |
return f"\0{len(math_blocks)-1}\0"
|
| 167 |
|
| 168 |
-
|
| 169 |
-
s = re.sub(r"\${1,2}.*?\${1,2}|\\\(.+?\\\)", store_math, s)
|
| 170 |
|
| 171 |
# --- 2) 转义文本字符(不碰 math) ----
|
| 172 |
rep = {
|
| 173 |
"&": r"\&", "%": r"\%", "$": r"\$", "#": r"\#",
|
| 174 |
"_": r"\_", "{": r"\{", "}": r"\}",
|
| 175 |
-
"~": r"~{}",
|
| 176 |
-
"^": r"\^{}",
|
| 177 |
}
|
| 178 |
-
|
| 179 |
for k, v in rep.items():
|
| 180 |
s = s.replace(k, v)
|
| 181 |
|
|
@@ -187,6 +268,7 @@ def escape_text(s: str) -> str:
|
|
| 187 |
|
| 188 |
|
| 189 |
|
|
|
|
| 190 |
def soft_wrap_title_for_logo(title: str, first_limit=68, next_limit=72) -> str:
|
| 191 |
if not title or len(title) <= first_limit: return title
|
| 192 |
def break_at(s: str, limit: int):
|
|
@@ -230,17 +312,25 @@ def format_content_to_latex(content: str) -> str:
|
|
| 230 |
"""格式化正文内容,自动修复 LaTeX 命令"""
|
| 231 |
if not content:
|
| 232 |
return ""
|
| 233 |
-
|
|
|
|
| 234 |
content = fix_latex_escaped_commands(content)
|
| 235 |
-
|
|
|
|
| 236 |
content = normalize_textit_math(content)
|
| 237 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
|
| 239 |
if lines and all(ln.startswith(("-", "•")) for ln in lines):
|
| 240 |
items = [escape_text(ln.lstrip("-• ").strip()) for ln in lines]
|
| 241 |
return "\n".join(["\\begin{itemize}"] + [f"\\item {it}" for it in items] + ["\\end{itemize}"])
|
|
|
|
| 242 |
return escape_text(" ".join(lines))
|
| 243 |
|
|
|
|
| 244 |
def make_block(title: str, content: str, figures_tex: str = "") -> str:
|
| 245 |
body = format_content_to_latex(content or "")
|
| 246 |
if figures_tex: body = (body + "\n\n" if body else "") + figures_tex
|
|
@@ -581,8 +671,8 @@ def build():
|
|
| 581 |
cleaned_tex = cleaned_tex.replace(r"\{", "{")
|
| 582 |
cleaned_tex = cleaned_tex.replace(r"\}", "}")
|
| 583 |
# 注意:要先处理上面的大括号再处理反斜杠,否则会提前破坏结构
|
| 584 |
-
cleaned_tex = cleaned_tex.replace(r"\\\\", r"\\") # 避免双转义干扰
|
| 585 |
-
cleaned_tex = cleaned_tex.replace(r"\\", "\\") # 最终将 \\ → \
|
| 586 |
cleaned_tex = cleaned_tex.replace(r"\t\t", "\\t")
|
| 587 |
cleaned_tex = strip_stray_t(cleaned_tex)
|
| 588 |
|
|
|
|
| 77 |
r"Gamma|Delta|Theta|Lambda|Xi|Pi|Sigma|Upsilon|Phi|Psi|Omega"
|
| 78 |
)
|
| 79 |
|
| 80 |
+
# Names (without the leading backslash) of macros that should be wrapped in
# $...$ when they appear outside math: the existing GREEK_OR_MATH_MACROS
# alternation plus a few common operators and relations.
MATH_INLINE_MACROS = (
    GREEK_OR_MATH_MACROS
    + r"|partial|nabla|infty|cdot|times|pm|leq|geq|ldots|dots"
)

# Matches one bare math macro, optionally followed by a single-letter variable
# (e.g. "\delta c").
#   * (?![A-Za-z]) after the macro name stops a listed name from matching as a
#     prefix of a longer control sequence (\leq inside \leqslant, \pm inside
#     \pmod).
#   * (?![A-Za-z]) after the variable letter keeps the match from swallowing
#     the first letter of the next word ("\tau is" must not become
#     "$\tau i$s").
# NOTE(review): a genuine one-letter word after a macro ("\alpha a value") is
# still treated as a variable; that cannot be told apart syntactically.
_MACRO_OUTSIDE_MATH_RE = re.compile(
    rf"(\\(?:{MATH_INLINE_MACROS})(?![A-Za-z]))"
    rf"(?:\s+[A-Za-z](?![A-Za-z]))?",
)

# Unicode bullet character (U+2022).
_BULLET_RE = re.compile(r"•")
|
| 91 |
+
|
| 92 |
# ===================== 基础工具 =====================
|
| 93 |
+
# 覆盖的数学块(全局已有 MATH_BLOCK_RE,可以复用)
|
| 94 |
+
def wrap_math_macros_outside_math(s: str) -> str:
    r"""Wrap bare math macros that appear outside math mode in ``$...$``.

    Examples: ``\delta c`` -> ``$\delta c$`` and ``\tau`` -> ``$\tau$``.

    Spans matched by the module-level ``MATH_BLOCK_RE`` (per the original
    author's note: ``$...$``, ``\[...\]`` and ``\(...\)``) are swapped for
    NUL-delimited placeholders before the macro pass and restored afterwards,
    so existing math is never wrapped a second time.

    Args:
        s: Text that may contain bare math macros.

    Returns:
        The text with every match of ``_MACRO_OUTSIDE_MATH_RE`` outside the
        stashed math spans wrapped in ``$...$``. Falsy input (``""`` or
        ``None``) is returned unchanged.
    """
    if not s:
        return s

    # 1) Stash existing math blocks behind placeholders.
    stash = []

    def _hide(m):
        stash.append(m.group(0))
        return f"\x00M{len(stash) - 1}\x00"

    hidden = MATH_BLOCK_RE.sub(_hide, s)

    # 2) Wrap every bare macro (plus its optional trailing variable) in $...$.
    hidden = _MACRO_OUTSIDE_MATH_RE.sub(lambda m: f"${m.group(0)}$", hidden)

    # 3) Put the stashed math blocks back. Placeholders are delimited by NUL
    #    on both sides, so index 1 can never match inside index 10.
    for i, blk in enumerate(stash):
        hidden = hidden.replace(f"\x00M{i}\x00", blk)

    return hidden
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def normalize_inline_bullets(s: str) -> str:
    r"""Replace Unicode bullets with ``\textbullet{}`` padded by spaces.

    Every ``•`` becomes ``\textbullet{}``. A space is then inserted on any
    side of a ``\textbullet{}`` that directly touches a non-whitespace
    character, so the bullet never sticks to neighbouring text. No space is
    added at the very start or end of the string. Pre-existing
    ``\textbullet{}`` occurrences are padded the same way.

    Args:
        s: Text that may contain ``•`` characters.

    Returns:
        The normalized text. Falsy input (``""`` or ``None``) is returned
        unchanged.
    """
    if not s:
        return s

    # A literal single-character swap needs no regex.
    s = s.replace("•", r"\textbullet{}")

    # Pad each side independently. A separate "both sides" pass is not needed
    # and would leave a doubled space between two adjacent bullets.
    s = re.sub(r"\\textbullet\{\}(?=\S)", r"\g<0> ", s)
    s = re.sub(r"(?<=\S)\\textbullet\{\}", r" \g<0>", s)
    return s
|
| 166 |
|
| 167 |
def normalize_textit_math(s: str) -> str:
|
| 168 |
"""
|
|
|
|
| 238 |
s = s.replace("\\}", "}")
|
| 239 |
return s
|
| 240 |
|
|
|
|
| 241 |
|
| 242 |
def escape_text(s: str) -> str:
|
| 243 |
if not s:
|
| 244 |
return ""
|
| 245 |
|
| 246 |
+
# --- 1) 捕获所有数学块(沿用全局 MATH_BLOCK_RE)----
|
| 247 |
math_blocks = []
|
| 248 |
def store_math(m):
|
| 249 |
math_blocks.append(m.group(0))
|
| 250 |
return f"\0{len(math_blocks)-1}\0"
|
| 251 |
|
| 252 |
+
s = MATH_BLOCK_RE.sub(store_math, s)
|
|
|
|
| 253 |
|
| 254 |
# --- 2) 转义文本字符(不碰 math) ----
|
| 255 |
rep = {
|
| 256 |
"&": r"\&", "%": r"\%", "$": r"\$", "#": r"\#",
|
| 257 |
"_": r"\_", "{": r"\{", "}": r"\}",
|
| 258 |
+
"~": r"~{}", "^": r"\^{}",
|
|
|
|
| 259 |
}
|
|
|
|
| 260 |
for k, v in rep.items():
|
| 261 |
s = s.replace(k, v)
|
| 262 |
|
|
|
|
| 268 |
|
| 269 |
|
| 270 |
|
| 271 |
+
|
| 272 |
def soft_wrap_title_for_logo(title: str, first_limit=68, next_limit=72) -> str:
|
| 273 |
if not title or len(title) <= first_limit: return title
|
| 274 |
def break_at(s: str, limit: int):
|
|
|
|
| 312 |
"""格式化正文内容,自动修复 LaTeX 命令"""
|
| 313 |
if not content:
|
| 314 |
return ""
|
| 315 |
+
|
| 316 |
+
# 1) 先修复 \t 造成的命令断头
|
| 317 |
content = fix_latex_escaped_commands(content)
|
| 318 |
+
|
| 319 |
+
# 2) 规范 \textit{...} 里的“伪数学”
|
| 320 |
content = normalize_textit_math(content)
|
| 321 |
|
| 322 |
+
# 3) **把非数学环境的数学宏包进 $...$** ← NEW(修正 \delta c)
|
| 323 |
+
content = wrap_math_macros_outside_math(content)
|
| 324 |
+
|
| 325 |
+
# 之后再进行 itemize 的识别与转义
|
| 326 |
lines = [ln.strip() for ln in content.splitlines() if ln.strip()]
|
| 327 |
if lines and all(ln.startswith(("-", "•")) for ln in lines):
|
| 328 |
items = [escape_text(ln.lstrip("-• ").strip()) for ln in lines]
|
| 329 |
return "\n".join(["\\begin{itemize}"] + [f"\\item {it}" for it in items] + ["\\end{itemize}"])
|
| 330 |
+
|
| 331 |
return escape_text(" ".join(lines))
|
| 332 |
|
| 333 |
+
|
| 334 |
def make_block(title: str, content: str, figures_tex: str = "") -> str:
|
| 335 |
body = format_content_to_latex(content or "")
|
| 336 |
if figures_tex: body = (body + "\n\n" if body else "") + figures_tex
|
|
|
|
| 671 |
cleaned_tex = cleaned_tex.replace(r"\{", "{")
|
| 672 |
cleaned_tex = cleaned_tex.replace(r"\}", "}")
|
| 673 |
# 注意:要先处理上面的大括号再处理反斜杠,否则会提前破坏结构
|
| 674 |
+
# cleaned_tex = cleaned_tex.replace(r"\\\\", r"\\") # 避免双转义干扰
|
| 675 |
+
# cleaned_tex = cleaned_tex.replace(r"\\", "\\") # 最终将 \\ → \
|
| 676 |
cleaned_tex = cleaned_tex.replace(r"\t\t", "\\t")
|
| 677 |
cleaned_tex = strip_stray_t(cleaned_tex)
|
| 678 |
|