# PaperShow / app.py — Hugging Face Space (author: JaceWei, revision a7893a8)
import gradio as gr
import subprocess, shutil, os, zipfile, datetime, sys, time
from pathlib import Path
def _ensure_versions():
    """Pin huggingface_hub / transformers to versions this Space works with.

    If the installed ``huggingface_hub`` is missing or outside the supported
    ``[0.24.0, 1.0.0)`` window, force-reinstall pinned versions of
    ``huggingface-hub`` and ``transformers`` via pip. Called once at import
    time, before any heavy imports depend on those packages.
    """
    import importlib  # subprocess and sys are already imported at module level

    def ver(pkg):
        # Best-effort version lookup: return "0" for a missing/broken package
        # so the range check below fails and triggers the reinstall.
        try:
            m = importlib.import_module(pkg)
            return getattr(m, "__version__", "0")
        except Exception:
            return "0"

    hub_ok = False
    try:
        from packaging.version import Version
        hv = Version(ver("huggingface_hub"))
        hub_ok = Version("0.24.0") <= hv < Version("1.0.0")
    except Exception:
        # packaging unavailable or unparsable version -> treat as not OK
        pass
    if not hub_ok:
        # --no-deps / --force-reinstall: swap just these two packages without
        # letting pip touch the rest of the environment.
        subprocess.check_call([sys.executable, "-m", "pip",
                               "install", "huggingface-hub==0.27.1",
                               "transformers==4.48.0",
                               "--force-reinstall", "--no-deps"])


_ensure_versions()
# --- Filesystem layout & limits (all paths anchored at this file's directory) ---
ROOT = Path(__file__).resolve().parent
OUTPUT_DIR = ROOT / "output"                              # pipeline results, wiped per run
INPUT_DIR = ROOT / "input"                                # uploaded paper / assets
LOGO_DIR = INPUT_DIR / "logo"                             # uploaded institutional logos
POSTER_LATEX_DIR = ROOT / "posterbuilder" / "latex_proj"  # LaTeX project used by the builder
ZIP_PATH = ROOT / "output.zip"                            # packaged result offered for download
LOG_PATH = ROOT / "last_run.log"                          # mirror of the streamed UI log
TIMEOUT_SECONDS = 1800  # 30 minutes — hard cap on one pipeline run
def run_pipeline(arxiv_url, pdf_file, openai_key, logo_files):
    """Run the poster-generation pipeline, streaming log text to the UI.

    Generator: each yield is ``(log_text, download_path)``. ``download_path``
    stays ``None`` until the final successful yield, which carries the path of
    the zipped ``output/`` folder.

    Args:
        arxiv_url: arXiv link of the paper (alternative to *pdf_file*).
        pdf_file: Uploaded PDF as a Gradio file object (``.name`` holds the
            temp-file path on disk), or ``None``.
        openai_key: OpenAI API key, forwarded verbatim to ``pipeline.py``.
        logo_files: Required logo upload(s); may arrive as a single file
            object or a list depending on the Gradio version.
    """
    start_time = datetime.datetime.now()
    logs = [f"🚀 Starting pipeline at {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n"]

    # ====== Prepare directories ======
    for d in [OUTPUT_DIR, LOGO_DIR, POSTER_LATEX_DIR, INPUT_DIR]:
        d.mkdir(parents=True, exist_ok=True)

    # Clean up old outputs so a previous run can't leak into this one
    for item in OUTPUT_DIR.iterdir():
        if item.is_dir():
            shutil.rmtree(item)
        else:
            item.unlink()
    if ZIP_PATH.exists():
        ZIP_PATH.unlink()
    logs.append("🧹 Cleaned previous output.\n")
    _write_logs(logs)
    yield "\n".join(logs), None

    # ====== Validation: must upload LOGO ======
    # Gradio may return a single file or a list, normalize to list
    if logo_files is None:
        logo_files = []
    if not isinstance(logo_files, (list, tuple)):
        logo_files = [logo_files]
    logo_files = [f for f in logo_files if f]  # filter None
    if len(logo_files) == 0:
        msg = "❌ You must upload at least one institutional logo (multiple allowed)."
        logs.append(msg)
        _write_logs(logs)
        yield "\n".join(logs), None
        return

    # Clear input/logo and then save new files
    for item in LOGO_DIR.iterdir():
        if item.is_file():
            item.unlink()
    saved_logo_paths = []
    for lf in logo_files:
        # lf.name is the Gradio temp path; keep only the basename on our side
        p = LOGO_DIR / Path(lf.name).name
        shutil.copy(lf.name, p)
        saved_logo_paths.append(p)
    logs.append(f"🏷️ Saved {len(saved_logo_paths)} logo file(s) to: {LOGO_DIR}\n")
    _write_logs(logs)
    yield "\n".join(logs), None

    # ====== Handle uploaded PDF (optional) ======
    pdf_path = None
    if pdf_file:
        pdf_dir = INPUT_DIR / "pdf"
        pdf_dir.mkdir(parents=True, exist_ok=True)
        pdf_path = pdf_dir / Path(pdf_file.name).name
        shutil.copy(pdf_file.name, pdf_path)
        logs.append(f"📄 Uploaded PDF saved to: {pdf_path}\n")
        # For pipeline Step 1.5 compatibility: also copy to input/paper.pdf
        canonical_pdf = INPUT_DIR / "paper.pdf"
        shutil.copy(pdf_file.name, canonical_pdf)
        _write_logs(logs)
        yield "\n".join(logs), None

    # ====== Validate input source ======
    if not arxiv_url and not pdf_file:
        msg = "❌ Please provide either an arXiv link or upload a PDF file (choose one)."
        logs.append(msg)
        _write_logs(logs)
        yield "\n".join(logs), None
        return

    # ====== Build command ======
    # sys.executable guarantees the child runs under the same interpreter/venv
    # as this app; a bare "python" could resolve to a different installation.
    cmd = [
        sys.executable, "pipeline.py",
        "--model_name_t", "gpt-5",
        "--model_name_v", "gpt-5",
        "--result_dir", "output",
        "--paper_latex_root", "input/latex_proj",
        "--openai_key", openai_key,
        "--gemini_key", "##",
        "--logo_dir", str(LOGO_DIR)  # 👈 pass logo directory
    ]
    if arxiv_url:
        cmd += ["--arxiv_url", arxiv_url]
    # No --pdf_path flag: an uploaded PDF reaches the pipeline through the
    # canonical input/paper.pdf copy made above.

    # ====== Run command with REAL-TIME streaming ======
    logs.append("\n======= REAL-TIME LOG =======\n")
    _write_logs(logs)
    yield "\n".join(logs), None
    try:
        process = subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # merge stderr so one stream carries everything
            text=True,
            bufsize=1,                 # line-buffered in text mode
            universal_newlines=True,
        )
    except Exception as e:
        msg = f"❌ Pipeline failed to start: {e}"
        logs.append(msg)
        _write_logs(logs)
        yield "\n".join(logs), None
        return

    last_yield = time.time()
    try:
        while True:
            # Timeout guard
            if (datetime.datetime.now() - start_time).total_seconds() > TIMEOUT_SECONDS:
                logs.append("❌ Pipeline timed out (30 min limit). Killing process...\n")
                try:
                    process.kill()
                except Exception:
                    pass
                _write_logs(logs)
                yield "\n".join(logs), None
                return
            line = process.stdout.readline()
            if line:
                # echo to HF Space logs as well
                print(line, end="")
                logs.append(line.rstrip("\n"))
                # write & occasionally yield to reduce UI churn
                _write_logs(logs)
                now = time.time()
                if now - last_yield >= 0.3:  # throttle UI updates ~3/sec
                    last_yield = now
                    yield "\n".join(logs), None
            elif process.poll() is not None:
                # nothing buffered and the child has exited -> stream drained
                break
            else:
                time.sleep(0.05)
        return_code = process.wait()
        logs.append(f"\nProcess finished with code {return_code}\n")
        _write_logs(logs)
        yield "\n".join(logs), None
        if return_code != 0:
            logs.append("❌ Process exited with non-zero status. See logs above.\n")
            _write_logs(logs)
            yield "\n".join(logs), None
            return
    except Exception as e:
        logs.append(f"❌ Error during streaming: {e}\n")
        _write_logs(logs)
        yield "\n".join(logs), None
        return
    finally:
        # Always release the child's pipe, including timeout/error paths
        try:
            if process.stdout:
                process.stdout.close()
        except Exception:
            pass

    # ====== Check output & zip ======
    has_output = False
    try:
        for _ in OUTPUT_DIR.iterdir():
            has_output = True
            break
    except FileNotFoundError:
        has_output = False
    if not has_output:
        msg = "❌ No output generated. Please check logs above."
        logs.append(msg)
        _write_logs(logs)
        yield "\n".join(logs), None
        return
    try:
        with zipfile.ZipFile(ZIP_PATH, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, dirs, files in os.walk(OUTPUT_DIR):
                for file in files:
                    file_path = Path(root) / file
                    # store paths relative to output/ so the zip unpacks flat
                    arcname = file_path.relative_to(OUTPUT_DIR)
                    zipf.write(file_path, arcname=arcname)
        logs.append(f"✅ Zipped output folder to {ZIP_PATH}\n")
    except Exception as e:
        logs.append(f"❌ Failed to create zip: {e}\n")
    end_time = datetime.datetime.now()
    logs.append(f"🏁 Completed at {end_time.strftime('%Y-%m-%d %H:%M:%S')} (Duration: {(end_time - start_time).seconds}s)\n")
    _write_logs(logs)
    yield "\n".join(logs), (str(ZIP_PATH) if ZIP_PATH.exists() else None)
def _write_logs(logs):
    """Persist the accumulated log lines to LOG_PATH, best-effort."""
    try:
        LOG_PATH.write_text("\n".join(logs), encoding="utf-8")
    except Exception:
        # Never let a disk I/O hiccup take down the UI
        pass
# ===================== Gradio UI =====================
iface = gr.Interface(
fn=run_pipeline,
inputs=[
gr.Textbox(label="πŸ“˜ ArXiv URL (choose one)", placeholder="https://arxiv.org/abs/2505.xxxxx"),
gr.File(label="πŸ“„ Upload PDF (choose one)"),
gr.Textbox(label="πŸ”‘ OpenAI API Key", placeholder="sk-...", type="password"),
gr.File(label="🏷️ Upload Institutional Logo(s) (required, multiple allowed)", file_count="multiple", file_types=["image"]),
],
outputs=[
gr.Textbox(label="🧾 Logs", lines=30, max_lines=50),
gr.File(label="πŸ“¦ Download Results (.zip)")
],
title="πŸ“„ Paper2Poster",
description=(
"Upload your paper, and the pipeline will automatically generate a fully compilable LaTeX poster; you can download the ZIP file and compile it yourself. Each paper takes approximately 6–10 minutes to process.\n"
"Provide either an arXiv link or upload a PDF file (choose one); the system will generate a poster and package it for download.\n"
"You must upload at least one institutional logo (multiple allowed).\n"
),
allow_flagging="never",
)
if __name__ == "__main__":
    # Bind on all interfaces on port 7860 so the HF Space proxy can reach the app.
    iface.launch(server_name="0.0.0.0", server_port=7860)