Spaces:
Build error
Build error
| import streamlit as st | |
| import fitz # PyMuPDF | |
| from huggingface_hub import snapshot_download | |
| import os | |
| from pdf2image import convert_from_path | |
| from PIL import Image | |
| import tempfile | |
# Fetch the PDF-Extract-Kit weights once; later reruns of the script reuse the
# local copy. The guard only checks that the directory exists, so a download
# that was interrupted part-way has to be cleared by hand to be retried.
model_dir = "./pdf-extract-kit"
if not os.path.exists(model_dir):
    snapshot_download(
        repo_id="opendatalab/pdf-extract-kit-1.0",
        local_dir=model_dir,
        max_workers=20,
    )

st.title("PDF Table Extractor with PDF-Extract-Kit-1.0")
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    st.write("Converting PDF to images...")

    # convert_from_path works on a file path, so spill the upload into a
    # temporary PDF first. getvalue() returns the complete buffer regardless
    # of the current read position, unlike read().
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
        tmp_pdf.write(uploaded_file.getvalue())
        tmp_pdf_path = tmp_pdf.name

    # The handle is closed (and flushed) before conversion so the converter
    # sees the full file.
    try:
        images = convert_from_path(tmp_pdf_path)
    finally:
        # delete=False means nothing else removes the file; clean it up here
        # so uploads do not pile up in the temp directory. Cleanup is
        # best-effort and must not mask an error raised by the conversion.
        try:
            os.remove(tmp_pdf_path)
        except OSError:
            pass

    # Show every rendered page, numbered from 1.
    for i, img in enumerate(images):
        st.image(img, caption=f"Page {i+1}", use_column_width=True)
        # Here you would call the table detection model on each image
        st.info("🛠 Table detection model would run here... (to be implemented)")

    st.success("Done processing PDF!")