 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Stdlib: env/config, temp-file output, and traceback formatting for UI errors.
import os, tempfile, traceback
import gradio as gr
import spaces  # Hugging Face Spaces ZeroGPU decorator
import requests

# ---------- Cache & HF Hub settings ----------
# Route all Hugging Face caches into /data so model downloads persist across
# Space restarts. setdefault keeps any values already set by the Space config.
os.environ.setdefault("HF_HOME", "/data/.cache/huggingface")
os.environ.setdefault("HF_HUB_CACHE", "/data/.cache/huggingface/hub")
os.environ.setdefault("TRANSFORMERS_CACHE", "/data/.cache/huggingface/transformers")
# Turn the Xet backend off and hf_transfer on for hub downloads.
os.environ.setdefault("HF_HUB_ENABLE_XET", "0")
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
# Make sure every cache directory exists before the first hub call touches it.
for p in (os.environ["HF_HOME"], os.environ["HF_HUB_CACHE"], os.environ["TRANSFORMERS_CACHE"]):
    os.makedirs(p, exist_ok=True)

# ---------- Docling imports ----------
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# CUDA info (informational)
# torch may be missing or fail to import on some hosts; treat that as CPU-only.
try:
    import torch
    HAS_CUDA = torch.cuda.is_available()
    # Cap CPU thread usage (default 2) to behave on shared hardware.
    torch.set_num_threads(max(1, int(os.environ.get("OMP_NUM_THREADS", "2"))))
except Exception:
    HAS_CUDA = False

# Converters
# Built once at import time so pipeline setup is shared across requests:
# std_converter uses Docling's default PDF pipeline; vlm_converter runs the
# VLM pipeline for scanned / complex documents.
std_converter = DocumentConverter(format_options={InputFormat.PDF: PdfFormatOption()})
vlm_converter = DocumentConverter(format_options={InputFormat.PDF: PdfFormatOption(pipeline_cls=VlmPipeline)})

# ---------- Helpers ----------
def _success(md: str, html: str):
    tmpdir = tempfile.gettempdir()
    md_path = os.path.join(tmpdir, "output.md")
    html_path = os.path.join(tmpdir, "output.html")
    with open(md_path, "w", encoding="utf-8") as f: f.write(md)
    with open(html_path, "w", encoding="utf-8") as f: f.write(html)
    return md, html, md_path, html_path

def _fail(msg: str):
    err = f"**Conversion failed**:\n```\n{msg}\n```"
    return err, "<pre>" + err + "</pre>", None, None

def _convert_local_path(path: str, use_vlm: bool):
    """Convert a PDF on disk and return the (md, html, md_path, html_path) tuple.

    Args:
        path: Filesystem path of the PDF to convert.
        use_vlm: True to run the VLM pipeline, False for the standard one.

    Any exception is caught and rendered into the failure tuple so the UI
    shows the traceback instead of the worker crashing.
    """
    converter = vlm_converter if use_vlm else std_converter
    try:
        document = converter.convert(source=path).document
        return _success(document.export_to_markdown(), document.export_to_html())
    except Exception as exc:
        return _fail(f"{exc}\n\n{traceback.format_exc()}")

# ---------- GPU-decorated endpoints ----------
@spaces.GPU(duration=600)
def run_convert_file(file, mode):
    """ZeroGPU endpoint: convert an uploaded PDF.

    Args:
        file: Value from gr.File — a str path or an object exposing .name.
        mode: Selected radio label; a "VLM" prefix selects the VLM pipeline.

    Returns:
        (markdown, html, md_path, html_path) on success, or the failure tuple.
    """
    if file is None:
        return _fail("No file provided.")
    # gr.File delivers a plain str path with type="filepath" (the Gradio 4
    # default) but a tempfile-like object with .name in older payloads;
    # `file.name` alone raises AttributeError on the str case.
    path = file if isinstance(file, str) else file.name
    return _convert_local_path(path, mode.startswith("VLM"))

@spaces.GPU(duration=600)
def run_convert_url(url, mode):
    """ZeroGPU endpoint: download a PDF from a public URL and convert it.

    Args:
        url: Public URL of the PDF to fetch.
        mode: Selected radio label; a "VLM" prefix selects the VLM pipeline.

    Returns:
        (markdown, html, md_path, html_path) on success, or the failure tuple.
    """
    if not url:
        return _fail("No URL provided.")
    try:
        r = requests.get(url, stream=True, timeout=60)
        r.raise_for_status()
        fd, tmp_path = tempfile.mkstemp(suffix=".pdf")
        with os.fdopen(fd, "wb") as tmp:
            # Stream in 1 MiB chunks; skip keep-alive heartbeats (empty bytes).
            for chunk in r.iter_content(chunk_size=1 << 20):
                if chunk:
                    tmp.write(chunk)
    except Exception as e:
        return _fail(f"Failed to download URL: {e}")
    try:
        return _convert_local_path(tmp_path, mode.startswith("VLM"))
    finally:
        # Best-effort cleanup of the downloaded file. Catch OSError only: a
        # bare `except:` would also swallow KeyboardInterrupt/SystemExit.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

# ---------- UI ----------
# Informational banner; HAS_CUDA reflects what was visible at import time.
subtitle = "Device: **CUDA (ZeroGPU)**" if HAS_CUDA else "Device: **CPU** (GPU warms on first call)"

# NOTE: the original strings below contained cp1252 mojibake ("β€”", "β†’")
# from a UTF-8 double-encode; restored to the intended "—" and "→".
with gr.Blocks(title="Granite-Docling 258M — PDF → Markdown/HTML") as demo:
    gr.Markdown(
        f"""# Granite-Docling 258M — PDF → Markdown / HTML  
{subtitle}

**Modes**
- **Standard (faster)** → PDFs with a text layer  
- **VLM (Granite – better for complex/scanned)** → scans / heavy tables / formulas

_First call may be slow while models download and ZeroGPU warms. Cache lives in `/data`._
"""
    )

    # Shared mode selector; the run_convert_* endpoints dispatch on the
    # "VLM" prefix of the selected label.
    mode = gr.Radio(
        ["Standard (faster)", "VLM (Granite – better for complex/scanned)"],
        value="Standard (faster)", label="Mode"
    )

    with gr.Tab("Upload PDF"):
        fi = gr.File(file_types=[".pdf"], label="PDF")
        md_preview = gr.Markdown(label="Markdown Preview")
        html_preview = gr.HTML(label="HTML Preview")  # rendered HTML, not source
        dl_md = gr.File(label="Download Markdown (.md)")
        dl_html = gr.File(label="Download HTML (.html)")
        gr.Button("Convert").click(
            fn=run_convert_file,
            inputs=[fi, mode],
            outputs=[md_preview, html_preview, dl_md, dl_html]
        )

    with gr.Tab("Convert from URL"):
        url = gr.Textbox(label="Public PDF URL", placeholder="https://.../file.pdf")
        md_preview2 = gr.Markdown(label="Markdown Preview")
        html_preview2 = gr.HTML(label="HTML Preview")
        dl_md2 = gr.File(label="Download Markdown (.md)")
        dl_html2 = gr.File(label="Download HTML (.html)")
        gr.Button("Convert").click(
            fn=run_convert_url,
            inputs=[url, mode],
            outputs=[md_preview2, html_preview2, dl_md2, dl_html2]
        )

# Bind & queue
# queue() enables request queuing (required for ZeroGPU endpoints); bind on
# 0.0.0.0 so the Space's reverse proxy can reach the server.
demo.queue().launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))