Spaces:
Running
Running
Synced repo using 'sync_with_huggingface' GitHub Action
Browse files
- gradio_app.py +10 -6
- requirements.txt +1 -1
gradio_app.py
CHANGED
|
@@ -98,12 +98,14 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 98 |
in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
|
| 99 |
in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
|
| 100 |
|
| 101 |
-
page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"
|
| 102 |
output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
|
| 103 |
|
| 104 |
force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
|
| 105 |
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
|
| 106 |
-
|
|
|
|
|
|
|
| 107 |
with gr.Column():
|
| 108 |
result_md = gr.Markdown(label="Result markdown", visible=False)
|
| 109 |
result_json = gr.JSON(label="Result json", visible=False)
|
|
@@ -154,17 +156,19 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 154 |
page_range_txt.change(
|
| 155 |
fn=check_page_range,
|
| 156 |
inputs=[page_range_txt, in_file],
|
| 157 |
-
outputs=[page_range_txt,
|
| 158 |
)
|
| 159 |
|
| 160 |
# Run Marker
|
| 161 |
-
def run_marker_img(filename, page_range, force_ocr, output_format, debug):
|
| 162 |
cli_options = {
|
| 163 |
"output_format": output_format,
|
| 164 |
"page_range": page_range,
|
| 165 |
"force_ocr": force_ocr,
|
| 166 |
"debug": debug,
|
| 167 |
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
|
|
|
|
|
|
|
| 168 |
}
|
| 169 |
config_parser = ConfigParser(cli_options)
|
| 170 |
rendered = convert_pdf(
|
|
@@ -213,9 +217,9 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 213 |
gr_debug_lay
|
| 214 |
]
|
| 215 |
|
| 216 |
-
|
| 217 |
fn=run_marker_img,
|
| 218 |
-
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb],
|
| 219 |
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
|
| 220 |
)
|
| 221 |
|
|
|
|
| 98 |
in_num = gr.Slider(label="PDF file page number", minimum=1, maximum=1, value=1, step=1, visible=False)
|
| 99 |
in_img = gr.Image(label="PDF file (preview)", type="pil", sources=None, visible=False)
|
| 100 |
|
| 101 |
+
page_range_txt = gr.Textbox(label="Page range to parse, comma separated like 0,5-10,20", value=f"")
|
| 102 |
output_format_dd = gr.Dropdown(label="Output format", choices=["markdown", "json", "html"], value="markdown")
|
| 103 |
|
| 104 |
force_ocr_ckb = gr.Checkbox(label="Force OCR", value=True, info="Force OCR on all pages")
|
| 105 |
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
|
| 106 |
+
use_llm_ckb = gr.Checkbox(label="Use LLM", value=False, info="Use LLM for higher quality processing")
|
| 107 |
+
strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
|
| 108 |
+
run_marker_btn = gr.Button("Run Marker", interactive=False)
|
| 109 |
with gr.Column():
|
| 110 |
result_md = gr.Markdown(label="Result markdown", visible=False)
|
| 111 |
result_json = gr.JSON(label="Result json", visible=False)
|
|
|
|
| 156 |
page_range_txt.change(
|
| 157 |
fn=check_page_range,
|
| 158 |
inputs=[page_range_txt, in_file],
|
| 159 |
+
outputs=[page_range_txt, run_marker_btn]
|
| 160 |
)
|
| 161 |
|
| 162 |
# Run Marker
|
| 163 |
+
def run_marker_img(filename, page_range, force_ocr, output_format, debug, use_llm, strip_existing_ocr):
|
| 164 |
cli_options = {
|
| 165 |
"output_format": output_format,
|
| 166 |
"page_range": page_range,
|
| 167 |
"force_ocr": force_ocr,
|
| 168 |
"debug": debug,
|
| 169 |
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
|
| 170 |
+
"use_llm": use_llm,
|
| 171 |
+
"strip_existing_ocr": strip_existing_ocr
|
| 172 |
}
|
| 173 |
config_parser = ConfigParser(cli_options)
|
| 174 |
rendered = convert_pdf(
|
|
|
|
| 217 |
gr_debug_lay
|
| 218 |
]
|
| 219 |
|
| 220 |
+
run_marker_btn.click(
|
| 221 |
fn=run_marker_img,
|
| 222 |
+
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb],
|
| 223 |
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout]
|
| 224 |
)
|
| 225 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
torch==2.5.1
|
| 2 |
-
marker-pdf==1.
|
| 3 |
gradio==5.8.0
|
| 4 |
huggingface-hub==0.26.3
|
|
|
|
| 1 |
torch==2.5.1
|
| 2 |
+
marker-pdf==1.2.0
|
| 3 |
gradio==5.8.0
|
| 4 |
huggingface-hub==0.26.3
|