Spaces:
Running
Running
Synced repo using 'sync_with_huggingface' Github Action
Browse files
original:
- remote: "https://github.com/xiaoyao9184/docker-marker"
- commit: "386ea4e94940750ee5f6d60aca9f504e4a1c7539"
sync_with_huggingface:
- repository: ""
- ref: ""
- gradio_app.py +2 -6
gradio_app.py
CHANGED
|
@@ -128,7 +128,6 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 128 |
show_blocks_ckb = gr.Checkbox(label="Show Blocks", info="Display detected blocks, only when output is JSON", value=False, interactive=False)
|
| 129 |
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
|
| 130 |
strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
|
| 131 |
-
format_lines_ckb = gr.Checkbox(label="Format lines", value=False, info="Format lines in the document with OCR model")
|
| 132 |
disable_ocr_math_ckb = gr.Checkbox(label="Disable math", value=False, info="Disable math in OCR output - no inline math")
|
| 133 |
run_marker_btn = gr.Button("Run Marker", interactive=False)
|
| 134 |
with gr.Column():
|
|
@@ -197,7 +196,7 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 197 |
)
|
| 198 |
|
| 199 |
# Run Marker
|
| 200 |
-
def run_marker_img(filename, page_range, force_ocr, output_format, show_blocks, debug, use_llm, strip_existing_ocr, format_lines, disable_ocr_math):
|
| 201 |
"""
|
| 202 |
Run marker on the given PDF file and return processed results in multiple formats.
|
| 203 |
|
|
@@ -215,8 +214,6 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 215 |
Defaults to False.
|
| 216 |
strip_existing_ocr (bool, optional): If True, strip embedded OCR text and re-run OCR.
|
| 217 |
Defaults to False.
|
| 218 |
-
format_lines (bool, optional): If True, format lines in the document with OCR model.
|
| 219 |
-
Defaults to False.
|
| 220 |
disable_ocr_math (bool, optional): If True, disable math in OCR output - no inline math.
|
| 221 |
Defaults to False.
|
| 222 |
Returns:
|
|
@@ -240,7 +237,6 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 240 |
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
|
| 241 |
"use_llm": use_llm,
|
| 242 |
"strip_existing_ocr": strip_existing_ocr,
|
| 243 |
-
"format_lines": format_lines,
|
| 244 |
"disable_ocr_math": disable_ocr_math,
|
| 245 |
}
|
| 246 |
config_parser = ConfigParser(cli_options)
|
|
@@ -362,7 +358,7 @@ with gr.Blocks(title="Marker") as demo:
|
|
| 362 |
|
| 363 |
run_marker_btn.click(
|
| 364 |
fn=run_marker_img,
|
| 365 |
-
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, show_blocks_ckb, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb, format_lines_ckb, disable_ocr_math_ckb],
|
| 366 |
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout, in_img]
|
| 367 |
)
|
| 368 |
|
|
|
|
| 128 |
show_blocks_ckb = gr.Checkbox(label="Show Blocks", info="Display detected blocks, only when output is JSON", value=False, interactive=False)
|
| 129 |
debug_ckb = gr.Checkbox(label="Debug", value=False, info="Show debug information")
|
| 130 |
strip_existing_ocr_ckb = gr.Checkbox(label="Strip existing OCR", value=False, info="Strip existing OCR text from the PDF and re-OCR.")
|
|
|
|
| 131 |
disable_ocr_math_ckb = gr.Checkbox(label="Disable math", value=False, info="Disable math in OCR output - no inline math")
|
| 132 |
run_marker_btn = gr.Button("Run Marker", interactive=False)
|
| 133 |
with gr.Column():
|
|
|
|
| 196 |
)
|
| 197 |
|
| 198 |
# Run Marker
|
| 199 |
+
def run_marker_img(filename, page_range, force_ocr, output_format, show_blocks, debug, use_llm, strip_existing_ocr, disable_ocr_math):
|
| 200 |
"""
|
| 201 |
Run marker on the given PDF file and return processed results in multiple formats.
|
| 202 |
|
|
|
|
| 214 |
Defaults to False.
|
| 215 |
strip_existing_ocr (bool, optional): If True, strip embedded OCR text and re-run OCR.
|
| 216 |
Defaults to False.
|
|
|
|
|
|
|
| 217 |
disable_ocr_math (bool, optional): If True, disable math in OCR output - no inline math.
|
| 218 |
Defaults to False.
|
| 219 |
Returns:
|
|
|
|
| 237 |
"output_dir": settings.DEBUG_DATA_FOLDER if debug else None,
|
| 238 |
"use_llm": use_llm,
|
| 239 |
"strip_existing_ocr": strip_existing_ocr,
|
|
|
|
| 240 |
"disable_ocr_math": disable_ocr_math,
|
| 241 |
}
|
| 242 |
config_parser = ConfigParser(cli_options)
|
|
|
|
| 358 |
|
| 359 |
run_marker_btn.click(
|
| 360 |
fn=run_marker_img,
|
| 361 |
+
inputs=[in_file, page_range_txt, force_ocr_ckb, output_format_dd, show_blocks_ckb, debug_ckb, use_llm_ckb, strip_existing_ocr_ckb, disable_ocr_math_ckb],
|
| 362 |
outputs=[result_md, result_json, result_html, debug_img_pdf, debug_img_layout, in_img]
|
| 363 |
)
|
| 364 |
|