Spaces:
Running
Commit
·
6d2b0a3
1
Parent(s):
64cd544
improve description
Browse files
app.py
CHANGED
|
@@ -56,14 +56,21 @@ def process_pdfs(
|
|
| 56 |
progress=gr.Progress(),
|
| 57 |
):
|
| 58 |
if not pdf_files:
|
| 59 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
if oauth_token is None:
|
| 62 |
-
gr.Info("Please log in to upload to Hugging Face.")
|
| 63 |
return (
|
| 64 |
None,
|
| 65 |
None,
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
)
|
| 68 |
|
| 69 |
try:
|
|
@@ -71,6 +78,7 @@ def process_pdfs(
|
|
| 71 |
images_dir = os.path.join(temp_dir, "images")
|
| 72 |
os.makedirs(images_dir)
|
| 73 |
|
|
|
|
| 74 |
images, message = pdf_to_images(pdf_files, sample_size, images_dir)
|
| 75 |
|
| 76 |
# Create a zip file of the images
|
|
@@ -106,7 +114,10 @@ def process_pdfs(
|
|
| 106 |
|
| 107 |
# Define the Gradio interface
|
| 108 |
with gr.Blocks() as demo:
|
| 109 |
-
gr.
|
|
|
|
|
|
|
|
|
|
| 110 |
gr.Markdown(
|
| 111 |
"Upload PDF(s), convert pages to images, and optionally upload them to a Hugging Face repo. If a sample size is specified, random pages will be selected."
|
| 112 |
)
|
|
@@ -121,17 +132,20 @@ with gr.Blocks() as demo:
|
|
| 121 |
with gr.Row():
|
| 122 |
sample_size = gr.Number(
|
| 123 |
value=None,
|
| 124 |
-
label="
|
|
|
|
| 125 |
)
|
| 126 |
hf_repo = gr.Textbox(
|
| 127 |
-
label="Hugging Face Repo",
|
|
|
|
|
|
|
| 128 |
)
|
| 129 |
-
|
| 130 |
-
|
| 131 |
status_text = gr.Markdown(label="Status")
|
| 132 |
download_button = gr.File(label="Download Converted Images")
|
| 133 |
|
| 134 |
-
submit_button = gr.Button("
|
| 135 |
submit_button.click(
|
| 136 |
process_pdfs,
|
| 137 |
inputs=[pdf_files, sample_size, hf_repo],
|
|
|
|
| 56 |
progress=gr.Progress(),
|
| 57 |
):
|
| 58 |
if not pdf_files:
|
| 59 |
+
return (
|
| 60 |
+
None,
|
| 61 |
+
None,
|
| 62 |
+
gr.Markdown(
|
| 63 |
+
"⚠️ No PDF files uploaded. Please upload at least one PDF file."
|
| 64 |
+
),
|
| 65 |
+
)
|
| 66 |
|
| 67 |
if oauth_token is None:
|
|
|
|
| 68 |
return (
|
| 69 |
None,
|
| 70 |
None,
|
| 71 |
+
gr.Markdown(
|
| 72 |
+
"⚠️ Not logged in to Hugging Face. Please log in to upload to a Hugging Face dataset."
|
| 73 |
+
),
|
| 74 |
)
|
| 75 |
|
| 76 |
try:
|
|
|
|
| 78 |
images_dir = os.path.join(temp_dir, "images")
|
| 79 |
os.makedirs(images_dir)
|
| 80 |
|
| 81 |
+
progress(0, desc="Starting PDF processing")
|
| 82 |
images, message = pdf_to_images(pdf_files, sample_size, images_dir)
|
| 83 |
|
| 84 |
# Create a zip file of the images
|
|
|
|
| 114 |
|
| 115 |
# Define the Gradio interface
|
| 116 |
with gr.Blocks() as demo:
|
| 117 |
+
gr.HTML(
|
| 118 |
+
"""<h1 style='text-align: center;'> PDFs to Page Images Converter</h1>
|
| 119 |
+
<center><i> 📁 Convert PDFs to an image dataset 📁 </i></center>"""
|
| 120 |
+
)
|
| 121 |
gr.Markdown(
|
| 122 |
"Upload PDF(s), convert pages to images, and optionally upload them to a Hugging Face repo. If a sample size is specified, random pages will be selected."
|
| 123 |
)
|
|
|
|
| 132 |
with gr.Row():
|
| 133 |
sample_size = gr.Number(
|
| 134 |
value=None,
|
| 135 |
+
label="Pages per PDF (0 for all pages)",
|
| 136 |
+
info="Specify how many pages to convert from each PDF. Use 0 to convert all pages.",
|
| 137 |
)
|
| 138 |
hf_repo = gr.Textbox(
|
| 139 |
+
label="Hugging Face Repo",
|
| 140 |
+
placeholder="username/repo-name",
|
| 141 |
+
info="Enter the Hugging Face repository name in the format 'username/repo-name'",
|
| 142 |
)
|
| 143 |
+
with gr.Accordion("View converted images", open=False):
|
| 144 |
+
output_gallery = gr.Gallery(label="Converted Images")
|
| 145 |
status_text = gr.Markdown(label="Status")
|
| 146 |
download_button = gr.File(label="Download Converted Images")
|
| 147 |
|
| 148 |
+
submit_button = gr.Button("Convert PDFs to page images")
|
| 149 |
submit_button.click(
|
| 150 |
process_pdfs,
|
| 151 |
inputs=[pdf_files, sample_size, hf_repo],
|