Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -597,16 +597,22 @@ with tab_ocr:
|
|
| 597 |
st.header("Test OCR 🔍")
|
| 598 |
all_files = get_gallery_files()
|
| 599 |
if all_files:
|
|
|
|
|
|
|
| 600 |
if st.button("OCR All Assets 🚀"):
|
| 601 |
full_text = "# OCR Results\n\n"
|
| 602 |
-
for file in all_files:
|
| 603 |
if file.endswith('.png'):
|
| 604 |
image = Image.open(file)
|
| 605 |
-
else:
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 610 |
output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
|
| 611 |
result = asyncio.run(process_ocr(image, output_file))
|
| 612 |
full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
|
|
@@ -616,21 +622,28 @@ with tab_ocr:
|
|
| 616 |
f.write(full_text)
|
| 617 |
st.success(f"Full OCR saved to {md_output_file}")
|
| 618 |
st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
|
| 619 |
-
selected_file = st.selectbox("Select Image or PDF",
|
| 620 |
if selected_file:
|
| 621 |
if selected_file.endswith('.png'):
|
| 622 |
image = Image.open(selected_file)
|
| 623 |
-
else:
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
|
| 635 |
with tab_build:
|
| 636 |
st.header("Build Titan 🌱")
|
|
|
|
| 597 |
st.header("Test OCR 🔍")
|
| 598 |
all_files = get_gallery_files()
|
| 599 |
if all_files:
|
| 600 |
+
# Filter for only PNG and PDF files
|
| 601 |
+
ocr_files = [f for f in all_files if f.endswith(('.png', '.pdf'))]
|
| 602 |
if st.button("OCR All Assets 🚀"):
|
| 603 |
full_text = "# OCR Results\n\n"
|
| 604 |
+
for file in ocr_files:
|
| 605 |
if file.endswith('.png'):
|
| 606 |
image = Image.open(file)
|
| 607 |
+
else: # PDF
|
| 608 |
+
try:
|
| 609 |
+
doc = fitz.open(file)
|
| 610 |
+
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
| 611 |
+
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 612 |
+
doc.close()
|
| 613 |
+
except Exception as e:
|
| 614 |
+
st.error(f"Failed to process {file}: {str(e)}")
|
| 615 |
+
continue
|
| 616 |
output_file = generate_filename(f"ocr_{os.path.basename(file)}", "txt")
|
| 617 |
result = asyncio.run(process_ocr(image, output_file))
|
| 618 |
full_text += f"## {os.path.basename(file)}\n\n{result}\n\n"
|
|
|
|
| 622 |
f.write(full_text)
|
| 623 |
st.success(f"Full OCR saved to {md_output_file}")
|
| 624 |
st.markdown(get_download_link(md_output_file, "text/markdown", "Download Full OCR Markdown"), unsafe_allow_html=True)
|
| 625 |
+
selected_file = st.selectbox("Select Image or PDF", ocr_files, key="ocr_select")
|
| 626 |
if selected_file:
|
| 627 |
if selected_file.endswith('.png'):
|
| 628 |
image = Image.open(selected_file)
|
| 629 |
+
else: # PDF
|
| 630 |
+
try:
|
| 631 |
+
doc = fitz.open(selected_file)
|
| 632 |
+
pix = doc[0].get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
| 633 |
+
image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 634 |
+
doc.close()
|
| 635 |
+
except Exception as e:
|
| 636 |
+
st.error(f"Cannot process {selected_file}: {str(e)}. Please select a PNG or PDF file.")
|
| 637 |
+
image = None
|
| 638 |
+
if image:
|
| 639 |
+
st.image(image, caption="Input Image", use_container_width=True)
|
| 640 |
+
if st.button("Run OCR 🚀", key="ocr_run"):
|
| 641 |
+
output_file = generate_filename("ocr_output", "txt")
|
| 642 |
+
result = asyncio.run(process_ocr(image, output_file))
|
| 643 |
+
st.text_area("OCR Result", result, height=200)
|
| 644 |
+
st.session_state['history'].append(f"OCR Test: {selected_file} -> {output_file}")
|
| 645 |
+
else:
|
| 646 |
+
st.warning("No assets in gallery yet. Use Camera Snap or Download PDFs!")
|
| 647 |
|
| 648 |
with tab_build:
|
| 649 |
st.header("Build Titan 🌱")
|