# Web-page residue captured together with this file (not part of the program):
# Spaces: Running
# File size: 1,722 Bytes
# 459372e 50613d0 31d231c 459372e 31d231c 459372e c19a2c1 459372e c19a2c1 459372e
import gradio as gr
import os
from docx import Document
from utils import extract_text_from_pdf, parse_pdf_to_dict
from docx.shared import RGBColor
from docx import Document
from docx.shared import RGBColor
def replace_red_text_with_data(word_path, data_dict):
    """Fill the red-coloured runs of a Word document with values from a dict.

    For every run in the document's body paragraphs whose font colour is
    exactly RGB(255, 0, 0), the text of the runs that precede it in the same
    paragraph is searched (case-insensitively) for the keys of ``data_dict``.
    The value of the label that ends closest to the red run replaces the
    run's text; on a tie the longer label wins. Red runs with no matching
    label are left untouched.

    Args:
        word_path: Path (or file-like object) handed to ``docx.Document``.
        data_dict: Mapping of label text to the replacement value. Keys that
            are not strings, or that are empty/whitespace-only, are ignored.

    Returns:
        The modified ``Document`` object; the caller is responsible for
        saving it.
    """
    red = RGBColor(255, 0, 0)

    # Lower-case the labels once instead of once per run. An empty label is a
    # substring of every string, so it would match every red run; skip it.
    labels = [
        (key.lower(), value)
        for key, value in data_dict.items()
        if isinstance(key, str) and key.strip()
    ]

    doc = Document(word_path)
    for para in doc.paragraphs:
        runs = para.runs
        for i, run in enumerate(runs):
            color = run.font.color
            if not (color and color.rgb == red):
                continue

            # Text of everything before this red run, including the new text
            # of red runs already replaced earlier in the paragraph.
            preceding_text = "".join(r.text for r in runs[:i]).lower()

            # Choose the label that ends nearest to the red run, so that a
            # paragraph such as "Name: <red> Date: <red>" gets a different
            # value for each field rather than the first key's value twice.
            best_rank = None
            best_value = None
            for label, value in labels:
                start = preceding_text.rfind(label)
                if start < 0:
                    continue
                rank = (start + len(label), len(label))
                if best_rank is None or rank > best_rank:
                    best_rank, best_value = rank, value

            if best_rank is not None:
                # Run text must be a string; parsed values may not be.
                run.text = str(best_value)
    return doc
def process_files(pdf_file, template_docx):
    """Build a filled-in Word report from a PDF and a Word template.

    The PDF is converted to text with ``extract_text_from_pdf``, parsed into a
    dictionary with ``parse_pdf_to_dict``, and that dictionary is used by
    ``replace_red_text_with_data`` to fill the template.

    Args:
        pdf_file: The uploaded PDF, passed unchanged to
            ``extract_text_from_pdf``.
        template_docx: The uploaded Word template, passed unchanged to
            ``replace_red_text_with_data``.

    Returns:
        Path of the saved document. The file is always named
        ``filled_output.docx`` and is placed in a newly created temporary
        directory.

    Raises:
        gr.Error: If either argument is ``None`` (for example when a file was
            not uploaded).
    """
    import tempfile

    if pdf_file is None or template_docx is None:
        raise gr.Error("Please upload both a PDF report and a Word template.")

    # Extract and parse PDF
    raw_text = extract_text_from_pdf(pdf_file)
    data_dict = parse_pdf_to_dict(raw_text)

    # Replace red text with data
    final_doc = replace_red_text_with_data(template_docx, data_dict)

    # Save into a per-call directory: a single fixed path in the working
    # directory would be overwritten by any other request running at the same
    # time. The directory is created only after processing succeeded so that
    # failed requests leave nothing behind.
    output_dir = tempfile.mkdtemp(prefix="audit_report_")
    output_path = os.path.join(output_dir, "filled_output.docx")
    final_doc.save(output_path)
    return output_path
# Gradio UI: two file uploads in, one downloadable file out, wired to
# process_files.
demo = gr.Interface(
    title="Audit Report Generator",
    description=(
        "Upload a PDF and a Word template. "
        "This tool will auto-fill red-highlighted fields with data from the PDF."
    ),
    fn=process_files,
    # Order matters: the components map positionally onto
    # process_files(pdf_file, template_docx).
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF Report"),
        gr.File(file_types=[".docx"], label="Upload Word Template (.docx)"),
    ],
    outputs=gr.File(label="Download Filled Report (.docx)"),
)
# Start the Gradio app only when the file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    demo.launch()