Shami96 committed on
Commit
459372e
·
verified ·
1 Parent(s): 423a437

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from docx import Document
4
+ from utils import extract_text_from_pdf, parse_pdf_to_dict
5
+
6
def replace_red_text_with_data(doc_path, data_dict):
    """Fill the pure-red runs of a Word template with values from *data_dict*.

    Every run in the document's top-level paragraphs whose font colour is
    exactly ``FF0000`` is treated as a placeholder: its stripped text is
    looked up in *data_dict* and, when present, the run text is replaced by
    the mapped value. Runs whose text is not a key are left untouched.

    Args:
        doc_path: Path (or file-like object) accepted by ``Document``.
        data_dict: Mapping of placeholder text to replacement text.

    Returns:
        The modified ``Document`` instance; nothing is written to disk here.
    """
    doc = Document(doc_path)

    for para in doc.paragraphs:
        for run in para.runs:
            color = run.font.color
            rgb = color.rgb if color else None
            # ``rgb`` is None when no explicit RGB colour is set on the run.
            # RGBColor has no ``.hex`` attribute; its str() form is the
            # upper-case hex string (e.g. "FF0000"), so compare against that.
            if rgb is None or str(rgb) != "FF0000":
                continue

            text = run.text.strip()
            if text in data_dict:
                run.text = data_dict[text]

    return doc
17
+
18
def _upload_to_path(upload, label):
    """Return the filesystem path of an uploaded file.

    Accepts either a path (``str`` / ``os.PathLike``) or an object exposing
    the path as ``.name`` (e.g. a temporary-file wrapper).

    Raises:
        ValueError: If nothing was uploaded or no path can be determined.
    """
    if upload is None:
        raise ValueError(f"No {label} was uploaded.")
    if isinstance(upload, (str, os.PathLike)):
        return os.fspath(upload)
    path = getattr(upload, "name", None)
    if not path:
        raise ValueError(f"Could not determine the file path of the {label}.")
    return path


def process_files(pdf_file, template_docx):
    """Build a filled Word report from an uploaded PDF and a .docx template.

    Args:
        pdf_file: Uploaded PDF, as a path or an object with a ``.name`` path.
        template_docx: Uploaded Word template, in the same form.

    Returns:
        Path of the generated ``filled_output.docx``.

    Raises:
        ValueError: If either upload is missing or has no resolvable path.
    """
    import tempfile

    # The uploads are already on disk; they expose a path rather than a
    # ``.save()`` method, so read them in place instead of copying them.
    pdf_path = _upload_to_path(pdf_file, "PDF report")
    doc_path = _upload_to_path(template_docx, "Word template")

    # Extract and parse PDF
    raw_text = extract_text_from_pdf(pdf_path)
    data_dict = parse_pdf_to_dict(raw_text)

    # Replace red text with data
    final_doc = replace_red_text_with_data(doc_path, data_dict)

    # Write into a per-request directory so concurrent requests cannot
    # overwrite each other's output; the download keeps its original name.
    output_dir = tempfile.mkdtemp(prefix="audit_report_")
    output_path = os.path.join(output_dir, "filled_output.docx")
    final_doc.save(output_path)

    return output_path
38
+
39
# Upload widgets, in the positional order process_files expects them.
_INPUT_COMPONENTS = [
    gr.File(label="Upload PDF Report", file_types=[".pdf"]),
    gr.File(label="Upload Word Template (.docx)", file_types=[".docx"]),
]

# Download widget that receives the path returned by process_files.
_OUTPUT_COMPONENT = gr.File(label="Download Filled Report (.docx)")

# Gradio UI wiring the two uploads to process_files.
demo = gr.Interface(
    fn=process_files,
    inputs=_INPUT_COMPONENTS,
    outputs=_OUTPUT_COMPONENT,
    title="Audit Report Generator",
    description="Upload a PDF and a Word template. This tool will auto-fill red-highlighted fields with data from the PDF.",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()