File size: 1,853 Bytes
459372e
31d231c
 
1804090
03b2a60
31d231c
9e5331a
 
03b2a60
9e5331a
03b2a60
 
9e5331a
1804090
 
459372e
 
1804090
9e5331a
03b2a60
 
 
 
1804090
03b2a60
1804090
 
459372e
 
03b2a60
 
 
 
459372e
03b2a60
 
459372e
03b2a60
 
459372e
c19a2c1
459372e
 
 
 
 
 
 
03b2a60
 
 
459372e
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import tempfile
from difflib import get_close_matches

import gradio as gr
from docx import Document
from docx.shared import RGBColor

from pdf_parser import extract_text_from_pdf, parse_data_blocks

def is_red_color(run):
    """Return True when the run's font colour is red-dominant.

    The colour is read from ``run.font.color.rgb`` and treated as an
    indexable (red, green, blue) triple.  A run counts as red when the red
    channel is at least 200 and both green and blue are at most 100.

    Args:
        run: Object exposing ``font.color`` whose ``rgb`` attribute is either
            falsy (no explicit colour) or indexable at positions 0, 1 and 2.

    Returns:
        False when the colour object or its ``rgb`` value is falsy,
        otherwise the result of the threshold comparison.
    """
    font_color = run.font.color
    if not font_color:
        return False

    rgb = font_color.rgb
    if not rgb:
        return False

    red, green, blue = rgb[0], rgb[1], rgb[2]
    # Red-dominant: strong red channel, weak green and blue channels.
    return red >= 200 and green <= 100 and blue <= 100

def replace_red_text_with_data(doc_path, data_dict):
    """Replace red runs in a Word document with fuzzy-matched values.

    Opens ``doc_path`` with ``Document`` and walks every run of every
    paragraph in ``doc.paragraphs``.  For each run that ``is_red_color``
    accepts, the run's stripped, lower-cased text is compared against the
    lower-cased keys of ``data_dict`` with ``difflib.get_close_matches``
    (single best match, cutoff 0.6).  On a match the run's text is replaced
    by the value stored under the matching key.

    Args:
        doc_path: Passed unchanged to ``Document``.
        data_dict: Mapping of string keys to the replacement text assigned
            to ``run.text``.  An empty or ``None`` mapping leaves the
            document untouched.

    Returns:
        The (possibly modified) document object.  Nothing is saved here.
    """
    doc = Document(doc_path)

    # Nothing to substitute: hand the document back unchanged.
    if not data_dict:
        return doc

    # Build the case-insensitive lookup once instead of once per red run.
    # setdefault keeps the FIRST original key when several keys differ only
    # by case, mirroring a first-match scan over the dict.
    key_by_lower = {}
    for key in data_dict:
        key_by_lower.setdefault(key.lower(), key)
    candidates = list(key_by_lower)

    # NOTE(review): only paragraphs exposed by doc.paragraphs are visited;
    # confirm whether red text inside tables/headers also needs handling.
    for para in doc.paragraphs:
        for run in para.runs:
            if not is_red_color(run):
                continue

            original_text = run.text.strip()
            if not original_text:
                # Empty / whitespace-only runs carry no label to match.
                continue

            # Try exact or close match
            match = get_close_matches(
                original_text.lower(), candidates, n=1, cutoff=0.6
            )
            if match:
                run.text = data_dict[key_by_lower[match[0]]]
    return doc

def process_files(pdf_file, word_template):
    """Fill a Word template's red text from a PDF report and save the result.

    The PDF is turned into text with ``extract_text_from_pdf``, the text is
    parsed into a dictionary with ``parse_data_blocks``, and that dictionary
    is applied to the template by ``replace_red_text_with_data``.  The
    resulting document is saved as ``filled_output.docx`` inside a freshly
    created temporary directory.

    Args:
        pdf_file: Uploaded PDF, passed unchanged to ``extract_text_from_pdf``.
        word_template: Uploaded ``.docx`` template, passed unchanged to
            ``replace_red_text_with_data``.

    Returns:
        Path (``str``) of the saved ``.docx`` file.

    Raises:
        gr.Error: If either upload is missing (``None``).
    """
    # Document(None) opens python-docx's blank default template, so a missing
    # template would silently yield an empty document; reject it up front.
    if pdf_file is None or word_template is None:
        raise gr.Error(
            "Please upload both a PDF report and a Word template (.docx)."
        )

    # Extract data from PDF
    raw_text = extract_text_from_pdf(pdf_file)
    data_dict = parse_data_blocks(raw_text)

    # Replace red text in Word
    final_doc = replace_red_text_with_data(word_template, data_dict)

    # Save into a per-call directory: a single fixed path in the working
    # directory would let concurrent requests overwrite each other's output.
    # The file name is kept so the download name does not change.
    output_dir = tempfile.mkdtemp(prefix="audit_filler_")
    output_path = os.path.join(output_dir, "filled_output.docx")
    final_doc.save(output_path)
    return output_path

# Gradio front end: two uploads in (PDF report + Word template), one
# downloadable .docx out, all wired to process_files.
demo = gr.Interface(
    title="Audit Report Auto-Filler",
    description="Replaces outdated red text in Word using updated values from a PDF report.",
    fn=process_files,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF Report"),
        gr.File(file_types=[".docx"], label="Upload Word Template (.docx)"),
    ],
    outputs=gr.File(label="Download Updated Word (.docx)"),
)

# Start the web UI only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()