Spaces:
Sleeping
Sleeping
File size: 1,853 Bytes
459372e 31d231c 1804090 03b2a60 31d231c 9e5331a 03b2a60 9e5331a 03b2a60 9e5331a 1804090 459372e 1804090 9e5331a 03b2a60 1804090 03b2a60 1804090 459372e 03b2a60 459372e 03b2a60 459372e 03b2a60 459372e c19a2c1 459372e 03b2a60 459372e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import tempfile
from difflib import get_close_matches
from pathlib import Path

import gradio as gr
from docx import Document
from docx.shared import RGBColor

from pdf_parser import extract_text_from_pdf, parse_data_blocks
def is_red_color(run, *, min_red=200, max_other=100):
    """Return True when *run* has an explicit, red-dominant font colour.

    Args:
        run: Object exposing ``font.color.rgb``, where ``rgb`` is either
            ``None`` or a 3-item ``(red, green, blue)`` sequence of ints.
        min_red: Lowest red channel value still treated as red.
        max_other: Highest green/blue channel value still treated as red.

    Returns:
        ``True`` if the red channel is at least ``min_red`` and both the
        green and blue channels are at most ``max_other``; ``False``
        otherwise, including when the run has no explicit RGB colour.
    """
    color = run.font.color
    # Read the rgb property once; it is None when no explicit RGB is set.
    rgb = color.rgb if color else None
    if not rgb:
        return False
    red, green, blue = rgb
    return red >= min_red and green <= max_other and blue <= max_other
def _iter_paragraphs(container, seen_cells):
    """Yield the paragraphs of *container*, descending into its tables.

    ``container`` is anything exposing ``.paragraphs`` and ``.tables`` (the
    document itself or a table cell). ``seen_cells`` maps ``id()`` of each
    visited cell element to that element, so a merged cell that ``row.cells``
    reports once per spanned column is only walked once.
    """
    yield from container.paragraphs
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                # `_tc` is python-docx's handle on the underlying <w:tc>
                # element; merged grid positions share the same element.
                tc = cell._tc
                if id(tc) in seen_cells:
                    continue
                # Keep a reference so the id cannot be reused mid-walk.
                seen_cells[id(tc)] = tc
                yield from _iter_paragraphs(cell, seen_cells)


def replace_red_text_with_data(doc_path, data_dict):
    """Fill red-coloured runs of a Word document with values from *data_dict*.

    Every run for which ``is_red_color`` is true has its stripped text
    fuzzy-matched (case-insensitively, ``difflib`` cutoff 0.6) against the
    keys of ``data_dict``; on a match the run's text is replaced by the
    corresponding value. Runs in body paragraphs and in table cells
    (including nested tables) are processed.

    Args:
        doc_path: Passed straight to ``docx.Document``.
        data_dict: Mapping whose keys are strings (``.lower()`` is called on
            them) and whose values are assigned to ``run.text``.

    Returns:
        The modified ``Document`` object; nothing is written to disk here.
    """
    doc = Document(doc_path)

    # Build the case-insensitive lookup once. When two keys differ only by
    # case the first one in data_dict wins.
    keys_by_lower = {}
    for key in data_dict:
        keys_by_lower.setdefault(key.lower(), key)
    candidates = list(keys_by_lower)
    if not candidates:
        return doc

    for para in _iter_paragraphs(doc, {}):
        for run in para.runs:
            if not is_red_color(run):
                continue
            original_text = run.text.strip()
            if not original_text:
                # Whitespace-only red runs carry no label to look up.
                continue
            match = get_close_matches(
                original_text.lower(), candidates, n=1, cutoff=0.6
            )
            if match:
                run.text = data_dict[keys_by_lower[match[0]]]
    return doc
def process_files(pdf_file, word_template):
    """Fill a Word template's red text with data parsed from a PDF report.

    Args:
        pdf_file: Uploaded PDF, forwarded unchanged to
            ``extract_text_from_pdf``.
        word_template: Uploaded ``.docx`` template, forwarded unchanged to
            ``replace_red_text_with_data``.

    Returns:
        Path (``str``) of the saved document, named ``filled_output.docx``
        inside a freshly created temporary directory.

    Raises:
        gr.Error: If either upload is missing.
    """
    if pdf_file is None or word_template is None:
        raise gr.Error("Please upload both a PDF report and a Word template.")

    # Extract data from PDF
    raw_text = extract_text_from_pdf(pdf_file)
    data_dict = parse_data_blocks(raw_text)

    # Replace red text in Word
    final_doc = replace_red_text_with_data(word_template, data_dict)

    # Save into a per-call directory: a single shared path in the working
    # directory would let concurrent requests overwrite each other's output.
    # The directory is not removed here; cleanup is left to the host.
    output_dir = Path(tempfile.mkdtemp(prefix="audit_filler_"))
    output_path = str(output_dir / "filled_output.docx")
    final_doc.save(output_path)
    return output_path
# Upload widgets, listed in the positional order of
# process_files(pdf_file, word_template).
_pdf_upload = gr.File(label="Upload PDF Report", file_types=[".pdf"])
_template_upload = gr.File(
    label="Upload Word Template (.docx)", file_types=[".docx"]
)
# Download widget that receives the path returned by process_files.
_docx_download = gr.File(label="Download Updated Word (.docx)")

# Web UI definition: two uploads in, one generated .docx out.
demo = gr.Interface(
    title="Audit Report Auto-Filler",
    description="Replaces outdated red text in Word using updated values from a PDF report.",
    fn=process_files,
    inputs=[_pdf_upload, _template_upload],
    outputs=_docx_download,
)
if __name__ == "__main__":
    # Start the Gradio app only when executed as a script, not on import.
    demo.launch()