"""Gradio app that fills red placeholder text in a Word template from a PDF.

The PDF is converted to a ``{label: value}`` mapping by the helpers imported
from ``utils``. Every red placeholder in the template is then replaced by the
value whose label appears in the text just before it in the same paragraph.
"""

import itertools
import os

import gradio as gr
from docx import Document
from docx.shared import RGBColor

from utils import extract_text_from_pdf, parse_pdf_to_dict

# Exact font colour that marks a placeholder run in the template.
_PLACEHOLDER_COLOR = RGBColor(255, 0, 0)


def _is_placeholder(run):
    """Return True when *run* carries an explicit pure-red font colour."""
    color = run.font.color
    return color is not None and color.rgb == _PLACEHOLDER_COLOR


def _match_label(preceding_text, labels):
    """Return the value for the label nearest the end of *preceding_text*.

    Args:
        preceding_text: Lower-cased template text that precedes a placeholder.
        labels: Iterable of ``(lower_cased_label, value)`` pairs.

    Returns:
        The value of the best-matching label, or ``None`` when no label
        occurs in *preceding_text*.
    """
    best_rank = None
    best_value = None
    for label, value in labels:
        start = preceding_text.rfind(label)
        if start == -1:
            continue
        # Prefer the label that ends closest to the placeholder; on a tie the
        # longer label wins, so "company name" beats its substring "name".
        rank = (start + len(label), len(label))
        if best_rank is None or rank > best_rank:
            best_rank, best_value = rank, value
    return best_value


def _fill_paragraph(para, labels):
    """Replace each placeholder (a stretch of adjacent red runs) in *para*."""
    # Only text that was in the template is searched for labels, so a value
    # inserted earlier in the paragraph can never be mistaken for a label.
    template_parts = []
    for is_placeholder, group in itertools.groupby(
        para.runs, key=_is_placeholder
    ):
        group_runs = list(group)
        # Capture the texts before any run in this group is overwritten.
        group_texts = [run.text for run in group_runs]
        if is_placeholder:
            value = _match_label("".join(template_parts).lower(), labels)
            if value is not None:
                # Word frequently splits one visually continuous placeholder
                # into several runs: write the value once and blank the rest
                # so it is not repeated for every fragment.
                first, *rest = group_runs
                first.text = value
                for run in rest:
                    run.text = ""
        template_parts.extend(group_texts)


def replace_red_text_with_data(word_path, data_dict):
    """Fill the red placeholder text of a Word template with extracted data.

    For every paragraph in ``doc.paragraphs``, each stretch of adjacent red
    runs is treated as one placeholder. It is replaced with the value of the
    ``data_dict`` key that occurs (case-insensitively) in the paragraph text
    before it; when several keys occur, the one nearest the placeholder is
    used. Placeholders with no matching key are left untouched.

    NOTE(review): only ``doc.paragraphs`` is scanned. Placeholders inside
    tables, headers or footers are therefore not filled -- confirm whether
    the templates in use need that.

    Args:
        word_path: Path (or file-like object) of the ``.docx`` template.
        data_dict: Mapping of label text to replacement value. Empty keys and
            ``None`` values are ignored; other values are converted with
            ``str()``.

    Returns:
        The modified ``Document``. It is not saved; the caller does that.
    """
    doc = Document(word_path)

    # Lower-case the labels once instead of once per red run. An empty key
    # would be a substring of every text and match everything, so drop it.
    labels = [
        (key.lower(), str(value))
        for key, value in data_dict.items()
        if key and value is not None
    ]

    for para in doc.paragraphs:
        _fill_paragraph(para, labels)

    return doc


def _as_path(uploaded):
    """Return a filesystem path for a value delivered by ``gr.File``."""
    if isinstance(uploaded, (str, os.PathLike)):
        return uploaded
    # Older Gradio releases hand over a temp-file wrapper whose ``name``
    # attribute is the path; anything else is passed through unchanged.
    return getattr(uploaded, "name", uploaded)


def process_files(pdf_file, template_docx):
    """Build the filled report from an uploaded PDF and Word template.

    Args:
        pdf_file: Uploaded PDF as delivered by the ``gr.File`` input.
        template_docx: Uploaded ``.docx`` template as delivered by ``gr.File``.

    Returns:
        Path of the saved, filled-in document (``"filled_output.docx"``).

    Raises:
        gr.Error: If either file has not been uploaded.
    """
    if pdf_file is None or template_docx is None:
        # Show a readable message in the UI instead of an opaque failure
        # from deep inside the PDF/docx helpers.
        raise gr.Error("Please upload both a PDF report and a Word template.")

    pdf_path = _as_path(pdf_file)
    doc_path = _as_path(template_docx)
    output_path = "filled_output.docx"

    # Extract and parse PDF
    raw_text = extract_text_from_pdf(pdf_path)
    data_dict = parse_pdf_to_dict(raw_text)

    # Replace red text with data
    final_doc = replace_red_text_with_data(doc_path, data_dict)

    # Save final document
    final_doc.save(output_path)

    return output_path


demo = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(label="Upload PDF Report", file_types=[".pdf"]),
        gr.File(label="Upload Word Template (.docx)", file_types=[".docx"]),
    ],
    outputs=gr.File(label="Download Filled Report (.docx)"),
    title="Audit Report Generator",
    description="Upload a PDF and a Word template. This tool will auto-fill red-highlighted fields with data from the PDF.",
)

if __name__ == "__main__":
    demo.launch()