"""Gradio app that overwrites red-coloured text in a Word document.

Text is extracted from an uploaded PDF, parsed into a key/value mapping, and
every red run in the uploaded Word template whose text closely matches a key
is replaced with that key's value.
"""

from difflib import get_close_matches

import gradio as gr
from docx import Document
from docx.shared import RGBColor
from pdf_parser import extract_text_from_pdf, parse_data_blocks


def is_red_color(run):
    """Return True when *run* has an explicit, red-dominant RGB font colour.

    A run counts as red when its red channel is at least 200 and both the
    green and blue channels are at most 100.  Runs with no colour object or
    no explicit RGB value are reported as not red.
    """
    color = run.font.color
    if not color or not color.rgb:
        return False
    r, g, b = color.rgb[0], color.rgb[1], color.rgb[2]
    return r >= 200 and g <= 100 and b <= 100  # red-dominant


def replace_red_text_with_data(doc_path, data_dict):
    """Replace red runs in a Word document with fuzzy-matched values.

    Args:
        doc_path: Whatever ``docx.Document`` accepts to open the template.
        data_dict: Mapping of label -> replacement text.  Labels are compared
            case-insensitively against the stripped text of each red run.

    Returns:
        The modified ``Document`` object (not yet saved).

    Only runs in ``doc.paragraphs`` are visited.
    NOTE(review): python-docx does not include table-cell paragraphs in
    ``doc.paragraphs`` -- confirm whether templates keep red text in tables.
    """
    doc = Document(doc_path)

    # Build the lowercase -> original-key lookup once instead of per run.
    # setdefault keeps the first key when several differ only by case, which
    # is the key a first-match scan over data_dict would have selected.
    keys_by_lower = {}
    for key in data_dict:
        keys_by_lower.setdefault(key.lower(), key)
    candidates = list(keys_by_lower)

    for para in doc.paragraphs:
        for run in para.runs:
            if not is_red_color(run):
                continue

            original_text = run.text.strip()
            if not original_text:
                # Blank red runs carry no label to match against.
                continue

            # Try exact or close match (cutoff 0.6, best candidate only).
            match = get_close_matches(
                original_text.lower(), candidates, n=1, cutoff=0.6
            )
            if match:
                run.text = data_dict[keys_by_lower[match[0]]]

    return doc


def process_files(pdf_file, word_template):
    """Fill the Word template from the PDF and return the saved file's path.

    Args:
        pdf_file: Value supplied by the PDF ``gr.File`` input; passed
            unchanged to ``extract_text_from_pdf``.
        word_template: Value supplied by the Word ``gr.File`` input; passed
            unchanged to ``replace_red_text_with_data``.

    Returns:
        The path of the written document, ``"filled_output.docx"``.

    NOTE(review): the exact type Gradio hands over for a File input depends
    on the Gradio version/configuration -- confirm both consumers accept it.
    """
    # Extract data from PDF
    raw_text = extract_text_from_pdf(pdf_file)
    data_dict = parse_data_blocks(raw_text)

    # Replace red text in Word
    final_doc = replace_red_text_with_data(word_template, data_dict)

    # Save and return output.  The path is fixed and relative, so every call
    # writes to the same file in the current working directory.
    output_path = "filled_output.docx"
    final_doc.save(output_path)
    return output_path


demo = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(label="Upload PDF Report", file_types=[".pdf"]),
        gr.File(label="Upload Word Template (.docx)", file_types=[".docx"]),
    ],
    outputs=gr.File(label="Download Updated Word (.docx)"),
    title="Audit Report Auto-Filler",
    description="Replaces outdated red text in Word using updated values from a PDF report.",
)


if __name__ == "__main__":
    demo.launch()