Spaces:
Running
Running
| import gradio as gr | |
| from docx import Document | |
| from docx.shared import RGBColor | |
| from difflib import get_close_matches | |
| from pdf_parser import extract_text_from_pdf, parse_data_blocks | |
def is_red_color(run, *, min_red=200, max_other=100):
    """Return True when the run's explicit font colour is red-dominant.

    A colour counts as red when its red channel is at least ``min_red`` and
    both its green and blue channels are at most ``max_other``.

    Args:
        run: Object exposing ``run.font.color.rgb`` (a python-docx run in
            this file). ``rgb`` is indexed as a 3-item ``(r, g, b)`` sequence.
        min_red: Inclusive lower bound for the red channel. Defaults to 200,
            the threshold this function has always used.
        max_other: Inclusive upper bound for the green and blue channels.
            Defaults to 100, the threshold this function has always used.

    Returns:
        bool: False when the run has no colour object or no RGB value;
        otherwise the result of the threshold comparison.
    """
    color = run.font.color
    # No colour object, or no explicit RGB value set: nothing to classify.
    if not color or not color.rgb:
        return False

    red, green, blue = color.rgb[0], color.rgb[1], color.rgb[2]
    return red >= min_red and green <= max_other and blue <= max_other
def replace_red_text_with_data(doc_path, data_dict):
    """Replace red placeholder runs in a Word document with values from a dict.

    Each run in ``doc.paragraphs`` that ``is_red_color`` reports as red has
    its stripped, lowercased text fuzzy-matched against the lowercased keys
    of ``data_dict`` (``difflib.get_close_matches``, best single match,
    ``cutoff=0.6``). On a match the run's text is replaced with the value
    stored under the matching key; unmatched runs are left unchanged.

    Args:
        doc_path: Passed unchanged to ``Document(...)``.
        data_dict: Mapping whose keys are strings (they are lowercased for
            matching). Values are assigned to ``run.text`` as-is.

    Returns:
        The modified document object. It is not saved here.

    NOTE(review): only ``doc.paragraphs`` is walked. Per the python-docx
    docs this does not descend into tables, headers or footers; confirm the
    templates keep their red placeholders in body paragraphs.
    """
    doc = Document(doc_path)

    # Build the case-insensitive lookup once instead of once per red run.
    # setdefault keeps the FIRST original key for a given lowercase form,
    # which is the key the previous linear scan would have found.
    key_by_lower = {}
    for key in data_dict:
        key_by_lower.setdefault(key.lower(), key)
    candidates = list(key_by_lower)

    if not candidates:
        # Nothing to match against: return the document untouched.
        return doc

    for para in doc.paragraphs:
        for run in para.runs:
            if not is_red_color(run):
                continue

            placeholder = run.text.strip().lower()
            if not placeholder:
                # Blank / whitespace-only red runs carry no label to match.
                continue

            # Try exact or close match.
            match = get_close_matches(placeholder, candidates, n=1, cutoff=0.6)
            if match:
                run.text = data_dict[key_by_lower[match[0]]]

    return doc
def process_files(pdf_file, word_template):
    """Fill a Word template from a PDF report and return the output path.

    Steps: extract text from the PDF, parse it into a data dict, replace the
    red text in the Word template with that data, then save the result.

    Args:
        pdf_file: Uploaded PDF, passed unchanged to ``extract_text_from_pdf``.
        word_template: Uploaded .docx template, passed unchanged to
            ``replace_red_text_with_data``.

    Returns:
        str: Path of the saved document. The file is always named
        ``filled_output.docx`` but lives in a fresh temporary directory.

    Raises:
        ValueError: If either upload is missing (``None``).
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    if pdf_file is None or word_template is None:
        raise ValueError("Both a PDF report and a Word template must be provided.")

    # Extract data from PDF
    raw_text = extract_text_from_pdf(pdf_file)
    data_dict = parse_data_blocks(raw_text)

    # Replace red text in Word
    final_doc = replace_red_text_with_data(word_template, data_dict)

    # Save and return output. A per-call temp directory stops overlapping
    # requests from overwriting each other's file, while the download keeps
    # the same filename as before.
    output_dir = tempfile.mkdtemp(prefix="audit_fill_")
    output_path = os.path.join(output_dir, "filled_output.docx")
    final_doc.save(output_path)
    return output_path
# UI components, created in the order inputs -> output.
_pdf_upload = gr.File(label="Upload PDF Report", file_types=[".pdf"])
_template_upload = gr.File(label="Upload Word Template (.docx)", file_types=[".docx"])
_filled_download = gr.File(label="Download Updated Word (.docx)")

# Gradio interface: two uploads in, one downloadable Word document out,
# with process_files as the handler.
demo = gr.Interface(
    title="Audit Report Auto-Filler",
    description="Replaces outdated red text in Word using updated values from a PDF report.",
    fn=process_files,
    inputs=[_pdf_upload, _template_upload],
    outputs=_filled_download,
)

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    demo.launch()