Spaces:
Running
Running
| import gradio as gr | |
| import os | |
| from docx import Document | |
| from utils import extract_text_from_pdf, parse_pdf_to_dict | |
| from docx.shared import RGBColor | |
| from docx import Document | |
| from docx.shared import RGBColor | |
def _nearest_label(preceding_text, labels):
    """Return the key whose label ends closest to the end of *preceding_text*.

    Args:
        preceding_text: Lower-cased text that comes before a placeholder run.
        labels: Iterable of ``(lowercased_key, original_key)`` pairs.

    Returns:
        The original key of the best match, or ``None`` if no label occurs in
        *preceding_text*.
    """
    best_key = None
    best_rank = (-1, -1)
    for lowered, key in labels:
        start = preceding_text.rfind(lowered)
        if start < 0:
            continue
        # Rank by where the label ends (closest to the placeholder wins); on a
        # tie prefer the longer, more specific label ("audit date" over "date").
        rank = (start + len(lowered), len(lowered))
        if rank > best_rank:
            best_rank, best_key = rank, key
    return best_key


def replace_red_text_with_data(word_path, data_dict):
    """Replace red placeholder runs in a Word document with values from a dict.

    Each paragraph returned by ``doc.paragraphs`` is scanned run by run. A run
    whose font colour is exactly ``RGBColor(255, 0, 0)`` is treated as a
    placeholder. The text of the runs before it (case-insensitive) is searched
    for the keys of *data_dict*, and the run's text is replaced by the value of
    the key that occurs nearest to the placeholder. Runs with no matching key
    in front of them are left untouched.

    Args:
        word_path: Path (or file-like object) handed to ``Document``.
        data_dict: Mapping of label text to replacement text. Empty keys are
            ignored because they would match every placeholder. Values are
            assigned to ``run.text`` as-is, so they are assumed to be strings
            -- TODO confirm against ``parse_pdf_to_dict``.

    Returns:
        The modified ``Document``; nothing is written to disk here.
    """
    red = RGBColor(255, 0, 0)
    # Lower-case every usable key once instead of once per red run.
    labels = [(key.lower(), key) for key in data_dict if key]

    doc = Document(word_path)
    for para in doc.paragraphs:
        runs = para.runs
        # Snapshot the text before any replacement so that values inserted
        # into earlier runs are never mistaken for labels of later ones.
        original_texts = [run.text.lower() for run in runs]
        for i, run in enumerate(runs):
            color = run.font.color
            if not (color and color.rgb == red):
                continue
            preceding_text = ''.join(original_texts[:i])
            key = _nearest_label(preceding_text, labels)
            if key is not None:
                run.text = data_dict[key]
    return doc
def process_files(pdf_file, template_docx):
    """Fill the Word template with data parsed from the PDF and save the result.

    Args:
        pdf_file: The uploaded PDF as delivered by the ``gr.File`` input. It
            is passed straight to ``extract_text_from_pdf`` (presumably a
            filesystem path -- confirm against the installed Gradio version).
        template_docx: The uploaded ``.docx`` template, passed to
            ``replace_red_text_with_data``.

    Returns:
        Path of the saved document: ``filled_output.docx`` inside a freshly
        created temporary directory.

    Raises:
        gr.Error: If either upload is missing.
    """
    import tempfile  # local import: only this function needs it

    if not pdf_file or not template_docx:
        raise gr.Error("Please upload both a PDF report and a Word template.")

    # Extract and parse the PDF.
    raw_text = extract_text_from_pdf(pdf_file)
    data_dict = parse_pdf_to_dict(raw_text)

    # Replace the red placeholder text with the parsed data.
    final_doc = replace_red_text_with_data(template_docx, data_dict)

    # Save into a per-request directory: a single fixed path in the working
    # directory would be overwritten by concurrent requests. The file name is
    # kept so the download is still called "filled_output.docx". The directory
    # is not removed here because the file must outlive this call.
    output_dir = tempfile.mkdtemp(prefix="audit_report_")
    output_path = os.path.join(output_dir, "filled_output.docx")
    final_doc.save(output_path)
    return output_path
# Upload widgets, created in the order they appear in the form.
_pdf_upload = gr.File(label="Upload PDF Report", file_types=[".pdf"])
_template_upload = gr.File(
    label="Upload Word Template (.docx)",
    file_types=[".docx"],
)

# Download widget that receives the path returned by process_files.
_report_download = gr.File(label="Download Filled Report (.docx)")

# Gradio interface wiring the two uploads to process_files.
demo = gr.Interface(
    fn=process_files,
    inputs=[_pdf_upload, _template_upload],
    outputs=_report_download,
    title="Audit Report Generator",
    description="Upload a PDF and a Word template. This tool will auto-fill red-highlighted fields with data from the PDF.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()