Spaces:
Running
Running
File size: 1,943 Bytes
459372e 4b3f51e cce0884 688144c 21ae957 4b3f51e 21ae957 55e3c9a 21ae957 cce0884 21ae957 688144c 21ae957 cce0884 21ae957 cce0884 21ae957 cce0884 21ae957 cce0884 21ae957 cce0884 21ae957 cce0884 4b3f51e 55e3c9a 459372e 55e3c9a 21ae957 459372e 4b3f51e 6afedff 55e3c9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import os
import shutil
import subprocess
import sys
import tempfile

import gradio as gr
def process_files(pdf_file, word_file):
    """Run the red-text replacement pipeline on an uploaded PDF/Word pair.

    The uploads are copied into a fresh temporary directory and four helper
    scripts are run in sequence as child processes, each reading the files
    written by the previous step.  The script names are passed as relative
    paths, so they are resolved against the current working directory.

    Args:
        pdf_file: Filesystem path of the uploaded PDF.
        word_file: Filesystem path of the uploaded Word document.

    Returns:
        Path of the generated ``updated.docx`` inside the temporary
        directory.  The directory is intentionally left in place on success
        because the caller still has to read the returned file.

    Raises:
        ValueError: If either upload is missing (``None`` or empty).
        OSError: If an uploaded file cannot be copied.
        subprocess.CalledProcessError: If any pipeline step exits non-zero.
    """
    # Reject missing uploads before touching the filesystem, so nothing is
    # leaked and the error is clearer than the TypeError shutil.copy raises.
    if not pdf_file or not word_file:
        raise ValueError("Both a PDF file and a Word file must be provided.")

    # Create a unique temporary directory for this run.
    temp_dir = tempfile.mkdtemp(prefix="hf_redtext_")
    try:
        # Standard filenames used by the pipeline steps.
        pdf_path = os.path.join(temp_dir, "input.pdf")
        word_path = os.path.join(temp_dir, "input.docx")
        pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
        word_json_path = os.path.join(temp_dir, "word_data.json")
        updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
        final_docx_path = os.path.join(temp_dir, "updated.docx")

        # Copy the uploaded files to the temp directory.
        shutil.copy(pdf_file, pdf_path)
        shutil.copy(word_file, word_path)

        # Run the children with the interpreter that is running this app, so
        # they see the same installed packages; a bare "python" may resolve to
        # a different interpreter (or to nothing) depending on PATH.
        interpreter = sys.executable or "python"

        pipeline = (
            # Step 1: extract text from the PDF.
            ("extract_pdf_data.py", pdf_path, pdf_txt_path),
            # Step 2: extract red text from the Word document.
            ("extract_red_text.py", word_path, word_json_path),
            # Step 3: update the Word JSON using the PDF text
            # (the original comment notes that this step calls OpenAI).
            ("update_docx_with_pdf.py", word_json_path, pdf_txt_path, updated_json_path),
            # Step 4: apply the updated JSON to the Word doc to create the output.
            ("updated_word.py", word_path, updated_json_path, final_docx_path),
        )
        for script, *script_args in pipeline:
            subprocess.run([interpreter, script, *script_args], check=True)
    except BaseException:
        # A failed run produces nothing worth keeping; remove the scratch
        # directory instead of leaking it, then let the error propagate.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Return the final .docx file.
    return final_docx_path
# Web UI definition: two file uploads in, one downloadable file out.
# Both upload components are configured with type="filepath", and
# process_files is registered as the handler for the form.
iface = gr.Interface(
    title="Red Text Replacer",
    description=(
        "Upload a PDF and Word document. "
        "Red-colored text in the Word doc will be replaced by matching content from the PDF."
    ),
    fn=process_files,
    inputs=[
        gr.File(type="filepath", label="Upload PDF File"),
        gr.File(type="filepath", label="Upload Word File"),
    ],
    outputs=gr.File(label="Download Updated Word File"),
)
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()