# Hugging Face Space (status: Running) - file size: 2,051 bytes.
import os
import shutil
import subprocess
import sys
import tempfile
import uuid

import gradio as gr
def process_files(pdf_file, word_file):
    """Run the four-step red-text replacement pipeline on one PDF/Word pair.

    Both uploads are copied into a fresh per-call temporary directory and
    four helper scripts are run in sequence as subprocesses, each one
    consuming files produced by the previous step.

    Args:
        pdf_file: Filesystem path of the uploaded PDF (the Gradio inputs
            are declared with ``type="filepath"``).
        word_file: Filesystem path of the uploaded Word document.

    Returns:
        Path of the ``updated.docx`` file written by the final step. The
        temporary directory holding it is intentionally left in place so
        the caller can still read the file.

    Raises:
        ValueError: If either upload is missing.
        OSError: If an upload cannot be copied.
        subprocess.CalledProcessError: If any helper script exits non-zero.
    """
    if not pdf_file or not word_file:
        raise ValueError("Both a PDF file and a Word file must be uploaded.")

    # A unique directory per call keeps parallel requests from colliding.
    temp_dir = tempfile.mkdtemp(prefix="hf_redtext_")
    try:
        pdf_path = os.path.join(temp_dir, "input.pdf")
        word_path = os.path.join(temp_dir, "input.docx")
        # Copy instead of rename: rename fails across filesystems and would
        # also remove the upload from wherever the caller stored it.
        shutil.copyfile(pdf_file, pdf_path)
        shutil.copyfile(word_file, word_path)

        pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
        word_json_path = os.path.join(temp_dir, "word_data.json")
        updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
        final_docx_path = os.path.join(temp_dir, "updated.docx")

        # (script, *arguments) for each stage, in execution order.
        steps = [
            # Step 1: extract PDF data to txt.
            ("extract_pdf_data.py", pdf_path, pdf_txt_path),
            # Step 2: extract red text from Word to JSON.
            ("extract_red_text.py", word_path, word_json_path),
            # Step 3: update docx JSON with PDF txt, output updated JSON.
            (
                "update_docx_with_pdf.py",
                word_json_path,
                pdf_txt_path,
                updated_json_path,
            ),
            # Step 4: compare word file with updated JSON and update docx.
            ("updated_word.py", word_path, updated_json_path, final_docx_path),
        ]

        # Use the interpreter running this app so the helpers see the same
        # environment; fall back to "python" if it cannot be determined.
        interpreter = sys.executable or "python"
        for script, *script_args in steps:
            subprocess.run([interpreter, script, *script_args], check=True)
    except BaseException:
        # Nothing useful was produced; do not leak the working directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    return final_docx_path
# Gradio UI: two file uploads in, one downloadable file out. Both inputs use
# type="filepath", so process_files receives plain path strings.
iface = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(label="Upload PDF File", type="filepath"),
        gr.File(label="Upload Word File", type="filepath"),
    ],
    outputs=gr.File(label="Download Updated Word File"),
    title="Red Text Replacer",
    description="Upload a PDF and Word document. Red-colored text in the Word doc will be replaced by matching content from the PDF.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()