File size: 1,943 Bytes
459372e
4b3f51e
cce0884
688144c
21ae957
4b3f51e
 
21ae957
55e3c9a
 
21ae957
cce0884
 
21ae957
 
 
 
 
 
688144c
21ae957
cce0884
21ae957
 
cce0884
21ae957
 
cce0884
21ae957
 
cce0884
21ae957
 
cce0884
21ae957
cce0884
4b3f51e
55e3c9a
459372e
 
55e3c9a
21ae957
459372e
4b3f51e
6afedff
 
55e3c9a
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import gradio as gr
import tempfile
import os
import shutil
import subprocess

def process_files(pdf_file: str, word_file: str) -> str:
    """Run the red-text replacement pipeline on an uploaded PDF/Word pair.

    The uploads are copied into a fresh temporary directory and four helper
    scripts are run in sequence as child processes, each reading the previous
    step's output file:

    1. ``extract_pdf_data.py``     input.pdf -> pdf_data.txt
    2. ``extract_red_text.py``     input.docx -> word_data.json
    3. ``update_docx_with_pdf.py`` word_data.json + pdf_data.txt
       -> updated_word_data.json
    4. ``updated_word.py``         input.docx + updated_word_data.json
       -> updated.docx

    The script names are passed to the interpreter as relative paths, so they
    are resolved against the current working directory.

    Args:
        pdf_file: Filesystem path of the uploaded PDF.
        word_file: Filesystem path of the uploaded Word document.

    Returns:
        Path of the generated ``updated.docx`` inside the temporary directory.
        The directory is intentionally kept on success because the caller
        still has to read the returned file.

    Raises:
        ValueError: If either input path is missing (``None`` or empty).
        subprocess.CalledProcessError: If any pipeline step exits non-zero.
        OSError: If an upload cannot be copied or the interpreter cannot be
            started.
    """
    # Function-scope import keeps this block self-contained; the module's
    # top-level imports do not include ``sys``.
    import sys

    # Fail early, before any directory is created, with a readable message
    # instead of an opaque TypeError from shutil.copy(None, ...).
    if not pdf_file or not word_file:
        raise ValueError("Both a PDF file and a Word file must be provided.")

    # Create a unique temporary directory for this run
    temp_dir = tempfile.mkdtemp(prefix="hf_redtext_")
    try:
        # Define standard filenames for use in the pipeline
        pdf_path = os.path.join(temp_dir, "input.pdf")
        word_path = os.path.join(temp_dir, "input.docx")
        pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
        word_json_path = os.path.join(temp_dir, "word_data.json")
        updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
        final_docx_path = os.path.join(temp_dir, "updated.docx")

        # Copy the uploaded files to the temp directory
        shutil.copy(pdf_file, pdf_path)
        shutil.copy(word_file, word_path)

        # Run the children under the interpreter that is running this app so
        # they see the same installed packages; a bare "python" may be absent
        # from PATH or point at a different environment.
        interpreter = sys.executable or "python"

        # (script, *arguments) for each step, in execution order.
        pipeline = (
            # Step 1: Extract text from the PDF
            ("extract_pdf_data.py", pdf_path, pdf_txt_path),
            # Step 2: Extract red text from the Word document
            ("extract_red_text.py", word_path, word_json_path),
            # Step 3: Update the Word JSON using the PDF text (calls OpenAI)
            ("update_docx_with_pdf.py", word_json_path, pdf_txt_path, updated_json_path),
            # Step 4: Apply the updated JSON to the Word doc to create the final output
            ("updated_word.py", word_path, updated_json_path, final_docx_path),
        )
        for step in pipeline:
            subprocess.run([interpreter, *step], check=True)
    except BaseException:
        # Nothing useful is left to return on failure, so do not leak the
        # working directory (it holds copies of the user's uploads).
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Return the final .docx file
    return final_docx_path

# Upload widgets hand process_files plain filesystem paths (type="filepath").
_pdf_upload = gr.File(label="Upload PDF File", type="filepath")
_word_upload = gr.File(label="Upload Word File", type="filepath")

# Download widget for the path that process_files returns.
_docx_download = gr.File(label="Download Updated Word File")

# Web UI: two uploads in, one generated Word document out.
iface = gr.Interface(
    fn=process_files,
    inputs=[_pdf_upload, _word_upload],
    outputs=_docx_download,
    title="Red Text Replacer",
    description="Upload a PDF and Word document. Red-colored text in the Word doc will be replaced by matching content from the PDF.",
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()