Shami96 committed on
Commit
21ae957
·
verified ·
1 Parent(s): 688144c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -32
app.py CHANGED
@@ -1,57 +1,45 @@
1
  import gradio as gr
2
  import tempfile
3
  import os
4
- import subprocess
5
- import uuid
6
  import shutil
 
7
 
8
  def process_files(pdf_file, word_file):
9
- # Each upload returns a path (str) with type="filepath"
10
- # Create a unique temp directory for each run (prevents parallel collision)
11
  temp_dir = tempfile.mkdtemp(prefix="hf_redtext_")
12
 
13
- # Copy user-uploaded files into temp directory with standard names
14
  pdf_path = os.path.join(temp_dir, "input.pdf")
15
  word_path = os.path.join(temp_dir, "input.docx")
 
 
 
 
 
 
16
  shutil.copy(pdf_file, pdf_path)
17
- shutil.copy(word_file, word_path)
18
 
19
- # Step 1: Extract PDF data to txt
20
- pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
21
- subprocess.run(
22
- ["python", "extract_pdf_data.py", pdf_path, pdf_txt_path],
23
- check=True
24
- )
25
 
26
- # Step 2: Extract red text from Word to JSON
27
- word_json_path = os.path.join(temp_dir, "word_data.json")
28
- subprocess.run(
29
- ["python", "extract_red_text.py", word_path, word_json_path],
30
- check=True
31
- )
32
 
33
- # Step 3: Update docx JSON with PDF txt, output updated JSON
34
- updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
35
- subprocess.run(
36
- ["python", "update_docx_with_pdf.py", word_json_path, pdf_txt_path, updated_json_path],
37
- check=True
38
- )
39
 
40
- # Step 4: Compare word file with updated JSON and update docx
41
- final_docx_path = os.path.join(temp_dir, "updated.docx")
42
- subprocess.run(
43
- ["python", "updated_word.py", word_path, updated_json_path, final_docx_path],
44
- check=True
45
- )
46
 
47
- # Return final updated docx file
48
  return final_docx_path
49
 
50
  iface = gr.Interface(
51
  fn=process_files,
52
  inputs=[
53
  gr.File(label="Upload PDF File", type="filepath"),
54
- gr.File(label="Upload Word File", type="filepath"),
55
  ],
56
  outputs=gr.File(label="Download Updated Word File"),
57
  title="Red Text Replacer",
 
1
  import gradio as gr
2
  import tempfile
3
  import os
 
 
4
  import shutil
5
+ import subprocess
6
 
def process_files(pdf_file, word_file):
    """Run the four-step red-text replacement pipeline on two uploaded files.

    The uploads are copied into a fresh temporary directory under fixed
    names, then four helper scripts (looked up relative to the current
    working directory) are run in sequence as subprocesses:

    1. ``extract_pdf_data.py``      input.pdf -> pdf_data.txt
    2. ``extract_red_text.py``      input.docx -> word_data.json
    3. ``update_docx_with_pdf.py``  word_data.json + pdf_data.txt
                                    -> updated_word_data.json
    4. ``updated_word.py``          input.docx + updated_word_data.json
                                    -> updated.docx

    Args:
        pdf_file: Filesystem path of the uploaded PDF.
        word_file: Filesystem path of the uploaded Word document.

    Returns:
        Path of the generated ``updated.docx`` inside this run's temporary
        directory. The directory is deliberately left in place on success
        because the caller still has to read the returned file.

    Raises:
        ValueError: If either upload is missing (``None`` or empty).
        subprocess.CalledProcessError: If any pipeline step exits non-zero.
    """
    # Local import so this block does not depend on a file-level `import sys`.
    import sys

    # Reject missing uploads before touching the filesystem, so no temporary
    # directory is created (and leaked) for a request that cannot succeed.
    if not pdf_file or not word_file:
        raise ValueError("Both a PDF file and a Word file must be uploaded.")

    # Run the helpers with the interpreter that is running this app, so they
    # see the same installed packages; a bare "python" may resolve to a
    # different interpreter (or to nothing) depending on PATH.
    python_exe = sys.executable or "python"

    # A unique directory per run keeps concurrent requests from colliding.
    temp_dir = tempfile.mkdtemp(prefix="hf_redtext_")

    # Standard filenames used throughout the pipeline.
    pdf_path = os.path.join(temp_dir, "input.pdf")
    word_path = os.path.join(temp_dir, "input.docx")
    pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
    word_json_path = os.path.join(temp_dir, "word_data.json")
    updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
    final_docx_path = os.path.join(temp_dir, "updated.docx")

    # Each entry is one pipeline step: script name followed by its arguments.
    pipeline = [
        # Step 1: extract text from the PDF.
        ["extract_pdf_data.py", pdf_path, pdf_txt_path],
        # Step 2: extract red text from the Word document.
        ["extract_red_text.py", word_path, word_json_path],
        # Step 3: update the Word JSON using the PDF text (the original
        # comment notes that this step calls OpenAI).
        ["update_docx_with_pdf.py", word_json_path, pdf_txt_path, updated_json_path],
        # Step 4: apply the updated JSON to the Word doc to create the output.
        ["updated_word.py", word_path, updated_json_path, final_docx_path],
    ]

    try:
        # Copy the uploaded files into the temp directory.
        shutil.copy(pdf_file, pdf_path)
        shutil.copy(word_file, word_path)

        for step in pipeline:
            subprocess.run([python_exe, *step], check=True)
    except BaseException:
        # On any failure nothing useful is left in the directory; remove it
        # so failed runs do not pile up in the temp folder, then re-raise.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Return the final .docx file.
    return final_docx_path
37
 
38
  iface = gr.Interface(
39
  fn=process_files,
40
  inputs=[
41
  gr.File(label="Upload PDF File", type="filepath"),
42
+ gr.File(label="Upload Word File", type="filepath")
43
  ],
44
  outputs=gr.File(label="Download Updated Word File"),
45
  title="Red Text Replacer",