Shami96 committed on
Commit
c61706a
·
verified ·
1 Parent(s): 9aea1c2

Update extract_pdf_data.py

Browse files
Files changed (1) hide show
  1. extract_pdf_data.py +5 -1
extract_pdf_data.py CHANGED
@@ -30,4 +30,8 @@ def extract_pdf_full_text(pdf_path, txt_path):
30
  print(f"✅ Saved deduped full text to {txt_path}")
31
 
32
  if __name__ == "__main__":
33
- extract_pdf_full_text("test1.pdf", "pdf_all_text_full.txt")
 
 
 
 
 
30
  print(f"✅ Saved deduped full text to {txt_path}")
31
 
32
  if __name__ == "__main__":
33
+ import sys
34
+ # Usage: python extract_pdf_data.py input.pdf output.txt
35
+ input_pdf = sys.argv[1]
36
+ output_txt = sys.argv[2]
37
+ extract_pdf_full_text(input_pdf, output_txt)