Shami96 committed on
Commit
377eee0
·
verified ·
1 Parent(s): de67fe9

Delete utils.py

Browse files
Files changed (1) hide show
  1. utils.py +0 -29
utils.py DELETED
@@ -1,29 +0,0 @@
1
- # utils.py
2
-
3
- import pdfplumber
4
- import re
5
-
6
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Value passed straight to ``pdfplumber.open``.

    Returns:
        A single string containing each page's extracted text in page
        order, with a newline appended after every page. A page that
        yields no text contributes only its trailing newline.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can yield None for a page with no extractable text
        # (e.g. image-only pages, in some pdfplumber versions); coerce to ""
        # so the concatenation never raises TypeError.
        return "".join(
            (page.extract_text() or "") + "\n" for page in pdf.pages
        )
12
-
13
def parse_pdf_to_dict(text, patterns=None):
    """Extract labelled fields from text into a dictionary.

    Each pattern is searched case-insensitively in ``text``. For every
    pattern that matches, the first capture group of the first match is
    stripped of surrounding whitespace and stored under the field name.
    Fields whose pattern does not match are omitted from the result.

    Args:
        text: The text to search. ``None`` or an empty string yields an
            empty dict instead of raising inside ``re.search``.
        patterns: Optional mapping of field name to a regular expression
            containing at least one capture group. When omitted, the
            built-in default patterns below are used.

    Returns:
        A dict mapping each matched field name to its captured value.
    """
    if not text:
        return {}

    if patterns is None:
        # Example logic — customize based on actual data format
        patterns = {
            "Operator Name": r"Operator Name[:\s]+(.+)",
            "Accreditation Modules": r"Accreditation Modules[:\s]+(.+)",
            "Audit Date": r"Audit Date[:\s]+([\d\-\/]+)",
            "Auditor Name": r"Auditor Name[:\s]+(.+)",
        }

    data = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            data[key] = match.group(1).strip()

    return data