Spaces:

Shami96
/

PDF-Data_Extractor

Running

Shami96 committed on Aug 1

Commit

377eee0

verified ·

1 Parent(s): de67fe9

Delete utils.py

Files changed (1) hide show

utils.py DELETED Viewed

@@ -1,29 +0,0 @@
-# utils.py
-import pdfplumber
-import re
-def extract_text_from_pdf(pdf_path):
-    text = ""
-    with pdfplumber.open(pdf_path) as pdf:
-        for page in pdf.pages:
-            text += page.extract_text() + "\n"
-    return text
-def parse_pdf_to_dict(text):
-    data = {}
-    # Example logic — customize based on actual data format
-    patterns = {
-        "Operator Name": r"Operator Name[:\s]+(.+)",
-        "Accreditation Modules": r"Accreditation Modules[:\s]+(.+)",
-        "Audit Date": r"Audit Date[:\s]+([\d\-\/]+)",
-        "Auditor Name": r"Auditor Name[:\s]+(.+)",
-    }
-    for key, pattern in patterns.items():
-        match = re.search(pattern, text, re.IGNORECASE)
-        if match:
-            data[key] = match.group(1).strip()
-    return data