Spaces:
Running
Running
| # pdf_parser.py | |
| import fitz # PyMuPDF | |
def extract_text_from_pdf(pdf_path):
    """Return the text of every page in a PDF, joined by newlines.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output, in page
        order, separated by ``"\n"``.
    """
    # Use the document as a context manager so the underlying file handle is
    # released even if text extraction raises part-way through.
    with fitz.open(pdf_path) as doc:
        # The join consumes the generator fully while the document is still
        # open; pages must not be read after the document is closed.
        return "\n".join(page.get_text() for page in doc)
def parse_data_blocks(text):
    """Build a dict from the ``key: value`` lines found in *text*.

    Only lines containing a colon contribute an entry. Each such line is
    split at its first colon; the part before it becomes the key and the
    remainder the value, both stripped of surrounding whitespace. When a key
    appears more than once, the value from the last occurrence is kept.

    Args:
        text: Multi-line string to scan.

    Returns:
        Dict mapping stripped keys to stripped values.
    """
    # partition(':') splits at the first colon only, so values may themselves
    # contain colons (e.g. timestamps).
    split_lines = (
        raw_line.partition(':')
        for raw_line in text.splitlines()
        if ':' in raw_line
    )
    return {
        label.strip(): value.strip()
        for label, _sep, value in split_lines
    }