Spaces:
Running
Running
| # pdf_parser.py | |
| import fitz # PyMuPDF | |
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output, in
        page order, with nothing inserted between pages.

    Raises:
        Whatever ``fitz.open`` raises when the file cannot be opened.
    """
    # Open the document as a context manager so the underlying file handle
    # is released even if text extraction fails on some page.
    with fitz.open(pdf_path) as doc:
        # Join once instead of growing a string with ``+=`` page by page.
        return "".join(page.get_text() for page in doc)
def parse_data_blocks(text):
    """Collect ``key: value`` pairs from newline-separated text.

    Each line containing a colon is split at its first colon; the part
    before it becomes the key and the remainder the value, both stripped
    of surrounding whitespace. Lines without a colon are ignored. When a
    key occurs more than once, the last value wins.

    Args:
        text: The raw text to scan.

    Returns:
        A dict mapping each stripped key to its stripped value.
    """
    # Customize this parsing logic as needed.
    parsed = {}
    for raw_line in text.split("\n"):
        name, separator, value = raw_line.partition(":")
        if not separator:
            # No colon on this line, so there is nothing to record.
            continue
        parsed[name.strip()] = value.strip()
    return parsed