Spaces:
Sleeping
Sleeping
| from enum import Enum | |
| from typing import List | |
| from langchain_community.document_loaders import PyMuPDFLoader | |
| from langchain_core.documents import Document | |
| import asyncio | |
class PDFLoaderWrapper():
    """Load one or more PDF files into LangChain ``Document`` objects.

    The wrapper accepts a single path or a list of paths and loads every
    file with the backend selected by ``loader_type``. Loading is
    best-effort: a file that fails to load is reported on stdout and
    skipped, and the remaining files are still processed.
    """

    class LoaderType(str, Enum):
        """Supported PDF loading backends."""

        PYMUPDF = "pymupdf"

    def __init__(
        self,
        file_path: str | List[str],
        loader_type: LoaderType = LoaderType.PYMUPDF,
    ):
        """Store the paths to load and the backend to load them with.

        Args:
            file_path: A single path, or a list of paths, of PDF files.
            loader_type: Backend to use. Either a ``LoaderType`` member or
                its string value (for example ``"pymupdf"``).

        Raises:
            ValueError: If ``loader_type`` is not a supported backend.
        """
        # Always keep a list so aload() has a single code path.
        self.file_path = file_path if isinstance(file_path, list) else [file_path]
        # Coerce through the enum: a plain string value is accepted, while an
        # unknown value fails here instead of making aload() silently return
        # an empty list.
        self.loader_type = self.LoaderType(loader_type)

    async def aload(self) -> List[Document]:
        """Asynchronously load every configured file.

        Files are loaded one after another, in the order they were given.

        Returns:
            The documents of all files that loaded successfully,
            concatenated in input order.

        Raises:
            ValueError: If ``self.loader_type`` has no loader implementation.
        """
        # The backend does not change between files, so check it once up
        # front rather than on every iteration.
        if self.loader_type != self.LoaderType.PYMUPDF:
            raise ValueError(f"Unsupported loader type: {self.loader_type!r}")

        all_docs = []
        for file_path in self.file_path:
            try:
                docs = await PyMuPDFLoader(file_path).aload()
            except Exception as e:
                # Deliberately broad: one unreadable or missing file must not
                # abort the whole batch.
                print(f"Error loading file {file_path}: {e}")
            else:
                all_docs.extend(docs)
        return all_docs