Spaces:
Paused
Paused
| import asyncio | |
| import os | |
| from langchain_core.documents import Document | |
| from typing import List, Dict | |
| # Supports the base Document class from langchain | |
| # - https://github.com/langchain-ai/langchain/blob/master/libs/core/langchain_core/documents/base.py | |
class LangChainDocumentLoader:
    """Adapt in-memory LangChain ``Document`` objects to plain dict records.

    Each document is reduced to a dict with two keys: ``"raw_content"``
    (the document's ``page_content``) and ``"url"`` (a value looked up in
    the document's ``metadata`` mapping).
    """

    def __init__(self, documents: List[Document]):
        """Store the documents to convert.

        Args:
            documents: Documents to convert. The list is kept by reference,
                not copied.
        """
        self.documents = documents

    async def load(self, metadata_source_index="title") -> List[Dict[str, str]]:
        """Return one record per stored document, in the stored order.

        The method is a coroutine even though it awaits nothing, so callers
        must ``await`` it (or run it through an event loop).

        Args:
            metadata_source_index: Metadata key whose value becomes the
                record's ``"url"``. When the key is absent, ``""`` is used.

        Returns:
            A list of ``{"raw_content": ..., "url": ...}`` dicts; empty when
            no documents are stored.
        """
        return [
            {
                "raw_content": document.page_content,
                "url": document.metadata.get(metadata_source_index, ""),
            }
            for document in self.documents
        ]