Spaces:
Sleeping
Sleeping
| from dataclasses import dataclass | |
| from typing import List, Tuple | |
| from _utils.langchain_utils.Splitter_class import Splitter | |
| from _utils.models.gerar_documento import DocumentChunk | |
class HandleFilesClass:
    """Aggregate the split results of several PDF documents."""

    async def get_full_text_and_all_PDFs_chunks(
        self,
        listaPDFs: List[str],
        splitterObject: Splitter,
        should_use_llama_parse: bool,
        isBubble: bool,
    ) -> Tuple[List[DocumentChunk], List[str]]:
        """Load and split every PDF and return the combined chunks and pages.

        Each entry of ``listaPDFs`` is handed, in order, to
        ``splitterObject.load_and_split_document`` and the results of all
        documents are concatenated in that same order.

        Args:
            listaPDFs: The PDF entries to process, one splitter call each.
            splitterObject: Object whose awaitable ``load_and_split_document``
                returns a ``(chunks, pages)`` pair for a single document.
            should_use_llama_parse: Forwarded unchanged to the splitter.
            isBubble: Forwarded unchanged to the splitter.

        Returns:
            A ``(all_chunks, all_pages)`` tuple covering every document.
            Both lists are empty when ``listaPDFs`` is empty.
        """
        all_PDFs_chunks: List[DocumentChunk] = []
        pages: List[str] = []

        for pdf_path in listaPDFs:
            chunks, pdf_pages = await splitterObject.load_and_split_document(
                pdf_path, should_use_llama_parse, isBubble
            )
            # Extend in place: avoids re-copying the growing list each pass.
            all_PDFs_chunks.extend(chunks)
            # Accumulate pages too; rebinding `pages` here would silently
            # drop the pages of every document except the last one.
            pages.extend(pdf_pages)

        return all_PDFs_chunks, pages