Spaces:
Build error
Build error
File size: 602 Bytes
ee8fb16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# Split documents into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
def split_documents(docs, *, chunk_size=1000, chunk_overlap=0):
    """Split plain texts or LangChain ``Document`` objects into chunks.

    Args:
        docs: A sequence of strings or of ``Document`` objects. The type of
            the first element decides how the whole sequence is handled.
        chunk_size: Maximum length of a chunk, measured with ``len``.
        chunk_overlap: Amount of overlap between consecutive chunks,
            measured with ``len``.

    Returns:
        The list of ``Document`` chunks produced by the splitter. When the
        input consists of ``Document`` objects, each chunk carries the
        metadata of the document it was cut from.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
    )

    contents = docs
    metadatas = None
    if docs and isinstance(docs[0], Document):
        contents = [doc.page_content for doc in docs]
        # Forward the metadata alongside the text; passing only
        # page_content would leave every chunk with empty metadata.
        metadatas = [doc.metadata for doc in docs]

    texts = text_splitter.create_documents(contents, metadatas=metadatas)
    print(f"Split into {len(texts)} chunks")
    return texts
|