Spaces:
Running
Running
File size: 470 Bytes
73c6377 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
MarkdownHeaderTextSplitter
)
# Module-level splitter configured for chunks of up to 3500 units with a
# 400-unit overlap, where a "unit" is whatever `len` counts on the input
# (characters, assuming the input is a `str`).
recursive_splitter = RecursiveCharacterTextSplitter(
    # Candidate separators, listed from coarsest (blank line) to finest
    # (empty string). NOTE(review): LangChain is documented to try these in
    # order — confirm against the installed version.
    separators=["\n\n", "\n", ". ", " ", ""],
    length_function=len,
    chunk_overlap=400,
    chunk_size=3500,
)
# Module-level Markdown splitter keyed on "##" and "###" heading markers,
# each paired with the label "Header 2" / "Header 3" respectively.
markdown_splitter = MarkdownHeaderTextSplitter(
    # NOTE(review): presumably keeps the heading text inside each chunk
    # rather than removing it — confirm against the LangChain docs.
    strip_headers=False,
    headers_to_split_on=[("##", "Header 2"), ("###", "Header 3")],
)
|