File size: 470 Bytes
73c6377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    MarkdownHeaderTextSplitter
)

# Shared, module-level character splitter.
# Sizes are measured with ``len`` (i.e. in characters for ``str`` input):
# chunks target at most 3500 units, with 400 units of overlap between
# neighbouring chunks.
recursive_splitter = RecursiveCharacterTextSplitter(
    # Listed from coarsest to finest: blank line, line break, sentence end
    # (". "), single space, and finally the empty string.
    separators=["\n\n", "\n", ". ", " ", ""],
    length_function=len,
    chunk_size=3500,
    chunk_overlap=400,
)


# Shared, module-level Markdown splitter keyed on level-2 and level-3 headers
# only; level-1 ("#") headers are not listed as split points.
# Each pair is (markdown header prefix, label). The label is presumably the
# metadata key attached to the resulting sections -- confirm against the
# LangChain MarkdownHeaderTextSplitter documentation.
markdown_splitter = MarkdownHeaderTextSplitter(
    # Header stripping is explicitly disabled.
    strip_headers=False,
    headers_to_split_on=[("##", "Header 2"), ("###", "Header 3")],
)