HBV_AI_Assistant / core /text_processors.py
moazx's picture
Initial commit with all files including LFS
73c6377
raw
history blame contribute delete
470 Bytes
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
MarkdownHeaderTextSplitter
)
# Shared general-purpose splitter: character-based chunks with overlap so
# that text cut at a chunk boundary still appears in the neighbouring chunk.
recursive_splitter = RecursiveCharacterTextSplitter(
    # Candidate split points, listed from coarsest (blank line) to finest
    # (empty string, i.e. between any two characters).
    separators=["\n\n", "\n", ". ", " ", ""],
    # Sizes below are measured with ``len``, i.e. in characters.
    length_function=len,
    chunk_size=3500,
    chunk_overlap=400,
)
# Shared Markdown-aware splitter: sections are delimited by level-2 and
# level-3 headings, which are recorded under the metadata names given below.
markdown_splitter = MarkdownHeaderTextSplitter(
    # Header stripping is disabled, so heading lines are not removed from
    # the text handed back by the splitter.
    strip_headers=False,
    # (markdown prefix, metadata key) pairs.
    headers_to_split_on=[
        (
            "##",
            "Header 2",
        ),
        (
            "###",
            "Header 3",
        ),
    ],
)