Spaces:
Running
Running
Aymeric Roucher
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ def chunk(text, words, splitter_selection, slider_overlap):
|
|
| 53 |
),
|
| 54 |
)
|
| 55 |
text_splits = [split.content for split in splits]
|
| 56 |
-
elif splitter_selection == "
|
| 57 |
text_splitter = CharacterTextSplitter(
|
| 58 |
separator="",
|
| 59 |
chunk_size=words,
|
|
@@ -63,7 +63,16 @@ def chunk(text, words, splitter_selection, slider_overlap):
|
|
| 63 |
)
|
| 64 |
splits = text_splitter.create_documents([text])
|
| 65 |
text_splits = [split.page_content for split in splits]
|
| 66 |
-
elif splitter_selection == "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 68 |
chunk_size=words,
|
| 69 |
chunk_overlap=slider_overlap,
|
|
@@ -125,10 +134,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 125 |
text = gr.Textbox(label="Your text 🪶", value=ESSAY)
|
| 126 |
split_selection = gr.Radio(
|
| 127 |
[
|
| 128 |
-
"
|
| 129 |
-
"
|
| 130 |
-
"
|
| 131 |
-
"
|
|
|
|
| 132 |
],
|
| 133 |
value="Character",
|
| 134 |
label="Chunking method ",
|
|
|
|
| 53 |
),
|
| 54 |
)
|
| 55 |
text_splits = [split.content for split in splits]
|
| 56 |
+
elif splitter_selection == "LangChain's CharacterTextSplitter":
|
| 57 |
text_splitter = CharacterTextSplitter(
|
| 58 |
separator="",
|
| 59 |
chunk_size=words,
|
|
|
|
| 63 |
)
|
| 64 |
splits = text_splitter.create_documents([text])
|
| 65 |
text_splits = [split.page_content for split in splits]
|
| 66 |
+
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - vanilla":
|
| 67 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
| 68 |
+
chunk_size=words,
|
| 69 |
+
chunk_overlap=slider_overlap,
|
| 70 |
+
length_function=len,
|
| 71 |
+
add_start_index=True,
|
| 72 |
+
)
|
| 73 |
+
splits = text_splitter.create_documents([text])
|
| 74 |
+
text_splits = [split.page_content for split in splits]
|
| 75 |
+
elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter - with '.'":
|
| 76 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 77 |
chunk_size=words,
|
| 78 |
chunk_overlap=slider_overlap,
|
|
|
|
| 134 |
text = gr.Textbox(label="Your text 🪶", value=ESSAY)
|
| 135 |
split_selection = gr.Radio(
|
| 136 |
[
|
| 137 |
+
"LangChain's CharacterTextSplitter",
|
| 138 |
+
"Langchain's RecursiveCharacterTextSplitter - vanilla",
|
| 139 |
+
"Langchain's RecursiveCharacterTextSplitter - with '.'",
|
| 140 |
+
"Haystack's PreProcessor - Word level, no sentence boundaries",
|
| 141 |
+
"Haystack's PreProcessor - Word level, respect sentence boundaries",
|
| 142 |
],
|
| 143 |
value="Character",
|
| 144 |
label="Chunking method ",
|