Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -40,7 +40,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
|
|
| 40 |
if splitter_selection == LABEL_TEXTSPLITTER:
|
| 41 |
text_splitter = CharacterTextSplitter(
|
| 42 |
chunk_size=length,
|
| 43 |
-
chunk_overlap=
|
| 44 |
length_function=length_function,
|
| 45 |
strip_whitespace=False,
|
| 46 |
is_separator_regex=False,
|
|
@@ -49,7 +49,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
|
|
| 49 |
elif splitter_selection == LABEL_RECURSIVE:
|
| 50 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 51 |
chunk_size=length,
|
| 52 |
-
chunk_overlap=
|
| 53 |
length_function=length_function,
|
| 54 |
strip_whitespace=False,
|
| 55 |
separators=separators,
|
|
@@ -59,7 +59,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
|
|
| 59 |
|
| 60 |
unoverlapped_text_splits = unoverlap_list(text_splits)
|
| 61 |
|
| 62 |
-
output = [((split[0], 0) if split[1] else (split[0], str(i+1))) for i, split in enumerate(unoverlapped_text_splits)]
|
| 63 |
print(output)
|
| 64 |
return output
|
| 65 |
|
|
@@ -138,10 +138,10 @@ with gr.Blocks(theme=gr.themes.Soft(text_size='lg', font=["monospace"], primary_
|
|
| 138 |
info="How should we measure our chunk lengths?",
|
| 139 |
)
|
| 140 |
slider_count = gr.Slider(
|
| 141 |
-
20, 500, value=200, label="Chunk length 📏", info="In the chosen unit."
|
| 142 |
)
|
| 143 |
chunk_overlap = gr.Slider(
|
| 144 |
-
0, 30, value=10, label="Overlap between chunks", info="In the chosen unit."
|
| 145 |
)
|
| 146 |
out = gr.HighlightedText(
|
| 147 |
label="Output",
|
|
|
|
| 40 |
if splitter_selection == LABEL_TEXTSPLITTER:
|
| 41 |
text_splitter = CharacterTextSplitter(
|
| 42 |
chunk_size=length,
|
| 43 |
+
chunk_overlap=10,
|
| 44 |
length_function=length_function,
|
| 45 |
strip_whitespace=False,
|
| 46 |
is_separator_regex=False,
|
|
|
|
| 49 |
elif splitter_selection == LABEL_RECURSIVE:
|
| 50 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 51 |
chunk_size=length,
|
| 52 |
+
chunk_overlap=10,
|
| 53 |
length_function=length_function,
|
| 54 |
strip_whitespace=False,
|
| 55 |
separators=separators,
|
|
|
|
| 59 |
|
| 60 |
unoverlapped_text_splits = unoverlap_list(text_splits)
|
| 61 |
|
| 62 |
+
output = [((split[0], '0') if split[1] else (split[0], str(i+1))) for i, split in enumerate(unoverlapped_text_splits)]
|
| 63 |
print(output)
|
| 64 |
return output
|
| 65 |
|
|
|
|
| 138 |
info="How should we measure our chunk lengths?",
|
| 139 |
)
|
| 140 |
slider_count = gr.Slider(
|
| 141 |
+
20, 500, value=200, step=1, label="Chunk length 📏", info="In the chosen unit."
|
| 142 |
)
|
| 143 |
chunk_overlap = gr.Slider(
|
| 144 |
+
0, 30, value=10, step=1, label="Overlap between chunks", info="In the chosen unit."
|
| 145 |
)
|
| 146 |
out = gr.HighlightedText(
|
| 147 |
label="Output",
|