# Spaces: Running
| import gradio as gr | |
| from haystack.nodes import PreProcessor | |
| from haystack import Document | |
# Module-level Haystack PreProcessor reused by every request to `chunk`.
preprocessor = PreProcessor(
    # Cleaning options applied to the incoming text.
    clean_whitespace=True,
    clean_empty_lines=True,
    clean_header_footer=True,
    remove_substrings=None,
    # Splitting options: units of 200 words, no overlap between consecutive
    # chunks, with sentence boundaries respected.
    split_by="word",
    split_length=200,
    split_overlap=0,
    split_respect_sentence_boundary=True,
    # Character-length threshold passed through to Haystack's length check.
    max_chars_check=10_000,
)
def chunk(text):
    """Split ``text`` into chunks and label each one for highlighting.

    The text is wrapped in a Haystack ``Document`` and run through the
    module-level ``preprocessor``.

    Args:
        text: Raw text entered by the user. May be empty or ``None``.

    Returns:
        A list of ``(chunk_text, label)`` tuples, the order that
        ``gr.HighlightedText`` expects. Labels cycle through the strings
        ``"0"``, ``"1"`` and ``"2"`` so that neighbouring chunks are drawn in
        different colours. An empty list is returned when there is no text
        to split.
    """
    # Nothing to split: skip the preprocessor and render an empty result.
    if not text or not text.strip():
        return []

    # Pass a list: handing `process` a bare Document is deprecated in
    # Haystack 1.x, while a list of documents is accepted.
    splits = preprocessor.process([Document(content=text)])

    # HighlightedText wants (text, label). The label must be a string so it
    # matches the string keys of the component's color_map.
    return [(split.content, str(i % 3)) for i, split in enumerate(splits)]
# Output widget: one highlighted span per chunk, coloured via the string
# labels "0", "1" and "2"; adjacent spans are kept separate.
highlights_output = gr.HighlightedText(
    label="Highlights",
    combine_adjacent=False,
    show_legend=True,
    color_map={"0": "red", "1": "green", "2": "yellow"},
)

# Single text box in, highlighted chunks out.
iface = gr.Interface(fn=chunk, inputs="text", outputs=highlights_output)
iface.launch()