```python
import gradio as gr
from transformers import AutoTokenizer

# List of available tokenizers
tokenizers = [
    "bert-base-uncased",
    "gpt2",
    "roberta-base",
    "distilbert-base-uncased",
    "xlnet-base-cased",
]


def tokenize_text(text, tokenizer_name):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    tokens = tokenizer.tokenize(text)
    return " ".join(tokens)


def compare_tokenizers(text, selected_tokenizers):
    results = {}
    for tokenizer_name in selected_tokenizers:
        results[tokenizer_name] = tokenize_text(text, tokenizer_name)
    return results


# Create the Gradio interface
iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=[
        gr.Textbox(label="Enter text to tokenize"),
        gr.CheckboxGroup(choices=tokenizers, label="Select tokenizers"),
    ],
    outputs=gr.JSON(label="Tokenization Results"),
    title="Tokenizer Comparison",
    description="Compare tokenization results from different tokenizers.",
)

# Launch the app
iface.launch()
```
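One thing to keep in mind: `compare_tokenizers` calls `AutoTokenizer.from_pretrained` on every submission, so each selected checkpoint is re-loaded for every request. A minimal sketch of one way to avoid that, assuming the same checkpoint names as above, is to memoize the tokenizer loader with `functools.lru_cache` (the helper name `get_tokenizer` is just illustrative):

```python
from functools import lru_cache

from transformers import AutoTokenizer


@lru_cache(maxsize=None)
def get_tokenizer(tokenizer_name):
    # Load each checkpoint at most once per process; later calls with the
    # same name return the cached tokenizer instead of reloading it.
    return AutoTokenizer.from_pretrained(tokenizer_name)


def tokenize_text(text, tokenizer_name):
    tokens = get_tokenizer(tokenizer_name).tokenize(text)
    return " ".join(tokens)
```

The rest of the app, `compare_tokenizers` and the `gr.Interface` definition, can stay exactly as written above; only the tokenizer loading path changes.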