Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -357,7 +357,7 @@ def optimize_vocabulary(texts, vocab_size=10000, min_frequency=2):
|
|
| 357 |
|
| 358 |
# Train BPE tokenizer
|
| 359 |
# tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
|
| 360 |
-
trainer = BpeTrainer(vocab_size=vocab_size, special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"])
|
| 361 |
tokenizer.train_from_iterator(optimized_texts, trainer)
|
| 362 |
|
| 363 |
return tokenizer, optimized_texts
|
|
|
|
| 357 |
|
| 358 |
# Train BPE tokenizer
|
| 359 |
# tokenizer = Tokenizer(BPE(unk_token="[UNK]"))
|
| 360 |
+
trainer = models.BpeTrainer(vocab_size=vocab_size, special_tokens=["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"])
|
| 361 |
tokenizer.train_from_iterator(optimized_texts, trainer)
|
| 362 |
|
| 363 |
return tokenizer, optimized_texts
|