| import pickle | |
| from autocomplete import save_compressed_word_list | |
| import json | |
def compress_words(words):
    """
    Read a word list from a text file and save it in compressed form.

    Despite its name, ``words`` is the path of a text file, not a list of
    words. Every line of that file has its surrounding whitespace (including
    the trailing newline) stripped; blank lines therefore become empty
    strings and are kept. The cleaned list is saved under the name
    'all_lemmas.pkl.gz' via ``compress_word_list``.

    Returns None.
    """
    # NOTE(review): the file is opened with the platform's default text
    # encoding -- confirm whether an explicit encoding should be used.
    with open(words, 'r') as f:
        lines = f.readlines()
    # Stripping and saving is exactly what compress_word_list does, so reuse
    # it instead of keeping a second copy of the same loop here.
    compress_word_list(lines)
def compress_word_list(words):
    """
    Strip each word in ``words`` and save the resulting list.

    ``words`` is any iterable of strings. Leading and trailing whitespace is
    removed from every entry; entries are neither filtered nor reordered.
    The cleaned list is passed to ``save_compressed_word_list`` together
    with the fixed target name 'all_lemmas.pkl.gz'.

    Returns None.
    """
    # NOTE(review): the on-disk format is decided by save_compressed_word_list
    # (presumably a gzip-compressed pickle, going by the file name) -- confirm
    # in the autocomplete module.
    stripped = [entry.strip() for entry in words]
    save_compressed_word_list(stripped, 'all_lemmas.pkl.gz')
def main():
    """
    Save the top-level keys of 'lsj_dict.json' as a compressed word list.

    Loads the JSON document from the current working directory, takes its
    top-level keys (used here as the lemmas) in their original order and
    hands them to ``compress_word_list``.

    Returns None.
    """
    # Use a context manager so the file handle is closed promptly, and decode
    # explicitly as UTF-8 (the encoding JSON text is specified to use) rather
    # than relying on the platform's locale default.
    with open('lsj_dict.json', 'r', encoding='utf-8') as f:
        lemma_dict = json.load(f)
    # Iterating a dict yields its keys.
    all_lemmas = list(lemma_dict)
    compress_word_list(all_lemmas)


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()