Spaces:
Runtime error
Runtime error
| import random | |
def repeat(text, n_max_gram=3, n_max_repeat=3):
    """Repeat one randomly chosen n-gram of ``text`` at random positions.

    The text is split on whitespace, a contiguous run of 1..``n_max_gram``
    tokens is picked, and a copy of that run is inserted 1..``n_max_repeat``
    times at independently chosen positions (including the very start and
    the very end).

    Args:
        text: Input string; tokens are whitespace-separated.
        n_max_gram: Largest n-gram size that may be picked. The effective
            size never exceeds the number of tokens in ``text``.
        n_max_repeat: Largest number of extra copies that may be inserted.

    Returns:
        The noised text with tokens joined by single spaces. Text without
        any tokens yields an empty string.
    """
    tokens = text.split()
    if not tokens:
        # No n-gram can be picked from token-less text.
        return ""

    # Cap the n-gram size at the token count: otherwise the start-index
    # range below would be empty and randint would raise ValueError.
    n_gram = random.randint(1, min(n_max_gram, len(tokens)))
    start_idx = random.randint(0, len(tokens) - n_gram)
    repeated_tokens = tokens[start_idx:start_idx + n_gram]

    n_repeat = random.randint(1, n_max_repeat)
    for _ in range(n_repeat):
        # Upper bound len(tokens) is inclusive, so appending is possible.
        insert_idx = random.randint(0, len(tokens))
        tokens[insert_idx:insert_idx] = repeated_tokens

    return " ".join(tokens)
def remove(text, n_max_gram=3):
    """Remove one randomly chosen n-gram from ``text``.

    The text is split on whitespace and a contiguous run of
    1..``n_max_gram`` tokens is deleted from a random position.

    Args:
        text: Input string; tokens are whitespace-separated.
        n_max_gram: Largest n-gram size that may be removed. The effective
            size never exceeds the number of tokens in ``text``.

    Returns:
        The remaining tokens joined by single spaces. This can be an empty
        string when the removed n-gram covers the whole text, or when the
        text has no tokens to begin with.
    """
    tokens = text.split()
    if not tokens:
        # Nothing to remove from token-less text.
        return ""

    # Cap the n-gram size at the token count: otherwise the start-index
    # range below would be empty and randint would raise ValueError.
    n_gram = random.randint(1, min(n_max_gram, len(tokens)))
    remove_idx = random.randint(0, len(tokens) - n_gram)
    del tokens[remove_idx:remove_idx + n_gram]

    return " ".join(tokens)
def insert(text, vocab, n_max_tokens=3):
    """Insert random vocabulary tokens into ``text``.

    Between 1 and ``n_max_tokens`` tokens are drawn from ``vocab`` (with
    replacement) and each is inserted at an independently chosen position,
    which may be the very start or the very end of the text.

    Args:
        text: Input string; tokens are whitespace-separated.
        vocab: Non-empty sequence of candidate tokens to draw from.
        n_max_tokens: Largest number of tokens that may be inserted.

    Returns:
        The noised text with tokens joined by single spaces.

    Raises:
        IndexError: If ``vocab`` is empty (raised by ``random.choice``).
    """
    tokens = text.split()
    n_insert_token = random.randint(1, n_max_tokens)
    for _ in range(n_insert_token):
        # Inclusive upper bound len(tokens) allows appending after the last
        # token and keeps the range valid when the text has no tokens.
        insert_token_idx = random.randint(0, len(tokens))
        tokens.insert(insert_token_idx, random.choice(vocab))
    return " ".join(tokens)
def swap(text, vocab, n_max_tokens=3):
    """Replace random tokens of ``text`` with different vocabulary tokens.

    Between 1 and ``n_max_tokens`` positions are picked (the same position
    may be picked more than once) and the token at each is replaced by a
    token from ``vocab`` that differs from the one currently there.

    Args:
        text: Input string; tokens are whitespace-separated.
        vocab: Sequence of candidate replacement tokens.
        n_max_tokens: Largest number of replacements to attempt.

    Returns:
        The noised text with tokens joined by single spaces. Text without
        any tokens yields an empty string. A position is left unchanged
        when ``vocab`` offers no token different from the current one.

    Raises:
        IndexError: If ``vocab`` is empty and ``text`` has tokens (raised
            by ``random.choice``).
    """
    tokens = text.split()
    if not tokens:
        # No position can be picked in token-less text.
        return ""

    n_swap_tokens = random.randint(1, n_max_tokens)
    for _ in range(n_swap_tokens):
        swap_token_idx = random.randint(0, len(tokens) - 1)
        current = tokens[swap_token_idx]

        # The rejection loop below only terminates if vocab contains at
        # least one token that differs from the current one; skip otherwise.
        # all() short-circuits on the first differing entry, so this check
        # is cheap for any realistic vocabulary.
        if len(vocab) > 0 and all(candidate == current for candidate in vocab):
            continue

        swap_token = random.choice(vocab)
        while swap_token == current:
            swap_token = random.choice(vocab)
        tokens[swap_token_idx] = swap_token

    return " ".join(tokens)
def shuffle(text):
    """Return ``text`` with its whitespace-separated tokens in random order.

    The tokens are permuted in place with ``random.shuffle`` and re-joined
    with single spaces, so runs of whitespace in the input are collapsed.
    """
    words = text.split()
    random.shuffle(words)
    return " ".join(words)