Spaces:
Runtime error
Runtime error
| import json | |
def extract_leaves(item, path=None, leaves=None):
    """
    Collect the non-empty leaf values of a nested dict/list structure.

    Dictionaries contribute their keys to the path; lists are traversed
    without adding anything to the path, so every element of a list shares
    the path of the list itself. Leaves equal to the empty string are skipped.

    Args:
        item: The structure (dict, list, or leaf value) to walk.
        path: Keys traversed so far; a fresh list is used when None.
        leaves: Accumulator the results are appended to; a fresh list is
            used when None.

    Returns:
        list: The accumulator, holding (path, value) tuples in traversal order.
    """
    collected = [] if leaves is None else leaves
    trail = [] if path is None else path

    if isinstance(item, dict):
        for name, child in item.items():
            # A new list per key keeps sibling paths independent.
            extract_leaves(child, trail + [name], collected)
        return collected

    if isinstance(item, list):
        for child in item:
            extract_leaves(child, trail, collected)
        return collected

    # Anything else is a leaf; empty strings are not recorded.
    if item != '':
        collected.append((trail, item))
    return collected
def split_document(document, window_size, overlap, tokenizer):
    """
    Splits a document into chunks of a specified window size with an overlap.

    Args:
        document: Text to split; passed to tokenizer.tokenize().
        window_size (int): Maximum number of tokens per chunk.
        overlap (int): Number of tokens shared by consecutive chunks. Must be
            smaller than window_size whenever the document has to be split.
        tokenizer: Object providing tokenize(text) and
            convert_tokens_to_string(tokens).

    Returns:
        list: The chunk strings. When the document has at most window_size
        tokens, the list contains the original document unchanged.

    Raises:
        ValueError: If the document needs splitting and overlap is not
            smaller than window_size.
    """
    tokens = tokenizer.tokenize(document)
    total = len(tokens)
    print(f"\tLength of document: {total} tokens")

    if total > window_size:
        stride = window_size - overlap
        if stride <= 0:
            # A zero stride makes range() fail with an unrelated message and a
            # negative stride yields no chunks at all, silently dropping the
            # document; report the real problem instead.
            raise ValueError(
                f"overlap ({overlap}) must be smaller than "
                f"window_size ({window_size})"
            )
        chunks = []
        for start in range(0, total, stride):
            window = tokens[start:start + window_size]
            end = start + len(window)
            print(f"\t{start} to {end}")
            chunks.append(tokenizer.convert_tokens_to_string(window))
            # Stop once a window reaches the last token so no trailing chunk
            # made purely of overlap is emitted.
            if end >= total:
                break
    else:
        chunks = [document]

    print(f"\tSplit into {len(chunks)} chunks")
    return chunks
def handle_broken_output(pred, prev):
    """
    Handles broken or empty JSON output by returning the previous prediction.

    Args:
        pred: Candidate prediction, expected to be a JSON object as text.
        prev: Value to fall back to when pred is unusable.

    Returns:
        pred when it parses to a JSON object with at least one value other
        than "" or []; otherwise prev.
    """
    try:
        parsed = json.loads(pred)
    except (ValueError, TypeError, RecursionError):
        # Broken JSON (JSONDecodeError is a ValueError), a non-string input,
        # or pathologically deep nesting: fall back to the previous value.
        # Deliberately not a bare except, so KeyboardInterrupt/SystemExit
        # still propagate.
        return prev

    if not isinstance(parsed, dict):
        # Valid JSON that is not an object has no fields to inspect.
        return prev

    # If every field is empty (including the no-fields case), return previous.
    if all(value in ("", []) for value in parsed.values()):
        return prev
    return pred
def clean_json_text(text):
    """
    Cleans JSON text by stripping leading/trailing whitespace and un-escaping
    backslash-escaped '#' and '&' characters.

    Args:
        text (str): The text to clean.

    Returns:
        str: The stripped text with every "\\#" replaced by "#" and every
        "\\&" replaced by "&".
    """
    text = text.strip()
    # Raw strings: "\#" and "\&" are invalid escape sequences in ordinary
    # literals (a warning today, slated to become a syntax error).
    text = text.replace(r"\#", "#").replace(r"\&", "&")
    return text
def sync_empty_fields(dict1, dict2):
    """
    Synchronize empty fields between two dictionaries.

    dict1 is modified in place so that its keys follow dict2:
      * a key whose dict2 value is a dict is synced recursively;
      * a key whose dict2 value is empty (None, "", [] or {}) is added to
        dict1 with that value when dict1 lacks it;
      * a key whose dict2 value is non-empty is removed from dict1 when
        dict1 holds an empty value for it;
      * any key of dict1 that is absent from dict2 is removed.

    Args:
        dict1 (dict): The dictionary to be modified.
        dict2 (dict): The reference dictionary with empty fields to be synced.

    Returns:
        dict: The modified dict1 with synced empty fields.
    """
    empty_values = (None, "", [], {})

    # Traverse dict2 to add or remove empty fields in dict1.
    for key, value in dict2.items():
        if isinstance(value, dict):
            nested = dict1.get(key)
            if not isinstance(nested, dict):
                # dict1 is missing the key or holds a non-dict there; start
                # from an empty dict so the recursion cannot fail on item
                # assignment into a str/list/None.
                nested = {}
            dict1[key] = sync_empty_fields(nested, value)
        elif value in empty_values:
            if key not in dict1:
                dict1[key] = value
        elif key in dict1 and dict1[key] in empty_values:
            # dict2 has a non-empty value here, so an empty placeholder in
            # dict1 is dropped.
            del dict1[key]

    # Remove fields of dict1 that dict2 does not know about. The keys are
    # collected first because a dict cannot be resized while iterating it.
    for key in [key for key in dict1 if key not in dict2]:
        del dict1[key]
    return dict1