Spaces:
Sleeping
Sleeping
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import argparse | |
| import json | |
| from functools import partial | |
| import mmengine | |
| from mmocr.utils import list_from_file, list_to_file | |
def convert(closeset_line, merge_bg_others=False, ignore_idx=0, others_idx=25):
    """Convert line-json str of closeset to line-json str of openset.

    Note that this function is designed for closeset-wildreceipt to
    openset-wildreceipt. It may not be suitable to your own dataset.

    Each annotation's ``label`` is rewritten to one of the openset node
    classes (``bg`` = 0, ``key`` = 1, ``value`` = 2, ``others`` = 3) and an
    ``edge`` id is added. Background and "others" annotations always get a
    fresh edge id. A key label and its paired value label share one edge
    id, and repeated annotations with the same closeset label reuse the
    edge id assigned the first time that label was seen.

    Args:
        closeset_line (str): The string to be deserialized to
            the closeset dictionary object.
        merge_bg_others (bool): If True, give the same label to "background"
            class and "others" class.
        ignore_idx (int): Index for ``ignore`` class.
        others_idx (int): Index for ``others`` class.

    Returns:
        str: The openset dictionary object serialized as one JSON line,
        holding ``file_name``, ``height``, ``width`` and the relabeled
        ``annotations``.

    Raises:
        ValueError: If an annotation label is neither ``ignore_idx``,
            ``others_idx`` nor one of the key/value labels in
            ``[1, others_idx)``.
    """
    # Even labels in [2, others_idx) are keys, odd labels in [1, others_idx)
    # are values. Labels at the same position of the two ranges make up a
    # key-value pair (e.g. key label 2 pairs with value label 1).
    # NOTE(review): the previous comment gave "Store_name_key" /
    # "Store_addr_value" as the wildreceipt example for position 0 - confirm
    # the class names against the dataset's class list.
    closeset_key_inds = range(2, others_idx, 2)
    closeset_value_inds = range(1, others_idx, 2)
    key_labels = frozenset(closeset_key_inds)
    value_labels = frozenset(closeset_value_inds)
    # Bidirectional key <-> value lookup. ``zip`` stops at the shorter
    # range, so an unpaired trailing value label simply has no counterpart
    # instead of triggering an out-of-range index.
    counterpart_of = dict(zip(closeset_key_inds, closeset_value_inds))
    counterpart_of.update(zip(closeset_value_inds, closeset_key_inds))

    openset_node_label_mapping = {'bg': 0, 'key': 1, 'value': 2, 'others': 3}
    if merge_bg_others:
        openset_node_label_mapping['others'] = openset_node_label_mapping['bg']

    closeset_obj = json.loads(closeset_line)
    openset_obj = {
        'file_name': closeset_obj['file_name'],
        'height': closeset_obj['height'],
        'width': closeset_obj['width'],
        'annotations': []
    }

    edge_idx = 1  # next unused edge id
    label_to_edge = {}  # closeset key/value label -> edge id already assigned
    for anno in closeset_obj['annotations']:
        label = anno['label']

        # Classify the closeset label; ignore/others take precedence over
        # the key/value ranges, as they are checked first.
        if label == ignore_idx:
            node_type = 'bg'
        elif label == others_idx:
            node_type = 'others'
        elif label in key_labels:
            node_type = 'key'
        elif label in value_labels:
            node_type = 'value'
        else:
            raise ValueError(
                f'Unexpected closeset label {label!r}: expected '
                f'ignore_idx={ignore_idx}, others_idx={others_idx} or a '
                f'key/value label in [1, {others_idx}).')

        if node_type in ('bg', 'others'):
            # Unpaired nodes never share an edge.
            edge = edge_idx
            edge_idx += 1
        else:
            edge = label_to_edge.get(label)
            if edge is None:
                # First occurrence of this label: join the edge of its
                # key/value counterpart if that was seen, else open a new
                # edge.
                edge = label_to_edge.get(counterpart_of.get(label))
                if edge is None:
                    edge = edge_idx
                    edge_idx += 1
                label_to_edge[label] = edge

        anno['label'] = openset_node_label_mapping[node_type]
        anno['edge'] = edge

    openset_obj['annotations'] = closeset_obj['annotations']

    return json.dumps(openset_obj, ensure_ascii=False)
def process(closeset_file, openset_file, merge_bg_others=False, n_proc=10):
    """Convert every line of a closeset annotation file to openset format.

    The lines read from ``closeset_file`` via ``list_from_file`` are each
    passed through ``convert`` using ``mmengine.track_parallel_progress``,
    and the results are written to ``openset_file`` via ``list_to_file``.

    Args:
        closeset_file (str): Annotation file for closeset.
        openset_file (str): Annotation file for openset.
        merge_bg_others (bool): Forwarded to ``convert``.
        n_proc (int): Passed as ``nproc`` to
            ``mmengine.track_parallel_progress``.
    """
    # A ``partial`` (not a lambda) binds the option: the callable is
    # presumably shipped to worker processes and must stay picklable -
    # confirm against mmengine before changing.
    converted_lines = mmengine.track_parallel_progress(
        partial(convert, merge_bg_others=merge_bg_others),
        list_from_file(closeset_file),
        nproc=n_proc)
    list_to_file(openset_file, converted_lines)
def parse_args():
    """Parse the command-line arguments of this script.

    Returns:
        argparse.Namespace: Namespace with ``in_file``, ``out_file``,
        ``merge`` (bool, default False) and ``n_proc`` (int, default 10).
    """
    arg_parser = argparse.ArgumentParser()

    # Positional arguments: source and destination annotation files.
    positionals = (
        ('in_file', 'Annotation file for closeset.'),
        ('out_file', 'Annotation file for openset.'),
    )
    for arg_name, description in positionals:
        arg_parser.add_argument(arg_name, help=description)

    merge_help = ('Merge two classes: "background" and "others" in closeset '
                  'to one class in openset.')
    arg_parser.add_argument('--merge', help=merge_help, action='store_true')
    arg_parser.add_argument(
        '--n_proc', default=10, type=int, help='Number of process.')

    return arg_parser.parse_args()
def main():
    """Script entry point: parse CLI options, convert the file, report."""
    cli = parse_args()
    process(
        cli.in_file,
        cli.out_file,
        merge_bg_others=cli.merge,
        n_proc=cli.n_proc)
    print('finish')
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()