Spaces:
Runtime error
Runtime error
| import copy | |
| import shutil | |
| import argparse | |
| import json | |
| import sys | |
| import os | |
| from tqdm import tqdm | |
| sys.path.append('.') | |
| from imaginaire.utils.lmdb import create_metadata, \ | |
| construct_file_path, check_and_add, build_lmdb # noqa: E402 | |
| from imaginaire.config import Config # noqa: E402 | |
| def parse_args(): | |
| r"""Parse user input arguments""" | |
| parser = argparse.ArgumentParser(description='Folder -> LMDB conversion') | |
| parser.add_argument('--data_root', type=str, required=True, | |
| help='Input data location.') | |
| parser.add_argument('--config', type=str, required=True, | |
| help='Config with label info.') | |
| parser.add_argument('--output_root', type=str, required=True, | |
| help='Output LMDB location') | |
| parser.add_argument('--input_list', type=str, default='', | |
| help='list of images that will be used.') | |
| parser.add_argument('--metadata_factor', type=float, default=0.75, | |
| help='Factor of filesize to allocate for metadata?') | |
| parser.add_argument('--overwrite', default=False, action='store_true', | |
| help='Overwrite output file if exists') | |
| parser.add_argument('--paired', default=False, action='store_true', | |
| help='Is the input data paired?') | |
| parser.add_argument('--large', default=False, action='store_true', | |
| help='Is the dataset large?') | |
| parser.add_argument('--remove_missing', default=False, action='store_true', | |
| help='Remove missing files from paired datasets?') | |
| args = parser.parse_args() | |
| return args | |
| def main(): | |
| r""" Build lmdb for training/testing. | |
| Usage: | |
| python scripts/build_lmdb.py \ | |
| --config configs/data_image.yaml \ | |
| --data_root /mnt/bigdata01/datasets/test_image \ | |
| --output_root /mnt/bigdata01/datasets/test_image/lmdb_0/ \ | |
| --overwrite | |
| """ | |
| args = parse_args() | |
| cfg = Config(args.config) | |
| # Check if output file already exists. | |
| if os.path.exists(args.output_root): | |
| if args.overwrite: | |
| print('Deleting existing output LMDB.') | |
| shutil.rmtree(args.output_root) | |
| else: | |
| print('Output root LMDB already exists. Use --overwrite. ' + | |
| 'Exiting...') | |
| return | |
| all_filenames, extensions = \ | |
| create_metadata(data_root=args.data_root, | |
| cfg=cfg, | |
| paired=args.paired, | |
| input_list=args.input_list) | |
| required_data_types = cfg.data.data_types | |
| # Build LMDB. | |
| os.makedirs(args.output_root) | |
| for data_type in required_data_types: | |
| data_size = 0 | |
| print('Data type:', data_type) | |
| filepaths, keys = [], [] | |
| print('>> Building file list.') | |
| # Get appropriate list of files. | |
| if args.paired: | |
| filenames = all_filenames | |
| else: | |
| filenames = all_filenames[data_type] | |
| for sequence in tqdm(filenames): | |
| for filename in copy.deepcopy(filenames[sequence]): | |
| filepath = construct_file_path( | |
| args.data_root, data_type, sequence, filename, | |
| extensions[data_type]) | |
| key = '%s/%s' % (sequence, filename) | |
| filesize = check_and_add(filepath, key, filepaths, keys, | |
| remove_missing=args.remove_missing) | |
| # Remove file from list, if missing. | |
| if filesize == -1 and args.paired and args.remove_missing: | |
| print('Removing %s from list' % (filename)) | |
| filenames[sequence].remove(filename) | |
| data_size += filesize | |
| # Remove empty sequences. | |
| if args.paired and args.remove_missing: | |
| for sequence in copy.deepcopy(all_filenames): | |
| if not all_filenames[sequence]: | |
| all_filenames.pop(sequence) | |
| # Allocate size. | |
| data_size = max(int((1 + args.metadata_factor) * data_size), 1e9) | |
| print('Reserved size: %s, %dGB' % (data_type, data_size // 1e9)) | |
| # Write LMDB to file. | |
| output_filepath = os.path.join(args.output_root, data_type) | |
| build_lmdb(filepaths, keys, output_filepath, data_size, args.large) | |
| # Output list of all filenames. | |
| if args.output_root: | |
| with open(args.output_root + '/all_filenames.json', 'w') as fout: | |
| json.dump(all_filenames, fout, indent=4) | |
| # Output metadata. | |
| with open(args.output_root + '/metadata.json', 'w') as fout: | |
| json.dump(extensions, fout, indent=4) | |
| else: | |
| return all_filenames, extensions | |
| if __name__ == "__main__": | |
| main() | |