Spaces:
Paused
Paused
| """ | |
| Changes proportion of examples in metadata_sampled.json | |
| Usage: | |
| python3 -m fastchat.serve.vision.create_vqa_examples_json | |
| """ | |
| import json | |
| import argparse | |
| import numpy as np | |
def sample_examples(data, count):
    """Draw ``count`` examples from ``data`` uniformly at random, with replacement.

    Indices are sampled instead of passing ``data`` itself to
    ``np.random.choice``. Passing the list would first coerce it to a NumPy
    array, which turns scalar examples into NumPy scalars (not JSON
    serializable) and fails on equal-length nested lists (they become a 2-D
    array). Picking by index returns the original Python objects unchanged
    while keeping the same uniform, with-replacement sampling.

    Args:
        data: Sequence of examples (assumed to be the list loaded from a
            dataset's ``data.json`` -- confirm against the producer script).
        count: Number of examples to draw; may exceed ``len(data)`` because
            sampling is done with replacement.

    Returns:
        A list of ``count`` elements taken from ``data``.

    Raises:
        ValueError: If ``data`` is empty while ``count`` is non-zero.
    """
    if count == 0:
        return []
    if not data:
        raise ValueError("cannot sample from an empty dataset")
    indices = np.random.choice(len(data), size=count)
    return [data[i] for i in indices]


def build_sampled_metadata(output_dir, dataset_prop):
    """Collect the sampled examples of every dataset listed in ``dataset_prop``.

    Args:
        output_dir: Directory containing one ``<dataset_name>/data.json`` file
            per dataset.
        dataset_prop: Mapping of dataset name to the number of examples to
            sample from that dataset.

    Returns:
        A single list with the sampled examples of all datasets, in the
        iteration order of ``dataset_prop``.
    """
    sampled = []
    for dataset_name, count in dataset_prop.items():
        with open(f"{output_dir}/{dataset_name}/data.json", encoding="utf-8") as f:
            data = json.load(f)
        sampled.extend(sample_examples(data, count))
    return sampled


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # NOTE: --data_dir is parsed but not read anywhere in this script; it is
    # kept so existing command lines that pass it keep working.
    parser.add_argument("--data_dir", type=str, default="~/.cache")
    parser.add_argument("--output_dir", type=str, default="./vqa_examples")
    args = parser.parse_args()

    # Number of examples to sample from each dataset.
    dataset_prop = {
        "DocVQA": 500,
        "ChartQA": 500,
        "NewYorker": 1000,
        "WikiArt": 500,
        "TextVQA": 500,
    }

    dataset_json = build_sampled_metadata(args.output_dir, dataset_prop)

    with open(f"{args.output_dir}/metadata_sampled.json", "w") as f:
        json.dump(dataset_json, f, indent=4)