import json
import webdataset as wds
from tqdm import tqdm
from PIL import Image
import torch
import numpy as np
import os
import time
import cv2
import random
import pandas as pd
from .vl_checklist import _eval_text_image

DATASET_ROOT = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/cdl/instruct_data/crepe/prod_hard_negatives"


def evaluate_crepe(
    model,
    tokenizer,
    image_processor,
    vis_embed_size=None,
    rank=0,
    world_size=1,
    id=0,
    subset=True,
    debug=False,
    level=4,
    type="swap",
):
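    """Evaluate the model on CREPE hard negatives (the prod_hard_negatives "swap" split).

    Each caption is scored against its image crop with ``_eval_text_image``;
    a candidate counts as correct when its rank is below 10. Per-rank counts
    are written to JSON files and aggregated on rank 0.
    """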
    if rank == 0:
        tqdm.write(f"level: {level}")
        tqdm.write(f"type: {type}")
    dataset_name = "crepe"
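    # Resolve the ids of the special grounding tokens from the tokenizer.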
    media_token_id = tokenizer("<|#image#|>", add_special_tokens=False)["input_ids"][-1]
    box_token_id = tokenizer("<|#box#|>", add_special_tokens=False)["input_ids"][-1]
    endofobject_token_id = tokenizer("<|#endofobject#|>", add_special_tokens=False)["input_ids"][-1]
    endofattr_token_id = tokenizer("<|#endofattr#|>", add_special_tokens=False)["input_ids"][-1]
    endofmedia_token_id = tokenizer("<|#endofimage#|>", add_special_tokens=False)["input_ids"][-1]
    visual_token_id = tokenizer("<|#visual#|>", add_special_tokens=False)["input_ids"][-1]
    previsual_token_id = tokenizer("<|#previsual#|>", add_special_tokens=False)["input_ids"][-1]
    prebox_token_id = tokenizer("<|#prebox#|>", add_special_tokens=False)["input_ids"][-1]
    model.eval().cuda()
    total = 0
    correct = 0
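    # Only the "swap" hard-negative set with complexity levels 4-12 is supported.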
| assert type in ["swap"] | |
| assert 4 <= level <= 12 | |
| filename = os.path.join(DATASET_ROOT, type, f"prod_vg_hard_negs_{type}_complexity_{level}.csv") | |
| df = pd.read_csv(filename) | |
| pbar = tqdm(df.iterrows(), disable=(rank != 0)) | |
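    # Each CSV row holds a caption, a Visual Genome image id, and a crop region
    # (x, y, width, height); rows are sharded round-robin across ranks.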
    for ii, sample in pbar:
        if ii % world_size != rank:
            continue
        text = sample.caption
        image_path = "/gpfs/u/home/LMCG/LMCGljnn/scratch/datasets/raw/vg/VG_100K/{}.jpg".format(sample.image_id)
        x = sample.x
        y = sample.y
        width = sample.width
        height = sample.height
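        # Crop the annotated region from the full image and resize it to the
        # 224x224 input resolution before scoring.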
        image = Image.open(image_path).convert("RGB")
        image = image.crop((x, y, x + width, y + height))
        image = image.resize((224, 224))
        final_rank, final_ranks = _eval_text_image(text, image, model, tokenizer, image_processor, vis_embed_size, media_token_id, prebox_token_id, debug=debug)
        if final_rank is None:
            continue
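        # A candidate counts as correct when its rank is below 10.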
        correct += int((np.array(final_ranks) < 10).sum())
        total += len(final_ranks)
        if debug:
            tqdm.write("=" * 80)
        pbar.set_description(f"{text} | score: {correct / total:.4f} | {final_rank} | {final_ranks}")
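    # Persist this rank's partial counts so rank 0 can aggregate them.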
| with open(f"{dataset_name}_results_part{rank}_{id}.json", "w") as f: | |
| f.write(json.dumps([total, correct])) | |
| if world_size > 1: | |
| torch.distributed.barrier() | |
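    # Rank 0 merges the per-rank result files and removes them afterwards.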
    if rank == 0:
        total = 0
        correct = 0
        print(f"evaluate on rank {rank}. world size is {world_size}")
        for rank_i in range(world_size):
            [total_part, correct_part] = json.load(open(f"{dataset_name}_results_part{rank_i}_{id}.json"))
            os.remove(f"{dataset_name}_results_part{rank_i}_{id}.json")
            total += total_part
            correct += correct_part
        score = correct / total
        print("score:", score, "total:", total)
| with open(os.path.join("eval_results", f"{dataset_name}_{model.expr_name}_{model.step_num}_{int(time.time())}_{score}"), "w") as f: | |
| pass | |
| else: | |
| score = 0.0 | |
| if world_size > 1: | |
| torch.distributed.barrier() | |
| return score | |
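

# A minimal (hypothetical) usage sketch, assuming torch.distributed is already
# initialized and that `model`, `tokenizer`, and `image_processor` come from the
# surrounding codebase; `vis_embed_size=64` is a placeholder value:
#
#   score = evaluate_crepe(
#       model,
#       tokenizer,
#       image_processor,
#       vis_embed_size=64,
#       rank=torch.distributed.get_rank(),
#       world_size=torch.distributed.get_world_size(),
#       level=4,
#       type="swap",
#   )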