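"""Evaluate image-classification models on the ImageNet-1k validation set.

For every model listed in a JSON models list, the script measures top-1/top-5
accuracy, parameter count, FLOPs, parameter memory size, and inference latency,
and appends one JSON record per model to a JSONL results file so that an
interrupted run can be resumed.

Example invocation (the filename ``evaluate_imagenet.py`` is only a
placeholder; adjust device, paths, and batch size to your setup):

    python evaluate_imagenet.py \
        --models-list models_list.json \
        --output-path imagenet_results.jsonl \
        --device cuda:0 \
        --batch-size 128
"""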
import torch
import argparse
import json
import time
import os
from copy import deepcopy
from fvcore.nn import FlopCountAnalysis, parameter_count_table
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import AutoModel, AutoModelForImageClassification, AutoImageProcessor
from tqdm import tqdm
from PIL import Image


def get_image_size(processor):
    """Infer the spatial size the image processor produces by running a dummy image through it."""
    dummy_image = Image.new("RGB", (256, 256), color="white")
    dummy_image = processor(dummy_image, return_tensors="pt")
    image_size = {
        'height': dummy_image['pixel_values'].shape[-2],
        'width': dummy_image['pixel_values'].shape[-1]
    }
    return image_size
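
# Note: most ImageNet classification checkpoints resize to a fixed square
# resolution (commonly 224x224), so this typically returns
# {'height': 224, 'width': 224}; the exact values depend on the checkpoint.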


def benchmark_inference(model, processor, device, runs=20, warmup=5):
    """Measure average single-image latency (ms) and throughput (images/s) on random input."""
    image_size = get_image_size(processor)
    dummy_image = torch.randn(1, 3, image_size['height'], image_size['width'], device=device)
    model.eval()
    # Only synchronize when timing on a CUDA device
    sync = torch.cuda.synchronize if device.type == "cuda" else (lambda: None)
    with torch.no_grad():  # disable autograd so its bookkeeping does not skew the timing
        # Warmup runs to amortize kernel launch / caching effects
        for _ in range(warmup):
            _ = model(dummy_image)
        sync()
        start = time.time()
        for _ in range(runs):
            _ = model(dummy_image)
        sync()
    elapsed = (time.time() - start) * 1000  # total elapsed time in milliseconds
    avg_latency = elapsed / runs
    throughput = 1000.0 / avg_latency
    return avg_latency, throughput


def load_dataloader(args):
    """Build a DataLoader over the ImageNet-1k validation split; images stay as PIL objects and are batched as lists."""
    dataset = load_dataset(args.data_path, split="validation")

    def collate_fn(batch):
        images = [item['image'].convert('RGB') for item in batch]
        labels = [item['label'] for item in batch]
        labels = torch.tensor(labels)
        return {
            'image': images,
            'label': labels
        }

    return DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=4, collate_fn=collate_fn)
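
# --data-path can be either the Hugging Face Hub dataset id (the default
# "ILSVRC/imagenet-1k", which is gated and requires accepting its license) or a
# local copy of the dataset, as in the commented-out override in __main__.
# Either way, the validation split is expected to expose 'image' (PIL) and
# 'label' (int) fields, which is what collate_fn above assumes.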


def evaluate_model(args, dataloader, model_info):
    """Evaluate a model on the ImageNet-1k validation set and return a metrics dict."""
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    #model = AutoModel.from_pretrained(model_info["path"], trust_remote_code=True)
    model = AutoModelForImageClassification.from_pretrained(
        model_info["path"],
        #trust_remote_code=True
    )
    processor = AutoImageProcessor.from_pretrained(model_info["path"])
    image_size = get_image_size(processor)
    model.to(device)
    model.eval()
    # Initialize accuracy counters
    correct_top1 = 0
    correct_top5 = 0
    total_samples = 0
    with torch.no_grad():
        for batch_idx, batch in enumerate(tqdm(dataloader, desc="Evaluating")):
            images = batch['image']
            labels = batch['label']
            inputs = processor(images, return_tensors="pt")
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            outputs = model(**inputs)
            if device.type == "cuda":
                torch.cuda.synchronize()
            logits = outputs.logits
            predictions = torch.softmax(logits, dim=-1)
            # Top-5 class indices per sample; column 0 is the top-1 prediction
            _, predicted_classes = torch.topk(predictions, 5, dim=1)
            correct_top1 += (predicted_classes[:, 0] == labels).sum().item()
            # A sample is top-5 correct if any of the five columns matches the label
            for i in range(5):
                correct_top5 += (predicted_classes[:, i] == labels).sum().item()
            total_samples += labels.size(0)
    top1_accuracy = (correct_top1 / total_samples) * 100
    top5_accuracy = (correct_top5 / total_samples) * 100
    avg_inference_time, throughput = benchmark_inference(model, processor, device)
    # Reuse the first image of the last batch to build a sample input for FLOP counting
    sample_inputs = processor(images[:1], return_tensors="pt")
    sample_inputs = {k: v.to(device) for k, v in sample_inputs.items()}
    # Total parameter count (raw count; divide by 1e6 for millions)
    total_params = sum(p.numel() for p in model.parameters())
    parameters_millions = total_params / 1e6
    # Model size in bytes (parameters only)
    model_size = sum(p.numel() * p.element_size() for p in model.parameters())
    sample_tensor = sample_inputs['pixel_values']
    flops = FlopCountAnalysis(model, sample_tensor).total()
    metrics = {
        "model": model_info["path"],
        "top1_accuracy": top1_accuracy,
        "top5_accuracy": top5_accuracy,
        "parameters": total_params,
        "flops": flops,
        "inference_time": avg_inference_time,
        "model_size": model_size,
        "license": model_info["license"]
    }
    return metrics


def load_models_list(json_path):
    """Load models list from JSON file"""
    with open(json_path, 'r') as f:
        models = json.load(f)
    return models
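
# The models list is expected to be a JSON array of objects with at least a
# "path" (Hugging Face model id or local checkpoint path) and a "license" key,
# since evaluate_model reads model_info["path"] and model_info["license"], e.g.:
#
#   [
#     {"path": "google/vit-base-patch16-224", "license": "apache-2.0"},
#     {"path": "microsoft/resnet-50", "license": "apache-2.0"}
#   ]
#
# (The entries above are illustrative; any checkpoint loadable with
# AutoModelForImageClassification should work.)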


def load_existing_results(output_path):
    """Load existing results from JSONL file and return set of evaluated model paths"""
    evaluated_models = set()
    results = []
    if os.path.exists(output_path):
        try:
            with open(output_path, 'r') as f:
                for line in f:
                    if line.strip():  # Skip empty lines
                        result = json.loads(line.strip())
                        evaluated_models.add(result['model'])
                        results.append(result)
            print(f"Found {len(evaluated_models)} existing results in {output_path}")
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Error reading existing results from {output_path}: {e}")
            print("Starting fresh evaluation...")
    return evaluated_models, results


def save_result_to_jsonl(result, output_path):
    """Append a single evaluation result to JSONL file"""
    with open(output_path, 'a') as jsonlfile:
        jsonlfile.write(json.dumps(result) + '\n')
    print(f"Result saved to {output_path}")


def save_results_to_jsonl(results, output_path):
    """Save evaluation results to JSONL file (overwrites existing file)"""
    if not results:
        print("No results to save.")
        return
    with open(output_path, 'w') as jsonlfile:
        for result in results:
            jsonlfile.write(json.dumps(result) + '\n')
    print(f"Results saved to {output_path}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='ImageNet-1k Evaluation',
        description='Evaluate models on ImageNet-1k validation set',
        epilog='Results will be saved to JSONL file')
    parser.add_argument('--data-path', default="ILSVRC/imagenet-1k",
                        help='Path to ImageNet-1k dataset')
    parser.add_argument('--device', default="cuda:6",
                        help='Device to use for evaluation (cuda/cpu)')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='Batch size for evaluation')
    parser.add_argument('--models-list', default="models_list.json",
                        help='Path to JSON file containing models list')
    parser.add_argument('--output-path', default="imagenet_results.jsonl",
                        help='Path to save evaluation results')
    args = parser.parse_args()
    # Machine-specific overrides (kept commented out so the CLI flags stay authoritative)
    #args.device = "cuda:6"
    #args.data_path = "/data3/salah/datasets/imagenet-1k"
    # Load models list
    models_list = load_models_list(args.models_list)
    # Load existing results to avoid re-evaluating models
    evaluated_models, existing_results = load_existing_results(args.output_path)
    # Filter out models that have already been evaluated
    models_to_evaluate = [model for model in models_list if model['path'] not in evaluated_models]
    if len(models_to_evaluate) < len(models_list):
        skipped_count = len(models_list) - len(models_to_evaluate)
        print(f"Skipping {skipped_count} models that have already been evaluated")
    if not models_to_evaluate:
        print("All models have already been evaluated!")
        results = existing_results
    else:
        # Load dataset only if we have models to evaluate
        print("Loading dataset...")
        dataloader = load_dataloader(args)
        print(f"Dataset loaded with {len(dataloader)} batches")
        # Evaluate remaining models
        results = existing_results.copy()  # Start with existing results
        for i, model_info in enumerate(models_to_evaluate):
            print(f"\n{'='*50}")
            print(f"Evaluating model {i+1}/{len(models_to_evaluate)}: {model_info['path']}")
            print(f"{'='*50}")
            try:
                metrics = evaluate_model(args, dataloader, model_info)
            except Exception as e:
                # Record the failure so the summary below can report it (top1_accuracy == -1)
                print(f"Evaluation failed for {model_info['path']}: {e}")
                metrics = {
                    "model": model_info["path"],
                    "top1_accuracy": -1,
                    "top5_accuracy": -1,
                    "license": model_info.get("license")
                }
            results.append(metrics)
            # Save result immediately after each model evaluation
            save_result_to_jsonl(metrics, args.output_path)
        print(f"\nEvaluation complete! Results saved to {args.output_path}")
    # Print summary
    print("\nSummary:")
    for result in results:
        if result['top1_accuracy'] != -1:
            print(f"  {result['model']}: {result['top1_accuracy']:.2f}% Top-1, {result['top5_accuracy']:.2f}% Top-5")
        else:
            print(f"  {result['model']}: Failed to evaluate")