import torch
import argparse
import json
import time
import os
from fvcore.nn import FlopCountAnalysis
from torch.utils.data import DataLoader
from datasets import load_dataset
from transformers import AutoModel, AutoModelForImageClassification, AutoImageProcessor
from tqdm import tqdm
from PIL import Image


def get_image_size(processor):
    """Infer the height/width the processor produces by running it on a dummy image."""
    dummy_image = Image.new("RGB", (256, 256), color="white")
    dummy_image = processor(dummy_image, return_tensors="pt")

    image_size = {
        'height': dummy_image['pixel_values'].shape[-2],
        'width': dummy_image['pixel_values'].shape[-1]
    }

    return image_size

def benchmark_inference(model, processor, device, runs=20, warmup=5):
    """Measure average single-image latency (ms) and throughput (images/s)."""
    image_size = get_image_size(processor)
    dummy_image = torch.randn(1, 3, image_size['height'], image_size['width'], device=device)
    model.eval()

    with torch.no_grad():
        # Warmup runs to amortize one-time costs (CUDA context, cuDNN autotuning)
        for _ in range(warmup):
            _ = model(dummy_image)

        if device.type == "cuda":
            torch.cuda.synchronize()
        start = time.time()

        for _ in range(runs):
            _ = model(dummy_image)

        if device.type == "cuda":
            torch.cuda.synchronize()
        elapsed = (time.time() - start) * 1000

    avg_latency = elapsed / runs
    throughput = 1000.0 / avg_latency

    return avg_latency, throughput


def load_dataloader(args):
    """Build a DataLoader over the ImageNet-1k validation split, keeping images as PIL objects."""
    dataset = load_dataset(args.data_path, split="validation")
    def collate_fn(batch):
        images = [item['image'].convert('RGB') for item in batch]
        labels = [item['label'] for item in batch]
        labels = torch.tensor(labels)
        return {
            'image': images,
            'label': labels
        }
    return DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=4, collate_fn=collate_fn)
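
# The dataset is assumed to expose PIL images under the 'image' column and integer
# class ids under 'label', as the Hugging Face ILSVRC/imagenet-1k validation split does.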


def evaluate_model(args, dataloader, model_info):
    """Evaluate a model on ImageNet-1k validation set"""
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    #model = AutoModel.from_pretrained(model_info["path"], trust_remote_code=True)
    model = AutoModelForImageClassification.from_pretrained(
        model_info["path"],
        #trust_remote_code=True
    )
    
    processor = AutoImageProcessor.from_pretrained(model_info["path"])

    model.to(device)
    model.eval()
    
    # Initialize metrics
    correct_top1 = 0
    correct_top5 = 0
    total_samples = 0
    
    with torch.no_grad():
        for batch_idx, batch in enumerate(tqdm(dataloader, desc="Evaluating")):
            images = batch['image']
            labels = batch['label']

            inputs = processor(images, return_tensors="pt")
            
            inputs = {k: v.to(device) for k, v in inputs.items()}
            labels = labels.to(device)
            

            outputs = model(**inputs)
            if device.type == "cuda":
                torch.cuda.synchronize()
            
            logits = outputs.logits
            # Softmax is monotonic, so ranking the logits directly gives the same top-5
            _, predicted_classes = torch.topk(logits, 5, dim=1)

            correct_top1 += (predicted_classes[:, 0] == labels).sum().item()
            correct_top5 += (predicted_classes == labels.unsqueeze(1)).any(dim=1).sum().item()

            total_samples += labels.size(0)
            
    top1_accuracy = (correct_top1 / total_samples) * 100
    top5_accuracy = (correct_top5 / total_samples) * 100
    avg_inference_time, throughput = benchmark_inference(model, processor, device)


    # Reuse an image from the last evaluated batch as a sample input for FLOP counting
    sample_inputs = processor(images[:1], return_tensors="pt")
    sample_inputs = {k: v.to(device) for k, v in sample_inputs.items()}

    # Model complexity: total parameter count and size in bytes
    total_params = sum(p.numel() for p in model.parameters())
    model_size = sum(p.numel() * p.element_size() for p in model.parameters())

    sample_tensor = sample_inputs['pixel_values']
    flops = FlopCountAnalysis(model, sample_tensor).total()
    
    metrics = {
        "model": model_info["path"],
        "top1_accuracy": top1_accuracy,
        "top5_accuracy": top5_accuracy,
        "parameters": total_params,
        "flops": flops,
        "inference_time": avg_inference_time,
        "model_size": model_size,
        "license": model_info["license"]
    }
    
    return metrics
        



def load_models_list(json_path):
    """Load models list from JSON file"""
    with open(json_path, 'r') as f:
        models = json.load(f)
    return models
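
# Expected shape of models_list.json (illustrative; the model IDs below are placeholders,
# and only the 'path' and 'license' keys are read by this script):
# [
#   {"path": "org/model-name", "license": "apache-2.0"},
#   {"path": "another-org/another-model", "license": "mit"}
# ]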


def load_existing_results(output_path):
    """Load existing results from JSONL file and return set of evaluated model paths"""
    evaluated_models = set()
    results = []
    
    if os.path.exists(output_path):
        try:
            with open(output_path, 'r') as f:
                for line in f:
                    if line.strip():  # Skip empty lines
                        result = json.loads(line.strip())
                        evaluated_models.add(result['model'])
                        results.append(result)
            print(f"Found {len(evaluated_models)} existing results in {output_path}")
        except (json.JSONDecodeError, KeyError) as e:
            print(f"Warning: Error reading existing results from {output_path}: {e}")
            print("Starting fresh evaluation...")
    
    return evaluated_models, results


def save_result_to_jsonl(result, output_path):
    """Append a single evaluation result to JSONL file"""
    with open(output_path, 'a') as jsonlfile:
        jsonlfile.write(json.dumps(result) + '\n')
    
    print(f"Result saved to {output_path}")


def save_results_to_jsonl(results, output_path):
    """Save evaluation results to JSONL file (overwrites existing file)"""
    if not results:
        print("No results to save.")
        return
    
    with open(output_path, 'w') as jsonlfile:
        for result in results:
            jsonlfile.write(json.dumps(result) + '\n')
    
    print(f"Results saved to {output_path}")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
                    prog='ImageNet-1k Evaluation',
                    description='Evaluate models on ImageNet-1k validation set',
                    epilog='Results will be saved to JSONL file')
    parser.add_argument('--data-path', default="ILSVRC/imagenet-1k", 
                       help='Path to ImageNet-1k dataset')
    parser.add_argument('--device', default="cuda:6", 
                       help='Device to use for evaluation (cuda/cpu)')
    parser.add_argument('--batch-size', type=int, default=128, 
                       help='Batch size for evaluation')
    parser.add_argument('--models-list', default="models_list.json", 
                       help='Path to JSON file containing models list')
    parser.add_argument('--output-path', default="imagenet_results.jsonl", 
                       help='Path to save evaluation results')
    args = parser.parse_args()

    # Machine-specific override: point the dataset path at a local copy of ImageNet-1k
    #args.device = "cuda:6"
    args.data_path = "/data3/salah/datasets/imagenet-1k"

    # Load models list
    models_list = load_models_list(args.models_list)

    # Load existing results to avoid re-evaluating models
    evaluated_models, existing_results = load_existing_results(args.output_path)
    
    # Filter out models that have already been evaluated
    models_to_evaluate = [model for model in models_list if model['path'] not in evaluated_models]
    
    if len(models_to_evaluate) < len(models_list):
        skipped_count = len(models_list) - len(models_to_evaluate)
        print(f"Skipping {skipped_count} models that have already been evaluated")
    
    if not models_to_evaluate:
        print("All models have already been evaluated!")
        results = existing_results
    else:
        # Load dataset only if we have models to evaluate
        print("Loading dataset...")
        dataloader = load_dataloader(args)
        print(f"Dataset loaded with {len(dataloader)} batches")
        
        # Evaluate remaining models
        results = existing_results.copy()  # Start with existing results
        for i, model_info in enumerate(models_to_evaluate):
            print(f"\n{'='*50}")
            print(f"Evaluating model {i+1}/{len(models_to_evaluate)}: {model_info['path']}")
            print(f"{'='*50}")

            try:
                metrics = evaluate_model(args, dataloader, model_info)
            except Exception as e:
                # Use the -1 sentinel checked in the summary below to mark a failed run
                print(f"Evaluation failed for {model_info['path']}: {e}")
                metrics = {"model": model_info["path"], "top1_accuracy": -1,
                           "top5_accuracy": -1, "license": model_info.get("license")}
            results.append(metrics)

            # Save result immediately after each model evaluation
            save_result_to_jsonl(metrics, args.output_path)

    print(f"\nEvaluation complete! Results saved to {args.output_path}")
    
    # Print summary
    print("\nSummary:")
    for result in results:
        if result['top1_accuracy'] != -1:
            print(f"  {result['model']}: {result['top1_accuracy']:.2f}% Top-1, {result['top5_accuracy']:.2f}% Top-5")
        else:
            print(f"  {result['model']}: Failed to evaluate")