Add 13/09/2025 results
Browse files
- .gitignore +2 -1
- evaluation.py +24 -9
- imagenet_results.jsonl +11 -0
- models_list.json +24 -40
.gitignore
CHANGED
@@ -1 +1,2 @@
-__pycache__/
+__pycache__/
+tests/
evaluation.py
CHANGED
@@ -3,15 +3,29 @@ import argparse
 import json
 import time
 import os
+from copy import deepcopy
 from fvcore.nn import FlopCountAnalysis, parameter_count_table
 from torch.utils.data import DataLoader
 from datasets import load_dataset
-from transformers import AutoModelForImageClassification, AutoImageProcessor
+from transformers import AutoModel, AutoModelForImageClassification, AutoImageProcessor
 from tqdm import tqdm
+from PIL import Image
 
 
+def get_image_size(processor):
+    dummy_image = Image.new("RGB", (256, 256), color="white")
+    dummy_image = processor(dummy_image, return_tensors="pt")
+
+    image_size = {
+        'height': dummy_image['pixel_values'].shape[-2],
+        'width': dummy_image['pixel_values'].shape[-1]
+    }
+
+    return image_size
+
 def benchmark_inference(model, processor, device, runs=20, warmup=5):
-
+    image_size = get_image_size(processor)
+    dummy_image = torch.randn(1, 3, image_size['height'], image_size['width'], device=device)
     model.eval()
 
     # Warmup
@@ -49,15 +63,16 @@ def load_dataloader(args):
 def evaluate_model(args, dataloader, model_info):
     """Evaluate a model on ImageNet-1k validation set"""
     device = torch.device(args.device if torch.cuda.is_available() else "cpu")
-
+
+    #model = AutoModel.from_pretrained(model_info["path"], trust_remote_code=True)
     model = AutoModelForImageClassification.from_pretrained(
-        model_info["path"],
-
+        model_info["path"],
+        #trust_remote_code=True
     )
 
     processor = AutoImageProcessor.from_pretrained(model_info["path"])
-
-
+    image_size = get_image_size(processor)
+
     model.to(device)
     model.eval()
 
@@ -181,7 +196,7 @@ if __name__ == "__main__":
                         epilog='Results will be saved to JSONL file')
     parser.add_argument('--data-path', default="ILSVRC/imagenet-1k",
                         help='Path to ImageNet-1k dataset')
-    parser.add_argument('--device', default="cuda",
+    parser.add_argument('--device', default="cuda:6",
                         help='Device to use for evaluation (cuda/cpu)')
     parser.add_argument('--batch-size', type=int, default=128,
                         help='Batch size for evaluation')
@@ -192,7 +207,7 @@ if __name__ == "__main__":
    args = parser.parse_args()
 
     # Override data path with absolute path
-    args.device = "cuda:6"
+    #args.device = "cuda:6"
     args.data_path = "/data3/salah/datasets/imagenet-1k"
 
     # Load models list
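For context, a minimal sketch of what the new helper does, lifted from the hunk above: it pushes a throwaway image through the checkpoint's image processor and reads back the output shape, so the benchmark tensor matches the resolution the model actually expects. The checkpoint name google/vit-base-patch16-224 is illustrative only, not taken from this repo's evaluation loop.

    from PIL import Image
    from transformers import AutoImageProcessor

    def get_image_size(processor):
        # Run a dummy image through the processor and read the resulting
        # tensor shape; works for processors that resize/crop to a fixed size.
        dummy_image = Image.new("RGB", (256, 256), color="white")
        dummy_image = processor(dummy_image, return_tensors="pt")
        return {
            'height': dummy_image['pixel_values'].shape[-2],
            'width': dummy_image['pixel_values'].shape[-1]
        }

    processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
    print(get_image_size(processor))  # expected: {'height': 224, 'width': 224}

benchmark_inference now builds its dummy tensor from this size, so 384px checkpoints such as the ConvNeXt and CvT variants are timed at their native resolution.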
imagenet_results.jsonl
CHANGED
@@ -17,3 +17,14 @@
 {"model": "facebook/convnext-xlarge-224-22k-1k", "top1_accuracy": 86.98599999999999, "top5_accuracy": 98.202, "parameters": 350196968, "flops": 60967704576, "inference_time": 22.357892990112305, "model_size": 1400787872, "license": "Open"}
 {"model": "facebook/convnext-xlarge-384-22k-1k", "top1_accuracy": 87.764, "top5_accuracy": 98.55199999999999, "parameters": 350196968, "flops": 179166406656, "inference_time": 22.17559814453125, "model_size": 1400787872, "license": "Open"}
 {"model": "facebook/deit-base-distilled-patch16-224", "top1_accuracy": 83.39800000000001, "top5_accuracy": 96.43599999999999, "parameters": 87338192, "flops": 16953211392, "inference_time": 5.678451061248779, "model_size": 349352768, "license": "Open"}
+{"model": "google/efficientnet-b0", "top1_accuracy": 75.472, "top5_accuracy": 92.552, "parameters": 5288548, "flops": 17061920, "inference_time": 6.572115421295166, "model_size": 21154192, "license": "Apache-2.0"}
+{"model": "google/efficientnet-b1", "top1_accuracy": 77.05, "top5_accuracy": 93.514, "parameters": 7794184, "flops": 27127664, "inference_time": 9.38577651977539, "model_size": 31176736, "license": "Apache-2.0"}
+{"model": "google/efficientnet-b2", "top1_accuracy": 78.25999999999999, "top5_accuracy": 94.192, "parameters": 9109994, "flops": 34311200, "inference_time": 13.025212287902832, "model_size": 36439976, "license": "Apache-2.0"}
+{"model": "google/efficientnet-b3", "top1_accuracy": 80.43, "top5_accuracy": 95.346, "parameters": 12233232, "flops": 58144576, "inference_time": 10.713708400726318, "model_size": 48932928, "license": "Apache-2.0"}
+{"model": "google/efficientnet-b4", "top1_accuracy": 82.53, "top5_accuracy": 96.136, "parameters": 19341616, "flops": 120258128, "inference_time": 13.012278079986572, "model_size": 77366464, "license": "Apache-2.0"}
+{"model": "google/efficientnet-b5", "top1_accuracy": 83.358, "top5_accuracy": 96.612, "parameters": 30389784, "flops": 240274768, "inference_time": 21.18891477584839, "model_size": 121559136, "license": "Apache-2.0"}
+{"model": "google/efficientnet-b6", "top1_accuracy": 83.928, "top5_accuracy": 96.892, "parameters": 43040704, "flops": 406106768, "inference_time": 18.17166805267334, "model_size": 172162816, "license": "Apache-2.0"}
+{"model": "microsoft/cvt-13", "top1_accuracy": 81.404, "top5_accuracy": 95.616, "parameters": 19997480, "flops": 4584573392.0, "inference_time": 11.934351921081543, "model_size": 79989920, "license": "Open"}
+{"model": "microsoft/cvt-21-384", "top1_accuracy": 82.648, "top5_accuracy": 95.882, "parameters": 31622696, "flops": 24932082688.0, "inference_time": 18.59004497528076, "model_size": 126490784, "license": "Open"}
+{"model": "microsoft/cvt-21", "top1_accuracy": 81.54, "top5_accuracy": 95.438, "parameters": 31622696, "flops": 7206283808.0, "inference_time": 19.456958770751953, "model_size": 126490784, "license": "Open"}
+{"model": "microsoft/cvt-13-384", "top1_accuracy": 82.788, "top5_accuracy": 96.308, "parameters": 19997480, "flops": 16323617952.0, "inference_time": 16.565978527069092, "model_size": 79989920, "license": "Open"}
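The eleven appended rows follow the same schema as the existing ones. A quick way to sanity-check them, assuming the file is read from the repo root (this ranking snippet is not part of the commit):

    import json

    # Load every record from the results file and rank by top-1 accuracy.
    with open("imagenet_results.jsonl") as f:
        results = [json.loads(line) for line in f if line.strip()]

    for r in sorted(results, key=lambda r: r["top1_accuracy"], reverse=True)[:5]:
        print(f"{r['model']:45s} top1={r['top1_accuracy']:.2f} "
              f"params={r['parameters'] / 1e6:.1f}M")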
models_list.json
CHANGED
@@ -5,6 +5,11 @@
 {"path": "microsoft/resnet-50", "paper": "https://arxiv.org/abs/1512.03385", "license": "Open", "year": 2015},
 {"path": "microsoft/resnet-101", "paper": "https://arxiv.org/abs/1512.03385", "license": "Open", "year": 2015},
 {"path": "microsoft/resnet-152", "paper": "https://arxiv.org/abs/1512.03385", "license": "Open", "year": 2015},
+
+{"path": "microsoft/cvt-13", "paper": "https://arxiv.org/abs/2103.15808", "license": "Open", "year": 2021},
+{"path": "microsoft/cvt-21-384", "paper": "https://arxiv.org/abs/2103.15808", "license": "Open", "year": 2021},
+{"path": "microsoft/cvt-21", "paper": "https://arxiv.org/abs/2103.15808", "license": "Open", "year": 2021},
+{"path": "microsoft/cvt-13-384", "paper": "https://arxiv.org/abs/2103.15808", "license": "Open", "year": 2021},
 
 {"path": "facebook/convnext-base-224-22k-1k", "paper": "https://arxiv.org/abs/2201.09792", "license": "Open", "year": 2022},
 {"path": "facebook/convnext-base-224", "paper": "https://arxiv.org/abs/2201.09792", "license": "Open", "year": 2022},
@@ -115,9 +120,6 @@
 {"path": "google/vit-large-patch32-384", "paper": "https://arxiv.org/abs/2010.11929", "license": "Apache-2.0", "year": 2020},
 {"path": "google/vit-hybrid-base-bit-384", "paper": "https://arxiv.org/abs/2010.11929", "license": "Apache-2.0", "year": 2020},
 
-{"path": "google/cxr-foundation", "paper": "https://arxiv.org/abs/2311.18775", "license": "Apache-2.0", "year": 2023},
-{"path": "google/derm-foundation", "paper": "https://arxiv.org/abs/2311.18775", "license": "Apache-2.0", "year": 2023},
-
 {"path": "google/mobilenet_v2_1.4_224", "paper": "https://arxiv.org/abs/1801.04381", "license": "Apache-2.0", "year": 2018},
 {"path": "google/mobilenet_v2_1.0_224", "paper": "https://arxiv.org/abs/1801.04381", "license": "Apache-2.0", "year": 2018},
 {"path": "google/mobilenet_v2_0.75_160", "paper": "https://arxiv.org/abs/1801.04381", "license": "Apache-2.0", "year": 2018},
@@ -137,41 +139,23 @@
 {"path": "google/efficientnet-b6", "paper": "https://arxiv.org/abs/1905.11946", "license": "Apache-2.0", "year": 2019},
 {"path": "google/efficientnet-b7", "paper": "https://arxiv.org/abs/1905.11946", "license": "Apache-2.0", "year": 2019},
 
-
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-{"path": "
-
-
-{"path": "qualcomm/Swin-Small", "paper": "https://arxiv.org/abs/2103.14030", "license": "Open", "year": 2021},
-{"path": "qualcomm/ResNet50", "paper": "https://arxiv.org/abs/1512.03385", "license": "Open", "year": 2015},
-{"path": "qualcomm/ResNeXt50", "paper": "https://arxiv.org/abs/1611.05431", "license": "Open", "year": 2016},
-{"path": "qualcomm/VIT", "paper": "https://arxiv.org/abs/2010.11929", "license": "Open", "year": 2020},
-{"path": "qualcomm/EfficientViT-l2-cls", "paper": "https://arxiv.org/abs/2205.14756", "license": "Open", "year": 2022},
-{"path": "qualcomm/EfficientNet-B4", "paper": "https://arxiv.org/abs/1905.11946", "license": "Open", "year": 2019},
-{"path": "qualcomm/EfficientViT-b2-cls", "paper": "https://arxiv.org/abs/2205.14756", "license": "Open", "year": 2022},
-{"path": "qualcomm/EfficientNet-V2-s", "paper": "https://arxiv.org/abs/2104.00298", "license": "Open", "year": 2021},
-{"path": "qualcomm/ConvNext-Base", "paper": "https://arxiv.org/abs/2201.03545", "license": "Open", "year": 2022},
-{"path": "qualcomm/Beit", "paper": "https://arxiv.org/abs/2106.08254", "license": "Open", "year": 2021},
-{"path": "qualcomm/LeViT", "paper": "https://arxiv.org/abs/2104.01136", "license": "Open", "year": 2021},
-{"path": "qualcomm/NASNet", "paper": "https://arxiv.org/abs/1707.07012", "license": "Open", "year": 2017},
-{"path": "qualcomm/DLA-102-X", "paper": "https://arxiv.org/abs/1707.06484", "license": "Open", "year": 2017},
-{"path": "qualcomm/Mobile-VIT", "paper": "https://arxiv.org/abs/2110.02178", "license": "Open", "year": 2021},
-{"path": "qualcomm/SqueezeNet-1.1", "paper": "https://arxiv.org/abs/1602.07360", "license": "Open", "year": 2016},
-{"path": "qualcomm/EfficientFormer", "paper": "https://arxiv.org/abs/2206.01191", "license": "Open", "year": 2022},
-
-{"path": "Intel/vit-base-patch16-224-int8-static-inc", "paper": "https://arxiv.org/abs/2010.11929", "license": "Apache-2.0", "year": 2020}
+{"path": "nvidia/mamba_vision_160k_ade20k-512x512_base", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/mamba_vision_160k_ade20k-512x512_small", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/mamba_vision_160k_ade20k-512x512_tiny", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/MambaVision-L2-1K", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/MambaVision-L-1K", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/MambaVision-B-1K", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/MambaVision-T-1K", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/MambaVision-T2-1K", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+{"path": "nvidia/MambaVision-S-1K", "paper": "https://arxiv.org/abs/2407.08083", "license": "NVIDIA Source Code License-NC", "year": 2024},
+
+{"path": "OpenGVLab/internimage_t_1k_224", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022},
+{"path": "OpenGVLab/internimage_s_1k_224", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022},
+{"path": "OpenGVLab/internimage_b_1k_224", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022},
+{"path": "OpenGVLab/internimage_l_22kto1k_384", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022},
+{"path": "OpenGVLab/internimage_xl_22kto1k_384", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022},
+{"path": "OpenGVLab/internimage_h_22kto1k_640", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022},
+{"path": "OpenGVLab/internimage_g_22kto1k_512", "paper": "https://arxiv.org/abs/2211.05778", "license": "Open", "year": 2022}
+
+
 ]
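The "# Load models list" step in evaluation.py falls outside the hunks shown above, so the exact consumer code is not visible here; a plausible minimal sketch, assuming the list is plain JSON at the repo root:

    import json

    # Each entry carries the Hub path plus paper/license/year metadata.
    with open("models_list.json") as f:
        models = json.load(f)

    for model_info in models:
        print(model_info["path"], model_info["license"], model_info["year"])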