"""
Benchmark tool that runs inference with a given model over the COCO val2017
set using ONNX Runtime or TensorRT, reporting COCO bbox accuracy and a
reliable measurement of per-image inference latency on the device.
"""

import argparse
import contextlib
import copy
import json
import os
import os.path as osp
import random
import time
from collections import namedtuple, OrderedDict

import numpy as np
import torch
import torchvision.transforms.functional as F
import tqdm
from PIL import Image
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

import pycuda.driver as cuda
import pycuda.autoinit  # noqa: F401 -- initializes the CUDA context
import onnxruntime as nxrun
import tensorrt as trt


def parser_args():
    parser = argparse.ArgumentParser('performance benchmark tool for onnx/trt model')
    parser.add_argument('--path', type=str, help='path to the .onnx model or .engine file')
    parser.add_argument('--coco_path', type=str, default='data/coco', help='COCO dataset path')
    parser.add_argument('--device', default=0, type=int, help='CUDA device index')
    parser.add_argument('--run_benchmark', action='store_true',
                        help='repeat the inference to benchmark the latency')
    parser.add_argument('--disable_eval', action='store_true', help='disable COCO evaluation')
    return parser.parse_args()


class CocoEvaluator(object):
    """COCO bbox evaluator, adapted from the DETR reference implementation."""

    def __init__(self, coco_gt, iou_types):
        assert isinstance(iou_types, (list, tuple))
        coco_gt = COCO(coco_gt)
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {}
        for iou_type in iou_types:
            self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type)

        self.img_ids = []
        self.eval_imgs = {k: [] for k in iou_types}

    def update(self, predictions):
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)

            # suppress pycocotools prints while loading the detection results
            with open(os.devnull, 'w') as devnull:
                with contextlib.redirect_stdout(devnull):
                    coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
            coco_eval = self.coco_eval[iou_type]

            coco_eval.cocoDt = coco_dt
            coco_eval.params.imgIds = list(img_ids)
            img_ids, eval_imgs = evaluate(coco_eval)

            self.eval_imgs[iou_type].append(eval_imgs)

    def synchronize_between_processes(self):
        for iou_type in self.iou_types:
            self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type])

    def accumulate(self):
        for coco_eval in self.coco_eval.values():
            coco_eval.accumulate()

    def summarize(self):
        for iou_type, coco_eval in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            coco_eval.summarize()

    def prepare(self, predictions, iou_type):
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = prediction["boxes"]
            boxes = convert_to_xywh(boxes).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            coco_results.extend(
                [
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                    for k, box in enumerate(boxes)
                ]
            )
        return coco_results


def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    img_ids = list(img_ids)
    eval_imgs = list(eval_imgs.flatten())

    coco_eval.evalImgs = eval_imgs
    coco_eval.params.imgIds = img_ids
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)


def evaluate(self):
    '''
    Run per-image evaluation on the given images and store the results
    (a list of dicts) in self.evalImgs. Adapted from pycocotools'
    COCOeval.evaluate(); unlike the original, it returns the image ids and
    per-image evaluation results instead of only storing them on the object.
    '''
    p = self.params

    if p.useSegm is not None:
        p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))

    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)
    self.params = p

    self._prepare()

    catIds = p.catIds if p.useCats else [-1]

    if p.iouType == 'segm' or p.iouType == 'bbox':
        computeIoU = self.computeIoU
    elif p.iouType == 'keypoints':
        computeIoU = self.computeOks
    self.ious = {
        (imgId, catId): computeIoU(imgId, catId)
        for imgId in p.imgIds
        for catId in catIds}

    evaluateImg = self.evaluateImg
    maxDet = p.maxDets[-1]
    evalImgs = [
        evaluateImg(imgId, catId, areaRng, maxDet)
        for catId in catIds
        for areaRng in p.areaRng
        for imgId in p.imgIds
    ]

    evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return p.imgIds, evalImgs


def convert_to_xywh(boxes):
    # convert [x0, y0, x1, y1] boxes to COCO's [x, y, w, h] format (in place)
    boxes[:, 2:] -= boxes[:, :2]
    return boxes


def get_image_list(ann_file):
    with open(ann_file, 'r') as fin:
        data = json.load(fin)
    return data['images']


def load_image(file_path):
    return Image.open(file_path).convert("RGB")


class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string


class ToTensor(object):
    def __call__(self, img, target):
        return F.to_tensor(img), target


class Normalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, image, target=None):
        image = F.normalize(image, mean=self.mean, std=self.std)
        if target is None:
            return image, None
        target = target.copy()
        h, w = image.shape[-2:]
        if "boxes" in target:
            boxes = target["boxes"]
            boxes = box_xyxy_to_cxcywh(boxes)
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
            target["boxes"] = boxes
        return image, target


class SquareResize(object):
    def __init__(self, sizes):
        assert isinstance(sizes, (list, tuple))
        self.sizes = sizes

    def __call__(self, img, target=None):
        size = random.choice(self.sizes)
        rescaled_img = F.resize(img, (size, size))
        w, h = rescaled_img.size
        if target is None:
            return rescaled_img, None
        ratios = tuple(
            float(s) / float(s_orig) for s, s_orig in zip(rescaled_img.size, img.size))
        ratio_width, ratio_height = ratios

        target = target.copy()
        if "boxes" in target:
            boxes = target["boxes"]
            scaled_boxes = boxes * torch.as_tensor(
                [ratio_width, ratio_height, ratio_width, ratio_height])
            target["boxes"] = scaled_boxes

        if "area" in target:
            area = target["area"]
            scaled_area = area * (ratio_width * ratio_height)
            target["area"] = scaled_area

        target["size"] = torch.tensor([h, w])

        return rescaled_img, target


def infer_transforms():
    # resize to a fixed 640x640 input, then apply ImageNet mean/std normalization
    normalize = Compose([
        ToTensor(),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    return Compose([
        SquareResize([640]),
        normalize,
    ])


def box_cxcywh_to_xyxy(x):
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w.clamp(min=0.0)), (y_c - 0.5 * h.clamp(min=0.0)),
         (x_c + 0.5 * w.clamp(min=0.0)), (y_c + 0.5 * h.clamp(min=0.0))]
    return torch.stack(b, dim=-1)
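

# Normalize.__call__ above references box_xyxy_to_cxcywh, which was missing from
# this file. A minimal implementation, the inverse of box_cxcywh_to_xyxy and
# matching the DETR reference utilities:
def box_xyxy_to_cxcywh(x):
    x0, y0, x1, y1 = x.unbind(-1)
    b = [(x0 + x1) / 2, (y0 + y1) / 2,
         (x1 - x0), (y1 - y0)]
    return torch.stack(b, dim=-1)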


def post_process(outputs, target_sizes):
    # select the top-300 scoring (query, class) pairs, then gather the
    # corresponding boxes and scale them back to the original image size
    out_logits, out_bbox = outputs['labels'], outputs['dets']

    assert len(out_logits) == len(target_sizes)
    assert target_sizes.shape[1] == 2

    prob = out_logits.sigmoid()
    topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), 300, dim=1)
    scores = topk_values
    topk_boxes = topk_indexes // out_logits.shape[2]
    labels = topk_indexes % out_logits.shape[2]
    boxes = box_cxcywh_to_xyxy(out_bbox)
    boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))

    # convert from relative [0, 1] to absolute [0, height/width] coordinates
    img_h, img_w = target_sizes.unbind(1)
    scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
    boxes = boxes * scale_fct[:, None, :]

    results = [{'scores': s, 'labels': l, 'boxes': b} for s, l, b in zip(scores, labels, boxes)]

    return results


def infer_onnx(sess, coco_evaluator, time_profile, prefix, img_list, device, repeats=1):
    time_list = []
    for img_dict in tqdm.tqdm(img_list):
        image = load_image(os.path.join(prefix, img_dict['file_name']))
        width, height = image.size
        orig_target_sizes = torch.Tensor([height, width])
        image_tensor, _ = infer_transforms()(image, None)

        samples = image_tensor[None].numpy()

        # time only the forward passes, averaged over `repeats` runs
        time_profile.reset()
        with time_profile:
            for _ in range(repeats):
                res = sess.run(None, {"input": samples})
        time_list.append(time_profile.total / repeats)
        outputs = {}
        outputs['labels'] = torch.Tensor(res[1]).to(device)
        outputs['dets'] = torch.Tensor(res[0]).to(device)

        orig_target_sizes = torch.stack([orig_target_sizes], dim=0).to(device)
        results = post_process(outputs, orig_target_sizes)
        res = {img_dict['id']: results[0]}
        if coco_evaluator is not None:
            coco_evaluator.update(res)

    print("Model latency with ONNX Runtime: {}ms".format(1000 * sum(time_list) / len(img_list)))

    stats = {}
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist()
        print(stats)


def infer_engine(model, coco_evaluator, time_profile, prefix, img_list, device, repeats=1):
    time_list = []
    for img_dict in tqdm.tqdm(img_list):
        image = load_image(os.path.join(prefix, img_dict['file_name']))
        width, height = image.size
        orig_target_sizes = torch.Tensor([height, width])
        image_tensor, _ = infer_transforms()(image, None)

        samples = image_tensor[None].to(device)

        # time only the forward passes, averaged over `repeats` runs
        time_profile.reset()
        with time_profile:
            for _ in range(repeats):
                outputs = model({"input": samples})

        time_list.append(time_profile.total / repeats)
        orig_target_sizes = torch.stack([orig_target_sizes], dim=0).to(device)
        if coco_evaluator is not None:
            results = post_process(outputs, orig_target_sizes)
            res = {img_dict['id']: results[0]}
            coco_evaluator.update(res)

    print("Model latency with TensorRT: {}ms".format(1000 * sum(time_list) / len(img_list)))

    stats = {}
    if coco_evaluator is not None:
        coco_evaluator.synchronize_between_processes()
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist()
        print(stats)
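

# TRTInference below deserializes a serialized engine, allocates device-resident
# torch tensors as I/O bindings, and exposes a dict-in/dict-out call interface,
# either synchronously via execute_v2 or on a CUDA stream via execute_async_v2.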


class TRTInference(object):
    """TensorRT inference engine wrapper."""

    def __init__(self, engine_path='dino.engine', device='cuda:0', sync_mode: bool = False,
                 max_batch_size=32, verbose=False):
        self.engine_path = engine_path
        self.device = device
        self.sync_mode = sync_mode
        self.max_batch_size = max_batch_size

        self.logger = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.INFO)

        self.engine = self.load_engine(engine_path)

        self.context = self.engine.create_execution_context()

        self.bindings = self.get_bindings(self.engine, self.context, self.max_batch_size, self.device)
        self.bindings_addr = OrderedDict((n, v.ptr) for n, v in self.bindings.items())

        self.input_names = self.get_input_names()
        self.output_names = self.get_output_names()

        if not self.sync_mode:
            self.stream = cuda.Stream()

        self.time_profile = TimeProfiler()

    def get_dummy_input(self, batch_size: int):
        blob = {}
        for name, binding in self.bindings.items():
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
                print(f"make dummy input {name} with shape {binding.shape}")
                blob[name] = torch.rand(batch_size, *binding.shape[1:]).float().to(self.device)
        return blob

    def load_engine(self, path):
        '''Deserialize a TensorRT engine from file.'''
        trt.init_libnvinfer_plugins(self.logger, '')
        with open(path, 'rb') as f, trt.Runtime(self.logger) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    def get_input_names(self):
        names = []
        for name in self.engine:
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
                names.append(name)
        return names

    def get_output_names(self):
        names = []
        for name in self.engine:
            if self.engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT:
                names.append(name)
        return names

    def get_bindings(self, engine, context, max_batch_size=32, device=None):
        '''Allocate a device tensor for every engine I/O tensor.'''
        Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
        bindings = OrderedDict()

        for name in engine:
            shape = engine.get_tensor_shape(name)
            dtype = trt.nptype(engine.get_tensor_dtype(name))

            if shape[0] == -1:
                raise NotImplementedError('dynamic input shapes are not supported')

            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
            bindings[name] = Binding(name, dtype, shape, data, data.data_ptr())

        return bindings

    def run_sync(self, blob):
        self.bindings_addr.update({n: blob[n].data_ptr() for n in self.input_names})
        self.context.execute_v2(list(self.bindings_addr.values()))
        outputs = {n: self.bindings[n].data for n in self.output_names}
        return outputs

    def run_async(self, blob):
        self.bindings_addr.update({n: blob[n].data_ptr() for n in self.input_names})
        bindings_addr = [int(v) for _, v in self.bindings_addr.items()]
        self.context.execute_async_v2(bindings=bindings_addr, stream_handle=self.stream.handle)
        outputs = {n: self.bindings[n].data for n in self.output_names}
        self.stream.synchronize()
        return outputs

    def __call__(self, blob):
        if self.sync_mode:
            return self.run_sync(blob)
        else:
            return self.run_async(blob)

    def synchronize(self):
        # in async mode, wait on the pycuda stream (which only exists then);
        # execute_v2 in sync mode already blocks, so a torch-level sync suffices
        if not self.sync_mode:
            self.stream.synchronize()
        elif torch.cuda.is_available():
            torch.cuda.synchronize()

    def speed(self, blob, n):
        '''Average latency in seconds over n runs of the given input blob.'''
        self.time_profile.reset()
        with self.time_profile:
            for _ in range(n):
                _ = self(blob)
        return self.time_profile.total / n

    def build_engine(self, onnx_file_path, engine_file_path, max_batch_size=32):
        '''Takes an ONNX file and creates a TensorRT FP16 engine to run inference with.

        http://gitlab.baidu.com/paddle-inference/benchmark/blob/main/backend_trt.py#L57
        '''
        EXPLICIT_BATCH = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
        with trt.Builder(self.logger) as builder, \
                builder.create_network(EXPLICIT_BATCH) as network, \
                trt.OnnxParser(network, self.logger) as parser, \
                builder.create_builder_config() as config:

            config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)
            config.set_flag(trt.BuilderFlag.FP16)

            with open(onnx_file_path, 'rb') as model:
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None

            serialized_engine = builder.build_serialized_network(network, config)
            with open(engine_file_path, 'wb') as f:
                f.write(serialized_engine)

        return serialized_engine


class TimeProfiler(contextlib.ContextDecorator):
    def __init__(self):
        self.total = 0

    def __enter__(self):
        self.start = self.time()
        return self

    def __exit__(self, type, value, traceback):
        self.total += self.time() - self.start

    def reset(self):
        self.total = 0

    def time(self):
        # synchronize first so pending GPU work is included in the measurement
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return time.perf_counter()
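

# A minimal sketch of measuring raw engine latency on random inputs, independent
# of the COCO loop ('model.engine' is a hypothetical path):
#   model = TRTInference('model.engine', device='cuda:0', sync_mode=True)
#   blob = model.get_dummy_input(batch_size=1)
#   print('avg latency: {:.2f}ms'.format(1000 * model.speed(blob, n=100)))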


def main(args):
    print(args)

    coco_gt = osp.join(args.coco_path, 'annotations/instances_val2017.json')
    img_list = get_image_list(coco_gt)
    prefix = osp.join(args.coco_path, 'val2017')
    if args.run_benchmark:
        repeats = 10
        print('Inference for each image will be repeated {} times to obtain '
              'a reliable measurement of inference latency.'.format(repeats))
    else:
        repeats = 1

    if args.disable_eval:
        coco_evaluator = None
    else:
        coco_evaluator = CocoEvaluator(coco_gt, ('bbox',))

    time_profile = TimeProfiler()

    if args.path.endswith(".onnx"):
        sess = nxrun.InferenceSession(args.path, providers=['CUDAExecutionProvider'])
        infer_onnx(sess, coco_evaluator, time_profile, prefix, img_list,
                   device=f'cuda:{args.device}', repeats=repeats)
    elif args.path.endswith(".engine"):
        model = TRTInference(args.path, sync_mode=True, device=f'cuda:{args.device}')
        infer_engine(model, coco_evaluator, time_profile, prefix, img_list,
                     device=f'cuda:{args.device}', repeats=repeats)
    else:
        raise NotImplementedError('Only model file names ending with ".onnx" and ".engine" are supported.')


if __name__ == '__main__':
    args = parser_args()
    main(args)