Spaces:
Runtime error
Runtime error
| import argparse | |
| import logging | |
| import torch | |
| import torch.nn as nn | |
| import timeit | |
| from maskrcnn_benchmark.layers import * | |
| from maskrcnn_benchmark.modeling.backbone.resnet_big import StdConv2d | |
| from maskrcnn_benchmark.modeling.backbone.fpn import * | |
| from maskrcnn_benchmark.modeling.rpn.inference import * | |
| from maskrcnn_benchmark.modeling.roi_heads.box_head.inference import PostProcessor | |
| from maskrcnn_benchmark.modeling.rpn.anchor_generator import BufferList | |
def profile(model, input_size, custom_ops=None, device="cpu", verbose=False, extra_args=None, return_time=False):
    """Estimate total FLOPs and parameter count of ``model`` for one forward pass.

    A zero tensor of shape ``input_size`` is pushed through the model in eval
    mode; per-module forward hooks (looked up in ``custom_ops`` first, then the
    module-level ``register_hooks`` table) write ``total_ops`` into each leaf
    module, which are then summed.

    Args:
        model: the ``nn.Module`` to profile (restored to its original
            train/eval state and device before returning).
        input_size: shape of the synthetic input tensor.
        custom_ops: optional ``{module_type: hook_fn}`` overrides.
        device: device to run the measurement on.
        verbose: print each module a counter is registered for.
        extra_args: extra keyword arguments forwarded to ``model(...)``.
        return_time: also return the wall-clock forward time in seconds.

    Returns:
        ``(total_ops, total_params)`` or ``(total_ops, total_params, total_time)``.
    """
    # Mutable-default-argument fix: a shared module-level {} would leak state
    # between calls if a caller ever mutated it.
    custom_ops = {} if custom_ops is None else custom_ops
    extra_args = {} if extra_args is None else extra_args
    handler_collection = []

    def add_hooks(m):
        # Only leaf modules get counters; counting parents too would
        # double-count every op.
        if len(list(m.children())) > 0:
            return
        m.register_buffer('total_ops', torch.zeros(1))
        m.register_buffer('total_params', torch.zeros(1))
        for p in m.parameters():
            m.total_params += torch.Tensor([p.numel()])
        m_type = type(m)
        fn = None
        if m_type in custom_ops:
            fn = custom_ops[m_type]
        elif m_type in register_hooks:
            fn = register_hooks[m_type]
        else:
            print("Not implemented for ", m)
        if fn is not None:
            # A None entry in the tables means "known module, zero/ignored ops":
            # no hook is registered and no warning is printed.
            if verbose:
                print("Register FLOP counter for module %s" % str(m))
            handler = m.register_forward_hook(fn)
            handler_collection.append(handler)

    original_device = next(model.parameters()).device
    training = model.training
    model.eval().to(device)
    model.apply(add_hooks)
    x = torch.zeros(input_size).to(device)
    with torch.no_grad():
        tic = timeit.time.perf_counter()
        model(x, **extra_args)
        toc = timeit.time.perf_counter()
        total_time = toc - tic
    total_ops = 0
    total_params = 0
    for m in model.modules():
        if len(list(m.children())) > 0:  # skip non-leaf modules
            continue
        total_ops += m.total_ops
        total_params += m.total_params
    total_ops = total_ops.item()
    total_params = total_params.item()
    # Restore the caller's training mode and device.
    model.train(training).to(original_device)
    for handler in handler_collection:
        handler.remove()
    if return_time:
        return total_ops, total_params, total_time
    else:
        return total_ops, total_params
# 1 multiply-accumulate is counted as 1 op (set to 2 to count mul and add separately).
multiply_adds = 1


def count_conv2d(m, x, y):
    """FLOP-counting forward hook for 2-D convolutions.

    Writes the op count for this forward pass into ``m.total_ops``.
    """
    inp = x[0]
    kh, kw = m.kernel_size
    # MACs per output element: one kernel window over the grouped input
    # channels, plus one add when a bias is present.
    window_ops = multiply_adds * kh * kw * m.in_channels // m.groups
    per_element = window_ops + (1 if m.bias is not None else 0)
    # One window evaluation per output element.
    n_outputs = inp.size()[0] * y.size(2) * y.size(3) * m.out_channels
    m.total_ops = torch.Tensor([int(n_outputs * per_element)])
def count_convtranspose2d(m, x, y):
    """FLOP-counting forward hook for 2-D transposed convolutions.

    Coarse estimate: every output element is charged the full weight tensor
    size (``m.weight.nelement()`` ops). The original body also computed a
    per-window ``kernel_ops``/``bias_ops`` estimate and then immediately
    overwrote it — that dead code is removed; the resulting count is unchanged.
    """
    ops_per_element = m.weight.nelement()
    output_elements = y.nelement()
    m.total_ops = torch.Tensor([int(output_elements * ops_per_element)])
def count_bn(m, x, y):
    """FLOP-counting forward hook for normalization layers.

    Charges 4 ops per input element: subtract mean, divide by std,
    scale by gamma, shift by beta.
    """
    inp = x[0]
    m.total_ops = torch.Tensor([int(4 * inp.numel())])
def count_relu(m, x, y):
    """FLOP-counting forward hook for ReLU-family activations:
    one comparison per input element."""
    m.total_ops = torch.Tensor([int(x[0].numel())])
def count_softmax(m, x, y):
    """FLOP-counting forward hook for softmax.

    Per softmax row of ``n`` features: ``n`` exponentials, ``n - 1``
    additions, and ``n`` divisions.

    Generalized: the original unpacked ``batch_size, nfeatures = x.size()``
    and therefore crashed on any input that was not exactly 2-D. The softmax
    dimension is taken to be the last one (which matches the 2-D behavior);
    every leading dimension is folded into the batch.
    """
    inp = x[0]
    nfeatures = inp.size(-1)
    batch_size = inp.numel() // nfeatures
    total_exp = nfeatures
    total_add = nfeatures - 1
    total_div = nfeatures
    total_ops = batch_size * (total_exp + total_add + total_div)
    m.total_ops = torch.Tensor([int(total_ops)])
def count_maxpool(m, x, y):
    """FLOP-counting forward hook for max pooling: one op per kernel entry
    per output element (``m.kernel_size`` may be an int or a tuple)."""
    window_ops = torch.prod(torch.Tensor([m.kernel_size]))
    m.total_ops = torch.Tensor([int(window_ops * y.numel())])
def count_adap_maxpool(m, x, y):
    """FLOP-counting forward hook for adaptive max pooling.

    Approximates the pooling window as floor(input_spatial / output_spatial)
    and charges one op per window entry per output element.
    """
    in_spatial = torch.Tensor([*(x[0].shape[2:])])
    out_spatial = torch.Tensor(list((m.output_size,))).squeeze()
    window_ops = torch.prod(in_spatial // out_spatial)
    m.total_ops = torch.Tensor([int(window_ops * y.numel())])
def count_avgpool(m, x, y):
    """FLOP-counting forward hook for average pooling: kernel-size additions
    plus one division per output element."""
    adds = torch.prod(torch.Tensor([m.kernel_size]))
    per_element = adds + 1  # +1 for the division by the window size
    m.total_ops = torch.Tensor([int(per_element * y.numel())])
def count_adap_avgpool(m, x, y):
    """FLOP-counting forward hook for adaptive average pooling.

    Approximates the window as floor(input_spatial / output_spatial);
    charges window-size additions plus one division per output element.
    """
    in_spatial = torch.Tensor([*(x[0].shape[2:])])
    out_spatial = torch.Tensor(list((m.output_size,))).squeeze()
    adds = torch.prod(in_spatial // out_spatial)
    per_element = adds + 1  # +1 for the division
    m.total_ops = torch.Tensor([int(per_element * y.numel())])
def count_linear(m, x, y):
    """FLOP-counting forward hook for nn.Linear: ``in_features`` multiplies
    and ``in_features - 1`` additions per output element."""
    per_element = m.in_features + (m.in_features - 1)
    m.total_ops = torch.Tensor([int(per_element * y.numel())])
def count_LastLevelMaxPool(m, x, y):
    """FLOP-counting forward hook for modules whose output ``y`` is a list of
    feature maps (LastLevelMaxPool / SPPLayer): one op per element of the
    last map."""
    m.total_ops = torch.Tensor([int(y[-1].numel())])
def count_ROIAlign(m, x, y):
    """FLOP-counting forward hook for ROIAlign: 4 ops per output element
    (bilinear interpolation reads 4 source values)."""
    m.total_ops = torch.Tensor([int(4 * y.numel())])
# Dispatch table: module class -> FLOP-counting forward hook used by profile().
# A value of None marks the class as "known but not counted": profile()
# registers no hook for it and suppresses the "Not implemented" warning,
# so its total_ops stays at the zero buffer.
register_hooks = {
    Scale: None,
    # Convolutions (project variants share the plain Conv2d estimate).
    Conv2d: count_conv2d,
    nn.Conv2d: count_conv2d,
    ModulatedDeformConv: count_conv2d,
    StdConv2d: count_conv2d,
    # Normalization layers (all charged 4 ops/element).
    nn.BatchNorm1d: count_bn,
    nn.BatchNorm2d: count_bn,
    nn.BatchNorm3d: count_bn,
    FrozenBatchNorm2d: count_bn,
    nn.GroupNorm: count_bn,
    NaiveSyncBatchNorm2d: count_bn,
    # Activations.
    nn.ReLU: count_relu,
    nn.ReLU6: count_relu,
    swish: None,
    nn.ConstantPad2d: None,
    # Modules whose output is a list of feature maps.
    SPPLayer: count_LastLevelMaxPool,
    LastLevelMaxPool: count_LastLevelMaxPool,
    # Pooling.
    nn.MaxPool1d: count_maxpool,
    nn.MaxPool2d: count_maxpool,
    nn.MaxPool3d: count_maxpool,
    nn.AdaptiveMaxPool1d: count_adap_maxpool,
    nn.AdaptiveMaxPool2d: count_adap_maxpool,
    nn.AdaptiveMaxPool3d: count_adap_maxpool,
    nn.AvgPool1d: count_avgpool,
    nn.AvgPool2d: count_avgpool,
    nn.AvgPool3d: count_avgpool,
    nn.AdaptiveAvgPool1d: count_adap_avgpool,
    nn.AdaptiveAvgPool2d: count_adap_avgpool,
    nn.AdaptiveAvgPool3d: count_adap_avgpool,
    nn.Linear: count_linear,
    # Treated as free / negligible.
    nn.Upsample: None,
    nn.Dropout: None,
    nn.Sigmoid: None,
    DropBlock2D: None,
    ROIAlign: count_ROIAlign,
    # Detection post-processors: bookkeeping only, not counted.
    RPNPostProcessor: None,
    PostProcessor: None,
    BufferList: None,
    RetinaPostProcessor: None,
    FCOSPostProcessor: None,
}