[GSoC] Add block quantized models (#270)
* Gemm and MatMul block quantization support
* refactoring
* fix indentation
* node name independent
* Block quantization tool:
- constant weight category supported
- add data type saturation
- handled the case in which all the elements within a block are the same
- benchmark script modified to support block quantized models
- block quantized some models
* add missing block quantized models
* formatting
* add blocked models to eval script; evaluate YuNet
* Add SFace and PPHumanSeg evaluation, block quantization tool fix, handpose blocked model fix, removed blocked CRNN EN
* changed evaluation metric in block_quantize script and added verbose mode
* Add evaluation for PP-ResNet and MobileNet
* changed file suffix and updated READMEs
* renamed int8bq
- benchmark/README.md +1 -1
- benchmark/benchmark.py +5 -2
- models/__init__.py +4 -0
- models/face_detection_yunet/README.md +7 -2
- models/face_recognition_sface/README.md +3 -0
- models/facial_expression_recognition/README.md +1 -0
- models/handpose_estimation_mediapipe/README.md +1 -0
- models/human_segmentation_pphumanseg/README.md +7 -2
- models/image_classification_mobilenet/README.md +6 -0
- models/image_classification_ppresnet/README.md +5 -0
- models/license_plate_detection_yunet/README.md +3 -0
- models/object_detection_nanodet/README.md +3 -1
- models/object_detection_yolox/README.md +3 -1
- models/object_tracking_vittrack/README.md +4 -1
- models/optical_flow_estimation_raft/README.md +2 -0
- models/palm_detection_mediapipe/README.md +1 -0
- models/person_detection_mediapipe/README.md +3 -0
- models/person_reid_youtureid/README.md +2 -2
- models/pose_estimation_mediapipe/README.md +2 -0
- models/text_detection_ppocr/README.md +2 -1
- models/text_recognition_crnn/README.md +2 -1
- tools/eval/README.md +15 -1
- tools/eval/eval.py +33 -0
- tools/quantize/README.md +2 -0
- tools/quantize/block_quantize.py +111 -37
benchmark/README.md CHANGED

@@ -26,7 +26,7 @@ python benchmark.py --cfg ./config/face_detection_yunet.yaml
 # All configs
 python benchmark.py --all

-# All configs but only fp32 models (--fp32, --fp16, --int8 are available for now)
+# All configs but only fp32 models (--fp32, --fp16, --int8 --int8bq are available for now)
 python benchmark.py --all --fp32

 # All configs but exclude some of them (fill with config name keywords, not sensitive to upper/lower case, seperate with colons)
benchmark/benchmark.py CHANGED

@@ -46,6 +46,7 @@ parser.add_argument("--model_exclude", type=str, help="Models to be excluded. Sp
 parser.add_argument("--fp32", action="store_true", help="Benchmark models of float32 precision only.")
 parser.add_argument("--fp16", action="store_true", help="Benchmark models of float16 precision only.")
 parser.add_argument("--int8", action="store_true", help="Benchmark models of int8 precision only.")
+parser.add_argument("--int8bq", action="store_true", help="Benchmark models of blocked int8 precision only.")
 parser.add_argument("--all", action="store_true", help="Benchmark all models")
 args = parser.parse_args()

@@ -194,15 +195,17 @@ if __name__ == '__main__':
         model_handler, model_paths = MODELS.get(model_config.pop('name'))

         _model_paths = []
-        if args.fp32 or args.fp16 or args.int8:
+        if args.fp32 or args.fp16 or args.int8 or args.int8bq:
             if args.fp32:
                 _model_paths += model_paths['fp32']
             if args.fp16:
                 _model_paths += model_paths['fp16']
             if args.int8:
                 _model_paths += model_paths['int8']
+            if args.int8bq:
+                _model_paths += model_paths['int8bq']
         else:
-            _model_paths = model_paths['fp32'] + model_paths['fp16'] + model_paths['int8']
+            _model_paths = model_paths['fp32'] + model_paths['fp16'] + model_paths['int8'] + model_paths["int8bq"]
         # filter out excluded models
         excludes = []
         if args.model_exclude is not None:
models/__init__.py CHANGED

@@ -46,6 +46,7 @@ class ModuleRegistery:
         fp32_model_paths = []
         fp16_model_paths = []
         int8_model_paths = []
+        int8bq_model_paths = []
         # onnx
         ret_onnx = sorted(glob.glob(os.path.join(model_dir, "*.onnx")))
         if "object_tracking" in item.__module__:
@@ -57,6 +58,8 @@ class ModuleRegistery:
                 int8_model_paths.append([r])
             elif "fp16" in r: # exclude fp16 for now
                 fp16_model_paths.append([r])
+            elif "blocked" in r:
+                int8bq_model_paths.append([r])
             else:
                 fp32_model_paths.append([r])
         # caffe
@@ -72,6 +75,7 @@ class ModuleRegistery:
             fp32=fp32_model_paths,
             fp16=fp16_model_paths,
             int8=int8_model_paths,
+            int8bq=int8bq_model_paths
         )

         self._dict[item.__name__] = (item, all_model_paths)
models/face_detection_yunet/README.md CHANGED

@@ -8,15 +8,20 @@ Notes:
 - This model can detect **faces of pixels between around 10x10 to 300x300** due to the training scheme.
 - For details on training this model, please visit https://github.com/ShiqiYu/libfacedetection.train.
 - This ONNX model has fixed input shape, but OpenCV DNN infers on the exact shape of input image. See https://github.com/opencv/opencv_zoo/issues/44 for more information.
+- `face_detection_yunet_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 Results of accuracy evaluation with [tools/eval](../../tools/eval).

 | Models | Easy AP | Medium AP | Hard AP |
 | ----------- | ------- | --------- | ------- |
-| YuNet | 0.
-| YuNet
+| YuNet | 0.8844 | 0.8656 | 0.7503 |
+| YuNet block | 0.8845 | 0.8652 | 0.7504 |
+| YuNet quant | 0.8810 | 0.8629 | 0.7503 |
+

 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
+

 ## Demo

models/face_recognition_sface/README.md CHANGED

@@ -8,15 +8,18 @@ Note:
 - Model files encode MobileFaceNet instances trained on the SFace loss function, see the [SFace paper](https://arxiv.org/abs/2205.12010) for reference.
 - ONNX file conversions from [original code base](https://github.com/zhongyy/SFace) thanks to [Chengrui Wang](https://github.com/crywang).
 - (As of Sep 2021) Supporting 5-landmark warping for now, see below for details.
+- `face_recognition_sface_2021dec_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 Results of accuracy evaluation with [tools/eval](../../tools/eval).

 | Models | Accuracy |
 | ----------- | -------- |
 | SFace | 0.9940 |
+| SFace block | 0.9942 |
 | SFace quant | 0.9932 |

 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.

 ## Demo

models/facial_expression_recognition/README.md CHANGED

@@ -7,6 +7,7 @@ Note:
 - Progressive Teacher is contributed by [Jing Jiang](https://scholar.google.com/citations?user=OCwcfAwAAAAJ&hl=zh-CN).
 - [MobileFaceNet](https://link.springer.com/chapter/10.1007/978-3-319-97909-0_46) is used as the backbone and the model is able to classify seven basic facial expressions (angry, disgust, fearful, happy, neutral, sad, surprised).
 - [facial_expression_recognition_mobilefacenet_2022july.onnx](https://github.com/opencv/opencv_zoo/raw/master/models/facial_expression_recognition/facial_expression_recognition_mobilefacenet_2022july.onnx) is implemented thanks to [Chengrui Wang](https://github.com/crywang).
+- `facial_expression_recognition_mobilefacenet_2022july_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 Results of accuracy evaluation on [RAF-DB](http://whdeng.cn/RAF/model1.html).

models/handpose_estimation_mediapipe/README.md CHANGED

@@ -14,6 +14,7 @@ This model is converted from TFlite to ONNX using following tools:
 **Note**:
 - The int8-quantized model may produce invalid results due to a significant drop of accuracy.
 - Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#hands for models of larger scale.
+- `handpose_estimation_mediapipe_2023feb_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 ## Demo

models/human_segmentation_pphumanseg/README.md CHANGED

@@ -2,6 +2,9 @@

 This model is ported from [PaddleHub](https://github.com/PaddlePaddle/PaddleHub) using [this script from OpenCV](https://github.com/opencv/opencv/blob/master/samples/dnn/dnn_model_runner/dnn_conversion/paddlepaddle/paddle_humanseg.py).

+**Note**:
+- `human_segmentation_pphumanseg_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo

 ### Python
@@ -46,11 +49,13 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval).

 | Models | Accuracy | mIoU |
 | ------------------ | -------------- | ------------- |
-| PPHumanSeg | 0.
-| PPHumanSeg
+| PPHumanSeg | 0.9656 | 0.9164 |
+| PPHumanSeg block | 0.9655 | 0.9162 |
+| PPHumanSeg quant | 0.7285 | 0.3642 |


 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.

 ---
 ## License
models/image_classification_mobilenet/README.md CHANGED

@@ -4,16 +4,22 @@ MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applicatio

 MobileNetV2: Inverted Residuals and Linear Bottlenecks

+**Note**:
+- `image_classification_mobilenetvX_2022apr_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 Results of accuracy evaluation with [tools/eval](../../tools/eval).

 | Models | Top-1 Accuracy | Top-5 Accuracy |
 | ------------------ | -------------- | -------------- |
 | MobileNet V1 | 67.64 | 87.97 |
+| MobileNet V1 block | 67.21 | 87.62 |
 | MobileNet V1 quant | 55.53 | 78.74 |
 | MobileNet V2 | 69.44 | 89.23 |
+| MobileNet V2 block | 68.66 | 88.90 |
 | MobileNet V2 quant | 68.37 | 88.56 |

 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.

 ## Demo

models/image_classification_ppresnet/README.md CHANGED

@@ -4,14 +4,19 @@ Deep Residual Learning for Image Recognition

 This model is ported from [PaddleHub](https://github.com/PaddlePaddle/PaddleHub) using [this script from OpenCV](https://github.com/opencv/opencv/blob/master/samples/dnn/dnn_model_runner/dnn_conversion/paddlepaddle/paddle_resnet50.py).

+**Note**:
+- `image_classification_ppresnet50_2022jan_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 Results of accuracy evaluation with [tools/eval](../../tools/eval).

 | Models | Top-1 Accuracy | Top-5 Accuracy |
 | --------------- | -------------- | -------------- |
 | PP-ResNet | 82.28 | 96.15 |
+| PP-ResNet block | 82.27 | 96.15 |
 | PP-ResNet quant | 0.22 | 0.96 |

 \*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.

 ## Demo

models/license_plate_detection_yunet/README.md CHANGED

@@ -4,6 +4,9 @@ This model is contributed by Dong Xu (徐栋) from [watrix.ai](watrix.ai) (银

 Please note that the model is trained with Chinese license plates, so the detection results of other license plates with this model may be limited.

+**Note**:
+- `license_plate_detection_lpd_yunet_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo

 Run the following command to try the demo:
models/object_detection_nanodet/README.md CHANGED

@@ -2,8 +2,10 @@

 Nanodet: NanoDet is a FCOS-style one-stage anchor-free object detection model which using Generalized Focal Loss as classification and regression loss.In NanoDet-Plus, we propose a novel label assignment strategy with a simple assign guidance module (AGM) and a dynamic soft label assigner (DSLA) to solve the optimal label assignment problem in lightweight model training.

-Note
+**Note**:
 - This version of nanodet: Nanodet-m-plus-1.5x_416
+- `object_detection_nanodet_2022nov_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+

 ## Demo

models/object_detection_yolox/README.md CHANGED

@@ -8,8 +8,10 @@ Key features of the YOLOX object detector
 - **SimOTA advanced label assignment strategy** reduces training time and avoids additional solver hyperparameters
 - **Strong data augmentations like MixUp and Mosiac** to boost YOLOX performance

-Note
+**Note**:
 - This version of YoloX: YoloX_s
+- `object_detection_yolox_2022nov_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+

 ## Demo

models/object_tracking_vittrack/README.md CHANGED

@@ -8,7 +8,10 @@ Video demo: https://youtu.be/MJiPnu1ZQRI

 This model is contributed by [Pengyu Liu](https://github.com/lpylpy0514) in GSoC 2023 project [**Realtime object tracking models**](https://github.com/opencv/opencv/wiki/GSoC_2023#idea-realtime-object-tracking-models)

-**
+**Note**:
+- OpenCV > 4.8.0 is required. Build from source with instructions from https://opencv.org/get-started/.**
+- `object_tracking_vittrack_2023sep_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+

 # Demo
 ## Python
models/optical_flow_estimation_raft/README.md CHANGED

@@ -1,6 +1,8 @@
 # RAFT
 This model is originally created by Zachary Teed and Jia Deng of Princeton University. The source code for the model is at [their repository on GitHub](https://github.com/princeton-vl/RAFT), and the original [research paper](https://arxiv.org/abs/2003.12039) is published on [Arxiv](https://arxiv.org/abs/2003.12039). The model was converted to ONNX by [PINTO0309](https://github.com/PINTO0309) in his [model zoo](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/252_RAFT). The ONNX model has several variations depending on the training dataset and input dimesnions. The model used in this demo is trained on Sintel dataset with input size of 360 $\times$ 480.

+**Note**:
+- `optical_flow_estimation_raft_2023aug_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 ## Demo

CHANGED
|
@@ -9,6 +9,7 @@ SSD Anchors are generated from [GenMediaPipePalmDectionSSDAnchors](https://githu
|
|
| 9 |
|
| 10 |
**Note**:
|
| 11 |
- Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#hands for models of larger scale.
|
|
|
|
| 12 |
|
| 13 |
## Demo
|
| 14 |
|
|
|
|
| 9 |
|
| 10 |
**Note**:
|
| 11 |
- Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#hands for models of larger scale.
|
| 12 |
+
- `palm_detection_mediapipe_2023feb_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
|
| 13 |
|
| 14 |
## Demo
|
| 15 |
|
models/person_detection_mediapipe/README.md CHANGED

@@ -7,6 +7,9 @@ This model detects upper body and full body keypoints of a person, and is downlo

 SSD Anchors are generated from [GenMediaPipePalmDectionSSDAnchors](https://github.com/VimalMollyn/GenMediaPipePalmDectionSSDAnchors)

+**Note**:
+- `person_detection_mediapipe_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo

 ### Python
models/person_reid_youtureid/README.md CHANGED

@@ -2,9 +2,9 @@

 This model is provided by Tencent Youtu Lab [[Credits]](https://github.com/opencv/opencv/blob/394e640909d5d8edf9c1f578f8216d513373698c/samples/dnn/person_reid.py#L6-L11).

-Note
-
+**Note**:
 - Model source: https://github.com/ReID-Team/ReID_extra_testdata
+- `person_reid_youtu_2021nov_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 ## Demo

models/pose_estimation_mediapipe/README.md CHANGED

@@ -10,6 +10,8 @@ This model is converted from TFlite to ONNX using following tools:

 **Note**:
 - Visit https://github.com/google/mediapipe/blob/master/docs/solutions/models.md#pose for models of larger scale.
+- `pose_estimation_mediapipe_2023mar_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+
 ## Demo

 ### python
models/text_detection_ppocr/README.md CHANGED

@@ -2,7 +2,7 @@

 PP-OCRv3: More Attempts for the Improvement of Ultra Lightweight OCR System.

-Note
+**Note**:

 - The int8 quantization model may produce unstable results due to some loss of accuracy.
 - Original Paddle Models source of English: [here](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar).
@@ -10,6 +10,7 @@ Note:
 - `IC15` in the filename means the model is trained on [IC15 dataset](https://rrc.cvc.uab.es/?ch=4&com=introduction), which can detect English text instances only.
 - `TD500` in the filename means the model is trained on [TD500 dataset](http://www.iapr-tc11.org/mediawiki/index.php/MSRA_Text_Detection_500_Database_(MSRA-TD500)), which can detect both English & Chinese instances.
 - Visit https://docs.opencv.org/master/d4/d43/tutorial_dnn_text_spotting.html for more information.
+- `text_detection_xx_ppocrv3_2023may_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 ## Demo

models/text_recognition_crnn/README.md CHANGED

@@ -15,7 +15,7 @@ Results of accuracy evaluation with [tools/eval](../../tools/eval) at different

 \*: 'FP16' or 'INT8' stands for 'model quantized into FP16' or 'model quantized into int8'

-Note
+**Note**:

 - Model source:
   - `text_recognition_CRNN_EN_2021sep.onnx`: https://docs.opencv.org/4.5.2/d9/d1e/tutorial_dnn_OCR.html (CRNN_VGG_BiLSTM_CTC.onnx)
@@ -25,6 +25,7 @@ Note:
 - `text_recognition_CRNN_CH_2021sep.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), and some special characters (see `CHARSET_CH_94` for details in `crnn.py`).
 - `text_recognition_CRNN_CN_2021nov.onnx` can detect digits (0\~9), upper/lower-case letters (a\~z and A\~Z), some Chinese characters and some special characters (see `CHARSET_CN_3944` for details in `crnn.py`).
 - For details on training this model series, please visit https://github.com/zihaomu/deep-text-recognition-benchmark.
+- `text_recognition_CRNN_XX_2021xxx_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.

 ## Demo

tools/eval/README.md CHANGED

@@ -146,7 +146,7 @@ python eval.py -m sface -d lfw -dr /path/to/lfw

 ### Prepare data

-Please visit http://iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions to download the ICDAR2003 dataset and the labels.
+Please visit http://iapr-tc11.org/mediawiki/index.php/ICDAR_2003_Robust_Reading_Competitions to download the ICDAR2003 dataset and the labels. You have to download the Robust Word Recognition [TrialTrain Set](http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTrain/word.zip) only.

 ```shell
 $ tree -L 2 /path/to/icdar
@@ -199,6 +199,20 @@ python eval.py -m crnn -d iiit5k -dr /path/to/iiit5k
 ### Prepare data
 Please download the mini_supervisely data from [here](https://paddleseg.bj.bcebos.com/humanseg/data/mini_supervisely.zip) which includes the validation dataset and unzip it.

+```shell
+$ tree -L 2 /path/to/mini_supervisely
+.
+├── Annotations
+│   ├── ache-adult-depression-expression-41253.png
+│   ├── ...
+├── Images
+│   ├── ache-adult-depression-expression-41253.jpg
+│   ├── ...
+├── test.txt
+├── train.txt
+└── val.txt
+```
+
 ### Evaluation

 Run evaluation with the following command :
tools/eval/eval.py CHANGED

@@ -33,6 +33,12 @@ models = dict(
         modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv1_2022apr_int8.onnx"),
         topK=5,
         loadLabel=False),
+    mobilenetv1_bq=dict(
+        name="MobileNet",
+        topic="image_classification",
+        modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv1_2022apr_int8bq.onnx"),
+        topK=5,
+        loadLabel=False),
     mobilenetv2=dict(
         name="MobileNet",
         topic="image_classification",
@@ -45,6 +51,12 @@ models = dict(
         modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr_int8.onnx"),
         topK=5,
         loadLabel=False),
+    mobilenetv2_bq=dict(
+        name="MobileNet",
+        topic="image_classification",
+        modelPath=os.path.join(root_dir, "models/image_classification_mobilenet/image_classification_mobilenetv2_2022apr_int8bq.onnx"),
+        topK=5,
+        loadLabel=False),
     ppresnet=dict(
         name="PPResNet",
         topic="image_classification",
@@ -57,6 +69,12 @@ models = dict(
         modelPath=os.path.join(root_dir, "models/image_classification_ppresnet/image_classification_ppresnet50_2022jan_int8.onnx"),
         topK=5,
         loadLabel=False),
+    ppresnet_bq=dict(
+        name="PPResNet",
+        topic="image_classification",
+        modelPath=os.path.join(root_dir, "models/image_classification_ppresnet/image_classification_ppresnet50_2022jan_int8bq.onnx"),
+        topK=5,
+        loadLabel=False),
     yunet=dict(
         name="YuNet",
         topic="face_detection",
@@ -71,6 +89,13 @@ models = dict(
         topK=5000,
         confThreshold=0.3,
         nmsThreshold=0.45),
+    yunet_bq=dict(
+        name="YuNet",
+        topic="face_detection",
+        modelPath=os.path.join(root_dir, "models/face_detection_yunet/face_detection_yunet_2023mar_int8bq.onnx"),
+        topK=5000,
+        confThreshold=0.3,
+        nmsThreshold=0.45),
     sface=dict(
         name="SFace",
         topic="face_recognition",
@@ -79,6 +104,10 @@ models = dict(
         name="SFace",
         topic="face_recognition",
         modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec_int8.onnx")),
+    sface_bq=dict(
+        name="SFace",
+        topic="face_recognition",
+        modelPath=os.path.join(root_dir, "models/face_recognition_sface/face_recognition_sface_2021dec_int8bq.onnx")),
     crnn_en=dict(
         name="CRNN",
         topic="text_recognition",
@@ -95,6 +124,10 @@ models = dict(
         name="PPHumanSeg",
         topic="human_segmentation",
         modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8.onnx")),
+    pphumanseg_bq=dict(
+        name="PPHumanSeg",
+        topic="human_segmentation",
+        modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8bq.onnx")),
 )

 datasets = dict(
tools/quantize/README.md CHANGED

@@ -54,6 +54,8 @@ python quantize-inc.py model1

 ## Blockwise quantization usage

+Block-quantized models under each model directory are generated with `--block_size=64`
+
 `block_quantize.py` requires Python>=3.7

 To perform weight-only blockwise quantization:
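To make the scheme above concrete, here is a minimal NumPy sketch of weight-only blockwise int8 quantization, assuming a 1-D tensor whose length is a multiple of the block size. The function name and layout are illustrative only (this is not code from `block_quantize.py`), but it uses the same ingredients: a per-block scale and zero point, saturation to the int8 range, a fallback scale of 1 for blocks whose elements are all equal, and a relative-norm reconstruction error.

```python
import numpy as np

def block_quantize_1d(w: np.ndarray, block_size: int = 64):
    """Illustrative weight-only blockwise int8 quantization of a 1-D float32 tensor."""
    qinfo = np.iinfo(np.int8)
    qmin, qmax = qinfo.min, qinfo.max

    blocks = w.reshape(-1, block_size)                 # one row per block
    # zero must lie inside [b_min, b_max] so that qmin <= zero_point <= qmax
    b_min = np.minimum(blocks.min(axis=1, keepdims=True), 0.0)
    b_max = np.maximum(blocks.max(axis=1, keepdims=True), 0.0)

    # blocks whose elements are all equal get scale 1 and zero point 0
    scale = np.where(b_max != b_min, (b_max - b_min) / (qmax - qmin), 1.0)
    zero_point = np.where(b_max != b_min, np.rint(qmin - b_min / scale), 0.0)

    # quantize with saturation (clipping) to the int8 range
    q = np.clip(np.rint(blocks / scale + zero_point), qmin, qmax).astype(np.int8)

    # dequantize to measure the relative-norm reconstruction error
    deq = (q.astype(np.float32) - zero_point) * scale
    rel_err = np.linalg.norm(deq - blocks) / (np.linalg.norm(blocks) + 1e-10)
    return q.reshape(w.shape), scale.squeeze(), zero_point.astype(np.int8).squeeze(), rel_err

w = np.random.randn(256).astype(np.float32)
q, scale, zp, err = block_quantize_1d(w)
print(q.dtype, scale.shape, f"relative norm error: {err:.4f}")
```

With `block_size=64`, every 64 consecutive weights share one scale and zero point, which helps explain why the `block` rows in the READMEs above stay much closer to the fp32 numbers than the per-tensor `quant` rows.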
tools/quantize/block_quantize.py CHANGED

@@ -8,7 +8,8 @@ if sys.version_info < MIN_PYTHON_VERSION:
 import argparse
 import os
 from dataclasses import dataclass, field
-from typing import
+from typing import Dict, Tuple
+from enum import Enum, auto

 import numpy as np
 import onnx
@@ -22,12 +23,19 @@ SUPPORTED_OPS = {"Conv", "Gemm", "MatMul"}
 ONNX_OPSET = 21


+class WeightCategory(Enum):
+    INITIALIZER = auto()
+    CONSTANT = auto()
+    NONE = auto()
+
+
 @dataclass
 class BlockQuantizeConfig:
     input_model_path: str
     output_model_path: str
     block_size: int
     bits: int
+    verbose: bool


 @dataclass
@@ -75,9 +83,13 @@ def block_quantize_tensor(
     y_scale_elementwise = np.repeat(scale, repeats=repeats, axis=block_axis)
     y_zero_point_elementwise = np.repeat(zero_point, repeats=repeats, axis=block_axis)

-
-
-
+    type_info = np.iinfo(BITS_TO_NUMPY_TYPE[n_bits])
+    min_value = type_info.min
+    max_value = type_info.max
+
+    y = np.rint(x / y_scale_elementwise + y_zero_point_elementwise)
+    y = np.clip(y, min_value, max_value)
+    y = y.astype(BITS_TO_NUMPY_TYPE[n_bits])

     return y

@@ -129,6 +141,13 @@ class BlockQuantizer:
         self.initializers_map = {
             init.name: init for init in self.model.graph.initializer
         }
+        self.costants_map = {
+            node.output[0]: next(
+                attr.t for attr in node.attribute if attr.name == "value"
+            )
+            for node in self.model.graph.node
+            if node.op_type == "Constant"
+        }

     def validate_conf(self):
         if not os.path.isfile(self.conf.input_model_path):
@@ -155,34 +174,59 @@ class BlockQuantizer:
             f"Bits must be one of the following values: [{allowed_values}]."
         )

-    def
+    def get_weight_category(self, name: str) -> WeightCategory:
         if name in self.initializers_map:
+            return WeightCategory.INITIALIZER
+        if name in self.costants_map:
+            return WeightCategory.CONSTANT
+        else:
+            return WeightCategory.NONE
+
+    def get_weight_tensor(self, name: str, category: WeightCategory) -> np.ndarray:
+        if category == WeightCategory.INITIALIZER:
             return onnx.numpy_helper.to_array(self.initializers_map[name])
+        elif category == WeightCategory.CONSTANT:
+            return onnx.numpy_helper.to_array(self.costants_map[name])
+        else:
+            raise AssertionError("Invalid weight category")

-
+    def remove_fp32_weights(self, name: str, category: WeightCategory):
+        if category == WeightCategory.INITIALIZER:
+            self.graph.initializer.remove(
+                next(init for init in self.graph.initializer if init.name == name)
+            )
+        elif category == WeightCategory.CONSTANT:
+            self.graph.node.remove(
+                next(
+                    node
+                    for node in self.graph.node
+                    if node.op_type == "Constant" and node.output[0] == name
+                )
+            )
+        else:
+            raise AssertionError("Invalid weight category")

     def compute_scale_zeropoint(
         self, b_min: np.ndarray, b_max: np.ndarray
     ) -> Tuple[np.ndarray, np.ndarray]:
         assert (
-            b_min
-        ).all(),
-            "minimum must be lower than maximum when computing scale and zero point"
-        )
+            b_min <= b_max
+        ).all(), "minimum must not be greater than maximum when computing scale and zero point"

         # zero must be present in the range, this enforces qmin <= zero_point <= qmax
         b_min = np.minimum(b_min, np.zeros_like(b_min, dtype=b_min.dtype))
         b_max = np.maximum(b_max, np.zeros_like(b_max, dtype=b_max.dtype))

-
-
+        type_info = np.iinfo(BITS_TO_NUMPY_TYPE[self.conf.bits])
+        qmin = type_info.min
+        qmax = type_info.max

         dq = qmax - qmin

-        scales = (b_max - b_min) / dq
-
-
-        )
+        scales = np.where(b_max != b_min, (b_max - b_min) / dq, 1.0)
+
+        zeropoints = np.where(b_max != b_min, np.rint(qmin - b_min / scales), 0.0)
+        zeropoints = zeropoints.astype(BITS_TO_NUMPY_TYPE[self.conf.bits])

         return (scales, zeropoints)

@@ -221,7 +265,8 @@ class BlockQuantizer:
             quantized_weight, quantization_axis, scales, zeropoints
         )

-
+        # Relative Norm
+        qerror = np.linalg.norm(reconstructed_mat - weight) / (np.linalg.norm(weight) + 1e-10)

         res = BlockQuantizeResult(
             quantized_weight,
@@ -241,16 +286,32 @@ class BlockQuantizer:

         return size_mb

-    def display_summary(self, sqe:
-
+    def display_summary(self, sqe: Dict[str, int]):
+        sqe_v = list(sqe.values())
+        if len(sqe_v) == 0:
+            mse = 0
+            print(
+                "Warning: No weights have been quantized, likely due to unsupported layers."
+            )
+        else:
+            mse = sum(sqe_v) / len(sqe_v)
         original_model_size = self.get_model_size(self.conf.input_model_path)
         quantized_model_size = self.get_model_size(self.conf.output_model_path)

+        if self.conf.verbose:
+            sorted_sqe = sorted(sqe.items(), key=lambda item: item[1], reverse=True)
+            longest_key_len = max(len(key) for key in sqe.keys())
+
+            print("Quantization error (Relative Norm) sorted in ascending order:")
+
+            for key, value in sorted_sqe:
+                print(f"{key:<{longest_key_len}} : {value}")
+
         print("Done! Results saved in", self.conf.output_model_path)
         print("\nSummary of Results:\n")
         print(f"{'Metric':<30} {'Value':<10}")
         print(f"{'-'*40}")
-        print(f"{'
+        print(f"{'Relative Norm Error':<31} {mse:.6f}")
         print(f"{'Original Model Size (KB)':<31} {original_model_size:,.2f}")
         print(f"{'Block-Quantized Model Size (KB)':<30} {quantized_model_size:,.2f}")

@@ -258,7 +319,7 @@ class BlockQuantizer:
         print("Quantizing the model...")

         quantized_inputs = []
-        sqe =
+        sqe = {}

         node_idx = 0

@@ -267,7 +328,13 @@ class BlockQuantizer:

             if node.op_type in SUPPORTED_OPS:
                 for input_idx, input_name in enumerate(node.input):
-
+                    weightCategory = self.get_weight_category(input_name)
+
+                    # Skip quantization if weights are taken as external input
+                    if weightCategory == WeightCategory.NONE:
+                        continue
+
+                    weight = self.get_weight_tensor(input_name, weightCategory)

                     quantized_weights_name = f"{input_name}_quantized"
                     quantized_node_name = f"{input_name}_quantized_node"
@@ -279,9 +346,8 @@ class BlockQuantizer:
                     shape_name = f"{input_name}_shape"
                     reshaped_weights_name = f"{input_name}_reshaped"

-                    # Skip quantization if weights
-
-                    if weight is None or weight.size < self.conf.block_size:
+                    # Skip quantization if weights don't contain enough elements to create at least 1 block
+                    if weight.size < self.conf.block_size:
                         continue

                     reshape_needed = weight.ndim > 2
@@ -295,9 +361,15 @@ class BlockQuantizer:
                         )
                         continue

-
+
                     block_quantize_res = self.block_quantize(weight)

+                    # Skip quantization if it wouldn't reduce the model size
+                    if block_quantize_res.block_size == 1:
+                        continue
+
+                    quantized_inputs.append(input_name)
+
                     dequantize_node = create_dequantize_node(
                         quantized_node_name,
                         quantized_weights_name,
@@ -352,14 +424,7 @@ class BlockQuantizer:
                         ]
                     )

-
-                    self.graph.initializer.remove(
-                        next(
-                            init
-                            for init in self.graph.initializer
-                            if init.name == input_name
-                        )
-                    )
+                    self.remove_fp32_weights(input_name, weightCategory)

                     node.input[input_idx] = (
                         reshaped_weights_name
@@ -374,11 +439,12 @@ class BlockQuantizer:

                     self.graph.node.insert(0, dequantize_node)
                     node_idx += 1
-
+                    if reshape_needed:
+                        self.graph.value_info.insert(0, shape_info)
                     self.graph.value_info.insert(0, dequantized_weights_info)

-                    sqe
-
+                    sqe[input_name] = block_quantize_res.quantization_error
+
             node_idx += 1

         onnx.checker.check_model(self.model, full_check=True)
@@ -421,6 +487,13 @@ def setup_args() -> argparse.Namespace:
         default="block_quantized_model.onnx",
         required=False,
     )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="Enable verbose output",
+        required=False,
+    )

     return parser.parse_args()

@@ -433,6 +506,7 @@ if __name__ == "__main__":
         output_model_path=args.output_model,
         block_size=args.block_size,
         bits=args.bits,
+        verbose=args.verbose
     )

     quantizer = BlockQuantizer(quantization_config)
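As the diff shows, `block_quantize.py` stores the quantized weights together with per-block scales and zero points and feeds them through dequantize (`DequantizeLinear`) nodes inserted in front of the consuming `Conv`/`Gemm`/`MatMul` ops. A quick, illustrative way to inspect one of the published block-quantized models using only the `onnx` package is sketched below; the file name is one of the models added in this PR, and the snippet is not part of the tooling itself.

```python
import onnx

# Any *_int8bq.onnx model from the zoo can be inspected the same way.
model = onnx.load("face_detection_yunet_2023mar_int8bq.onnx")
onnx.checker.check_model(model, full_check=True)

# Block-quantized weights show up as DequantizeLinear nodes whose inputs are the
# int8 weights plus the per-block scales and zero points.
deq_nodes = [n for n in model.graph.node if n.op_type == "DequantizeLinear"]
print(f"{len(deq_nodes)} dequantize nodes")
for n in deq_nodes[:5]:
    print(n.name, "<-", list(n.input))
```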