Add options for demo scripts to select backend & targets (#43)
Browse files
* add options for selecting backend & targets
* add eol
- models/face_detection_yunet/demo.py +19 -2
- models/face_detection_yunet/yunet.py +2 -1
- models/face_recognition_sface/demo.py +20 -3
- models/human_segmentation_pphumanseg/demo.py +17 -2
- models/human_segmentation_pphumanseg/pphumanseg.py +12 -4
- models/image_classification_ppresnet/demo.py +16 -1
- models/image_classification_ppresnet/ppresnet.py +13 -5
- models/person_reid_youtureid/demo.py +17 -2
- models/person_reid_youtureid/youtureid.py +14 -4
- models/text_detection_db/db.py +12 -4
- models/text_detection_db/demo.py +18 -1
- models/text_recognition_crnn/crnn.py +15 -5
- models/text_recognition_crnn/demo.py +19 -2
models/face_detection_yunet/demo.py
CHANGED
|
@@ -19,9 +19,23 @@ def str2bool(v):
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
|
| 23 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 24 |
parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the model.')
|
|
|
|
|
|
|
| 25 |
parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
|
| 26 |
parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
|
| 27 |
parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
|
|
@@ -61,7 +75,9 @@ if __name__ == '__main__':
|
|
| 61 |
inputSize=[320, 320],
|
| 62 |
confThreshold=args.conf_threshold,
|
| 63 |
nmsThreshold=args.nms_threshold,
|
| 64 |
-
topK=args.top_k
|
|
|
|
|
|
|
| 65 |
|
| 66 |
# If input is an image
|
| 67 |
if args.input is not None:
|
|
@@ -117,4 +133,5 @@ if __name__ == '__main__':
|
|
| 117 |
# Visualize results in a new Window
|
| 118 |
cv.imshow('YuNet Demo', frame)
|
| 119 |
|
| 120 |
-
tm.reset()
|
|
|
|
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
| 22 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 23 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 24 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 25 |
+
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 26 |
+
try:
|
| 27 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 28 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 29 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 30 |
+
help_msg_targets += "; {:d}: NPU"
|
| 31 |
+
except:
|
| 32 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 33 |
+
|
| 34 |
parser = argparse.ArgumentParser(description='YuNet: A Fast and Accurate CNN-based Face Detector (https://github.com/ShiqiYu/libfacedetection).')
|
| 35 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 36 |
parser.add_argument('--model', '-m', type=str, default='face_detection_yunet_2021dec.onnx', help='Path to the model.')
|
| 37 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 38 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 39 |
parser.add_argument('--conf_threshold', type=float, default=0.9, help='Filter out faces of confidence < conf_threshold.')
|
| 40 |
parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
|
| 41 |
parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
|
|
|
|
| 75 |
inputSize=[320, 320],
|
| 76 |
confThreshold=args.conf_threshold,
|
| 77 |
nmsThreshold=args.nms_threshold,
|
| 78 |
+
topK=args.top_k,
|
| 79 |
+
backendId=args.backend,
|
| 80 |
+
targetId=args.target)
|
| 81 |
|
| 82 |
# If input is an image
|
| 83 |
if args.input is not None:
|
|
|
|
| 133 |
# Visualize results in a new Window
|
| 134 |
cv.imshow('YuNet Demo', frame)
|
| 135 |
|
| 136 |
+
tm.reset()
|
| 137 |
+
|
models/face_detection_yunet/yunet.py
CHANGED
|
@@ -63,4 +63,5 @@ class YuNet:
|
|
| 63 |
def infer(self, image):
|
| 64 |
# Forward
|
| 65 |
faces = self._model.detect(image)
|
| 66 |
-
return faces[1]
|
|
|
|
|
|
| 63 |
def infer(self, image):
|
| 64 |
# Forward
|
| 65 |
faces = self._model.detect(image)
|
| 66 |
+
return faces[1]
|
| 67 |
+
|
models/face_recognition_sface/demo.py
CHANGED
|
@@ -23,11 +23,25 @@ def str2bool(v):
|
|
| 23 |
else:
|
| 24 |
raise NotImplementedError
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
parser = argparse.ArgumentParser(
|
| 27 |
description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
|
| 28 |
parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.')
|
| 29 |
parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.')
|
| 30 |
parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.')
|
|
|
|
|
|
|
| 31 |
parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.')
|
| 32 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
| 33 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
|
@@ -35,13 +49,15 @@ args = parser.parse_args()
|
|
| 35 |
|
| 36 |
if __name__ == '__main__':
|
| 37 |
# Instantiate SFace for face recognition
|
| 38 |
-
recognizer = SFace(modelPath=args.model, disType=args.dis_type)
|
| 39 |
# Instantiate YuNet for face detection
|
| 40 |
detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2021dec.onnx',
|
| 41 |
inputSize=[320, 320],
|
| 42 |
confThreshold=0.9,
|
| 43 |
nmsThreshold=0.3,
|
| 44 |
-
topK=5000
|
|
|
|
|
|
|
| 45 |
|
| 46 |
img1 = cv.imread(args.input1)
|
| 47 |
img2 = cv.imread(args.input2)
|
|
@@ -56,4 +72,5 @@ if __name__ == '__main__':
|
|
| 56 |
|
| 57 |
# Match
|
| 58 |
result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
|
| 59 |
-
print('Result: {}.'.format('same identity' if result else 'different identities'))
|
|
|
|
|
|
| 23 |
else:
|
| 24 |
raise NotImplementedError
|
| 25 |
|
| 26 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 27 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 28 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 29 |
+
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 30 |
+
try:
|
| 31 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 32 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 33 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 34 |
+
help_msg_targets += "; {:d}: NPU"
|
| 35 |
+
except:
|
| 36 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 37 |
+
|
| 38 |
parser = argparse.ArgumentParser(
|
| 39 |
description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
|
| 40 |
parser.add_argument('--input1', '-i1', type=str, help='Path to the input image 1.')
|
| 41 |
parser.add_argument('--input2', '-i2', type=str, help='Path to the input image 2.')
|
| 42 |
parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx', help='Path to the model.')
|
| 43 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 44 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 45 |
parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0, help='Distance type. \'0\': cosine, \'1\': norm_l1.')
|
| 46 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
| 47 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
|
|
|
| 49 |
|
| 50 |
if __name__ == '__main__':
|
| 51 |
# Instantiate SFace for face recognition
|
| 52 |
+
recognizer = SFace(modelPath=args.model, disType=args.dis_type, backendId=args.backend, targetId=args.target)
|
| 53 |
# Instantiate YuNet for face detection
|
| 54 |
detector = YuNet(modelPath='../face_detection_yunet/face_detection_yunet_2021dec.onnx',
|
| 55 |
inputSize=[320, 320],
|
| 56 |
confThreshold=0.9,
|
| 57 |
nmsThreshold=0.3,
|
| 58 |
+
topK=5000,
|
| 59 |
+
backendId=args.backend,
|
| 60 |
+
targetId=args.target)
|
| 61 |
|
| 62 |
img1 = cv.imread(args.input1)
|
| 63 |
img2 = cv.imread(args.input2)
|
|
|
|
| 72 |
|
| 73 |
# Match
|
| 74 |
result = recognizer.match(img1, face1[0][:-1], img2, face2[0][:-1])
|
| 75 |
+
print('Result: {}.'.format('same identity' if result else 'different identities'))
|
| 76 |
+
|
models/human_segmentation_pphumanseg/demo.py
CHANGED
|
@@ -19,9 +19,23 @@ def str2bool(v):
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
|
| 23 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 24 |
parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.')
|
|
|
|
|
|
|
| 25 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
| 26 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
| 27 |
args = parser.parse_args()
|
|
@@ -84,7 +98,7 @@ def visualize(image, result, weight=0.6, fps=None):
|
|
| 84 |
|
| 85 |
if __name__ == '__main__':
|
| 86 |
# Instantiate PPHumanSeg
|
| 87 |
-
model = PPHumanSeg(modelPath=args.model)
|
| 88 |
|
| 89 |
if args.input is not None:
|
| 90 |
# Read image and resize to 192x192
|
|
@@ -138,4 +152,5 @@ if __name__ == '__main__':
|
|
| 138 |
# Visualize results in a new window
|
| 139 |
cv.imshow('PPHumanSeg Demo', frame)
|
| 140 |
|
| 141 |
-
tm.reset()
|
|
|
|
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
| 22 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 23 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 24 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 25 |
+
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 26 |
+
try:
|
| 27 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 28 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 29 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 30 |
+
help_msg_targets += "; {:d}: NPU"
|
| 31 |
+
except:
|
| 32 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 33 |
+
|
| 34 |
parser = argparse.ArgumentParser(description='PPHumanSeg (https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/PP-HumanSeg)')
|
| 35 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 36 |
parser.add_argument('--model', '-m', type=str, default='human_segmentation_pphumanseg_2021oct.onnx', help='Path to the model.')
|
| 37 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 38 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 39 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
| 40 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
| 41 |
args = parser.parse_args()
|
|
|
|
| 98 |
|
| 99 |
if __name__ == '__main__':
|
| 100 |
# Instantiate PPHumanSeg
|
| 101 |
+
model = PPHumanSeg(modelPath=args.model, backendId=args.backend, targetId=args.target)
|
| 102 |
|
| 103 |
if args.input is not None:
|
| 104 |
# Read image and resize to 192x192
|
|
|
|
| 152 |
# Visualize results in a new window
|
| 153 |
cv.imshow('PPHumanSeg Demo', frame)
|
| 154 |
|
| 155 |
+
tm.reset()
|
| 156 |
+
|
models/human_segmentation_pphumanseg/pphumanseg.py
CHANGED
|
@@ -8,9 +8,14 @@ import numpy as np
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class PPHumanSeg:
|
| 11 |
-
def __init__(self, modelPath):
|
| 12 |
self._modelPath = modelPath
|
|
|
|
|
|
|
|
|
|
| 13 |
self._model = cv.dnn.readNet(self._modelPath)
|
|
|
|
|
|
|
| 14 |
|
| 15 |
self._inputNames = ''
|
| 16 |
self._outputNames = ['save_infer_model/scale_0.tmp_1']
|
|
@@ -23,10 +28,12 @@ class PPHumanSeg:
|
|
| 23 |
return self.__class__.__name__
|
| 24 |
|
| 25 |
def setBackend(self, backend_id):
|
| 26 |
-
self.
|
|
|
|
| 27 |
|
| 28 |
def setTarget(self, target_id):
|
| 29 |
-
self.
|
|
|
|
| 30 |
|
| 31 |
def _preprocess(self, image):
|
| 32 |
image = image.astype(np.float32, copy=False) / 255.0
|
|
@@ -52,4 +59,5 @@ class PPHumanSeg:
|
|
| 52 |
|
| 53 |
def _postprocess(self, outputBlob):
|
| 54 |
result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
|
| 55 |
-
return result
|
|
|
|
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class PPHumanSeg:
|
| 11 |
+
def __init__(self, modelPath, backendId=0, targetId=0):
|
| 12 |
self._modelPath = modelPath
|
| 13 |
+
self._backendId = backendId
|
| 14 |
+
self._targetId = targetId
|
| 15 |
+
|
| 16 |
self._model = cv.dnn.readNet(self._modelPath)
|
| 17 |
+
self._model.setPreferableBackend(self._backendId)
|
| 18 |
+
self._model.setPreferableTarget(self._targetId)
|
| 19 |
|
| 20 |
self._inputNames = ''
|
| 21 |
self._outputNames = ['save_infer_model/scale_0.tmp_1']
|
|
|
|
| 28 |
return self.__class__.__name__
|
| 29 |
|
| 30 |
def setBackend(self, backend_id):
|
| 31 |
+
self._backendId = backend_id
|
| 32 |
+
self._model.setPreferableBackend(self._backendId)
|
| 33 |
|
| 34 |
def setTarget(self, target_id):
|
| 35 |
+
self._targetId = target_id
|
| 36 |
+
self._model.setPreferableTarget(self._targetId)
|
| 37 |
|
| 38 |
def _preprocess(self, image):
|
| 39 |
image = image.astype(np.float32, copy=False) / 255.0
|
|
|
|
| 59 |
|
| 60 |
def _postprocess(self, outputBlob):
|
| 61 |
result = np.argmax(outputBlob[0], axis=1).astype(np.uint8)
|
| 62 |
+
return result
|
| 63 |
+
|
models/image_classification_ppresnet/demo.py
CHANGED
|
@@ -19,15 +19,29 @@ def str2bool(v):
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
|
| 23 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
|
| 24 |
parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
|
|
|
|
|
|
|
| 25 |
parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
|
| 26 |
args = parser.parse_args()
|
| 27 |
|
| 28 |
if __name__ == '__main__':
|
| 29 |
# Instantiate ResNet
|
| 30 |
-
model = PPResNet(modelPath=args.model, labelPath=args.label)
|
| 31 |
|
| 32 |
# Read image and get a 224x224 crop from a 256x256 resized
|
| 33 |
image = cv.imread(args.input)
|
|
@@ -40,3 +54,4 @@ if __name__ == '__main__':
|
|
| 40 |
|
| 41 |
# Print result
|
| 42 |
print('label: {}'.format(result))
|
|
|
|
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
| 22 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 23 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 24 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 25 |
+
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 26 |
+
try:
|
| 27 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 28 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 29 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 30 |
+
help_msg_targets += "; {:d}: NPU"
|
| 31 |
+
except:
|
| 32 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 33 |
+
|
| 34 |
parser = argparse.ArgumentParser(description='Deep Residual Learning for Image Recognition (https://arxiv.org/abs/1512.03385, https://github.com/PaddlePaddle/PaddleHub)')
|
| 35 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
|
| 36 |
parser.add_argument('--model', '-m', type=str, default='image_classification_ppresnet50_2022jan.onnx', help='Path to the model.')
|
| 37 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 38 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 39 |
parser.add_argument('--label', '-l', type=str, default='./imagenet_labels.txt', help='Path to the dataset labels.')
|
| 40 |
args = parser.parse_args()
|
| 41 |
|
| 42 |
if __name__ == '__main__':
|
| 43 |
# Instantiate ResNet
|
| 44 |
+
model = PPResNet(modelPath=args.model, labelPath=args.label, backendId=args.backend, targetId=args.target)
|
| 45 |
|
| 46 |
# Read image and get a 224x224 crop from a 256x256 resized
|
| 47 |
image = cv.imread(args.input)
|
|
|
|
| 54 |
|
| 55 |
# Print result
|
| 56 |
print('label: {}'.format(result))
|
| 57 |
+
|
models/image_classification_ppresnet/ppresnet.py
CHANGED
|
@@ -9,10 +9,15 @@ import numpy as np
|
|
| 9 |
import cv2 as cv
|
| 10 |
|
| 11 |
class PPResNet:
|
| 12 |
-
def __init__(self, modelPath, labelPath):
|
| 13 |
self._modelPath = modelPath
|
| 14 |
-
self._model = cv.dnn.readNet(self._modelPath)
|
| 15 |
self._labelPath = labelPath
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
self._inputNames = ''
|
| 18 |
self._outputNames = ['save_infer_model/scale_0.tmp_0']
|
|
@@ -35,10 +40,12 @@ class PPResNet:
|
|
| 35 |
return self.__class__.__name__
|
| 36 |
|
| 37 |
def setBackend(self, backend_id):
|
| 38 |
-
self.
|
|
|
|
| 39 |
|
| 40 |
def setTarget(self, target_id):
|
| 41 |
-
self.
|
|
|
|
| 42 |
|
| 43 |
def _preprocess(self, image):
|
| 44 |
image = image.astype(np.float32, copy=False) / 255.0
|
|
@@ -64,4 +71,5 @@ class PPResNet:
|
|
| 64 |
|
| 65 |
def _postprocess(self, outputBlob):
|
| 66 |
class_id = np.argmax(outputBlob[0])
|
| 67 |
-
return self._labels[class_id]
|
|
|
|
|
|
| 9 |
import cv2 as cv
|
| 10 |
|
| 11 |
class PPResNet:
|
| 12 |
+
def __init__(self, modelPath, labelPath, backendId=0, targetId=0):
|
| 13 |
self._modelPath = modelPath
|
|
|
|
| 14 |
self._labelPath = labelPath
|
| 15 |
+
self._backendId = backendId
|
| 16 |
+
self._targetId = targetId
|
| 17 |
+
|
| 18 |
+
self._model = cv.dnn.readNet(self._modelPath)
|
| 19 |
+
self._model.setPreferableBackend(self._backendId)
|
| 20 |
+
self._model.setPreferableTarget(self._targetId)
|
| 21 |
|
| 22 |
self._inputNames = ''
|
| 23 |
self._outputNames = ['save_infer_model/scale_0.tmp_0']
|
|
|
|
| 40 |
return self.__class__.__name__
|
| 41 |
|
| 42 |
def setBackend(self, backend_id):
|
| 43 |
+
self._backendId = backend_id
|
| 44 |
+
self._model.setPreferableBackend(self._backendId)
|
| 45 |
|
| 46 |
def setTarget(self, target_id):
|
| 47 |
+
self._targetId = target_id
|
| 48 |
+
self._model.setPreferableTarget(self._targetId)
|
| 49 |
|
| 50 |
def _preprocess(self, image):
|
| 51 |
image = image.astype(np.float32, copy=False) / 255.0
|
|
|
|
| 71 |
|
| 72 |
def _postprocess(self, outputBlob):
|
| 73 |
class_id = np.argmax(outputBlob[0])
|
| 74 |
+
return self._labels[class_id]
|
| 75 |
+
|
models/person_reid_youtureid/demo.py
CHANGED
|
@@ -20,10 +20,24 @@ def str2bool(v):
|
|
| 20 |
else:
|
| 21 |
raise NotImplementedError
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
parser = argparse.ArgumentParser(
|
| 24 |
description="ReID baseline models from Tencent Youtu Lab")
|
| 25 |
parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
|
| 26 |
parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
|
|
|
|
|
|
|
| 27 |
parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
|
| 28 |
parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
|
| 29 |
parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
|
@@ -65,7 +79,7 @@ def visualize(results, query_dir, gallery_dir, output_size=(128, 384)):
|
|
| 65 |
|
| 66 |
if __name__ == '__main__':
|
| 67 |
# Instantiate YoutuReID for person ReID
|
| 68 |
-
net = YoutuReID(modelPath=args.model)
|
| 69 |
|
| 70 |
# Read images from dir
|
| 71 |
query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
|
|
@@ -97,4 +111,5 @@ if __name__ == '__main__':
|
|
| 97 |
cv.namedWindow('result-{}'.format(f), cv.WINDOW_AUTOSIZE)
|
| 98 |
cv.imshow('result-{}'.format(f), img)
|
| 99 |
cv.waitKey(0)
|
| 100 |
-
cv.destroyAllWindows()
|
|
|
|
|
|
| 20 |
else:
|
| 21 |
raise NotImplementedError
|
| 22 |
|
| 23 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 24 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 25 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 26 |
+
help_msg_targets = "Chose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 27 |
+
try:
|
| 28 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 29 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 30 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 31 |
+
help_msg_targets += "; {:d}: NPU"
|
| 32 |
+
except:
|
| 33 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 34 |
+
|
| 35 |
parser = argparse.ArgumentParser(
|
| 36 |
description="ReID baseline models from Tencent Youtu Lab")
|
| 37 |
parser.add_argument('--query_dir', '-q', type=str, help='Query directory.')
|
| 38 |
parser.add_argument('--gallery_dir', '-g', type=str, help='Gallery directory.')
|
| 39 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 40 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 41 |
parser.add_argument('--topk', type=int, default=10, help='Top-K closest from gallery for each query.')
|
| 42 |
parser.add_argument('--model', '-m', type=str, default='person_reid_youtu_2021nov.onnx', help='Path to the model.')
|
| 43 |
parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
|
|
|
| 79 |
|
| 80 |
if __name__ == '__main__':
|
| 81 |
# Instantiate YoutuReID for person ReID
|
| 82 |
+
net = YoutuReID(modelPath=args.model, backendId=args.backend, targetId=args.target)
|
| 83 |
|
| 84 |
# Read images from dir
|
| 85 |
query_img_list, query_file_list = readImageFromDirectory(args.query_dir)
|
|
|
|
| 111 |
cv.namedWindow('result-{}'.format(f), cv.WINDOW_AUTOSIZE)
|
| 112 |
cv.imshow('result-{}'.format(f), img)
|
| 113 |
cv.waitKey(0)
|
| 114 |
+
cv.destroyAllWindows()
|
| 115 |
+
|
models/person_reid_youtureid/youtureid.py
CHANGED
|
@@ -8,8 +8,15 @@ import numpy as np
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class YoutuReID:
|
| 11 |
-
def __init__(self, modelPath):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
self._model = cv.dnn.readNet(modelPath)
|
|
|
|
|
|
|
|
|
|
| 13 |
self._input_size = (128, 256) # fixed
|
| 14 |
self._output_dim = 768
|
| 15 |
self._mean = (0.485, 0.456, 0.406)
|
|
@@ -20,10 +27,12 @@ class YoutuReID:
|
|
| 20 |
return self.__class__.__name__
|
| 21 |
|
| 22 |
def setBackend(self, backend_id):
|
| 23 |
-
self.
|
|
|
|
| 24 |
|
| 25 |
def setTarget(self, target_id):
|
| 26 |
-
self.
|
|
|
|
| 27 |
|
| 28 |
def _preprocess(self, image):
|
| 29 |
image = image[:, :, ::-1]
|
|
@@ -57,4 +66,5 @@ class YoutuReID:
|
|
| 57 |
|
| 58 |
dist = np.matmul(query_arr, gallery_arr.T)
|
| 59 |
idx = np.argsort(-dist, axis=1)
|
| 60 |
-
return [i[0:topK] for i in idx]
|
|
|
|
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class YoutuReID:
|
| 11 |
+
def __init__(self, modelPath, backendId=0, targetId=0):
|
| 12 |
+
self._modelPath = modelPath
|
| 13 |
+
self._backendId = backendId
|
| 14 |
+
self._targetId = targetId
|
| 15 |
+
|
| 16 |
self._model = cv.dnn.readNet(modelPath)
|
| 17 |
+
self._model.setPreferableBackend(self._backendId)
|
| 18 |
+
self._model.setPreferableTarget(self._targetId)
|
| 19 |
+
|
| 20 |
self._input_size = (128, 256) # fixed
|
| 21 |
self._output_dim = 768
|
| 22 |
self._mean = (0.485, 0.456, 0.406)
|
|
|
|
| 27 |
return self.__class__.__name__
|
| 28 |
|
| 29 |
def setBackend(self, backend_id):
|
| 30 |
+
self._backendId = backend_id
|
| 31 |
+
self._model.setPreferableBackend(self._backendId)
|
| 32 |
|
| 33 |
def setTarget(self, target_id):
|
| 34 |
+
self._targetId = target_id
|
| 35 |
+
self._model.setPreferableTarget(self._targetId)
|
| 36 |
|
| 37 |
def _preprocess(self, image):
|
| 38 |
image = image[:, :, ::-1]
|
|
|
|
| 66 |
|
| 67 |
dist = np.matmul(query_arr, gallery_arr.T)
|
| 68 |
idx = np.argsort(-dist, axis=1)
|
| 69 |
+
return [i[0:topK] for i in idx]
|
| 70 |
+
|
models/text_detection_db/db.py
CHANGED
|
@@ -8,7 +8,7 @@ import numpy as np
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class DB:
|
| 11 |
-
def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0):
|
| 12 |
self._modelPath = modelPath
|
| 13 |
self._model = cv.dnn_TextDetectionModel_DB(
|
| 14 |
cv.dnn.readNet(self._modelPath)
|
|
@@ -21,6 +21,11 @@ class DB:
|
|
| 21 |
self._polygonThreshold = polygonThreshold
|
| 22 |
self._maxCandidates = maxCandidates
|
| 23 |
self._unclipRatio = unclipRatio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
self._model.setBinaryThreshold(self._binaryThreshold)
|
| 26 |
self._model.setPolygonThreshold(self._polygonThreshold)
|
|
@@ -34,10 +39,12 @@ class DB:
|
|
| 34 |
return self.__class__.__name__
|
| 35 |
|
| 36 |
def setBackend(self, backend):
|
| 37 |
-
self.
|
|
|
|
| 38 |
|
| 39 |
def setTarget(self, target):
|
| 40 |
-
self.
|
|
|
|
| 41 |
|
| 42 |
def setInputSize(self, input_size):
|
| 43 |
self._inputSize = tuple(input_size)
|
|
@@ -47,4 +54,5 @@ class DB:
|
|
| 47 |
assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
|
| 48 |
assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
|
| 49 |
|
| 50 |
-
return self._model.detect(image)
|
|
|
|
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class DB:
|
| 11 |
+
def __init__(self, modelPath, inputSize=[736, 736], binaryThreshold=0.3, polygonThreshold=0.5, maxCandidates=200, unclipRatio=2.0, backendId=0, targetId=0):
|
| 12 |
self._modelPath = modelPath
|
| 13 |
self._model = cv.dnn_TextDetectionModel_DB(
|
| 14 |
cv.dnn.readNet(self._modelPath)
|
|
|
|
| 21 |
self._polygonThreshold = polygonThreshold
|
| 22 |
self._maxCandidates = maxCandidates
|
| 23 |
self._unclipRatio = unclipRatio
|
| 24 |
+
self._backendId = backendId
|
| 25 |
+
self._targetId = targetId
|
| 26 |
+
|
| 27 |
+
self._model.setPreferableBackend(self._backendId)
|
| 28 |
+
self._model.setPreferableTarget(self._targetId)
|
| 29 |
|
| 30 |
self._model.setBinaryThreshold(self._binaryThreshold)
|
| 31 |
self._model.setPolygonThreshold(self._polygonThreshold)
|
|
|
|
| 39 |
return self.__class__.__name__
|
| 40 |
|
| 41 |
def setBackend(self, backend):
|
| 42 |
+
self._backendId = backend
|
| 43 |
+
self._model.setPreferableBackend(self._backendId)
|
| 44 |
|
| 45 |
def setTarget(self, target):
|
| 46 |
+
self._targetId = target
|
| 47 |
+
self._model.setPreferableTarget(self._targetId)
|
| 48 |
|
| 49 |
def setInputSize(self, input_size):
|
| 50 |
self._inputSize = tuple(input_size)
|
|
|
|
| 54 |
assert image.shape[0] == self._inputSize[1], '{} (height of input image) != {} (preset height)'.format(image.shape[0], self._inputSize[1])
|
| 55 |
assert image.shape[1] == self._inputSize[0], '{} (width of input image) != {} (preset width)'.format(image.shape[1], self._inputSize[0])
|
| 56 |
|
| 57 |
+
return self._model.detect(image)
|
| 58 |
+
|
models/text_detection_db/demo.py
CHANGED
|
@@ -19,9 +19,23 @@ def str2bool(v):
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
|
| 23 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 24 |
parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
|
|
|
|
|
|
|
| 25 |
parser.add_argument('--width', type=int, default=736,
|
| 26 |
help='Preprocess input image by resizing to a specific width. It should be multiple by 32.')
|
| 27 |
parser.add_argument('--height', type=int, default=736,
|
|
@@ -53,6 +67,8 @@ if __name__ == '__main__':
|
|
| 53 |
polygonThreshold=args.polygon_threshold,
|
| 54 |
maxCandidates=args.max_candidates,
|
| 55 |
unclipRatio=args.unclip_ratio
|
|
|
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
# If input is an image
|
|
@@ -104,4 +120,5 @@ if __name__ == '__main__':
|
|
| 104 |
# Visualize results in a new Window
|
| 105 |
cv.imshow('{} Demo'.format(model.name), frame)
|
| 106 |
|
| 107 |
-
tm.reset()
|
|
|
|
|
|
| 19 |
else:
|
| 20 |
raise NotImplementedError
|
| 21 |
|
| 22 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 23 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 24 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 25 |
+
help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 26 |
+
try:
|
| 27 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 28 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 29 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 30 |
+
help_msg_targets += "; {:d}: NPU"
|
| 31 |
+
except:
|
| 32 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 33 |
+
|
| 34 |
parser = argparse.ArgumentParser(description='Real-time Scene Text Detection with Differentiable Binarization (https://arxiv.org/abs/1911.08947).')
|
| 35 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 36 |
parser.add_argument('--model', '-m', type=str, default='text_detection_DB_TD500_resnet18_2021sep.onnx', help='Path to the model.')
|
| 37 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 38 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 39 |
parser.add_argument('--width', type=int, default=736,
|
| 40 |
help='Preprocess input image by resizing to a specific width. It should be a multiple of 32.')
|
| 41 |
parser.add_argument('--height', type=int, default=736,
|
|
|
|
| 67 |
polygonThreshold=args.polygon_threshold,
|
| 68 |
maxCandidates=args.max_candidates,
|
| 69 |
unclipRatio=args.unclip_ratio,
|
| 70 |
+
backendId=args.backend,
|
| 71 |
+
targetId=args.target
|
| 72 |
)
|
| 73 |
|
| 74 |
# If input is an image
|
|
|
|
| 120 |
# Visualize results in a new Window
|
| 121 |
cv.imshow('{} Demo'.format(model.name), frame)
|
| 122 |
|
| 123 |
+
tm.reset()
|
| 124 |
+
|
models/text_recognition_crnn/crnn.py
CHANGED
|
@@ -8,10 +8,17 @@ import numpy as np
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class CRNN:
|
| 11 |
-
def __init__(self, modelPath, charsetPath):
|
| 12 |
self._model_path = modelPath
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
self._model = cv.dnn.readNet(self._model_path)
|
| 14 |
-
self.
|
|
|
|
|
|
|
|
|
|
| 15 |
self._inputSize = [100, 32] # Fixed
|
| 16 |
self._targetVertices = np.array([
|
| 17 |
[0, self._inputSize[1] - 1],
|
|
@@ -33,10 +40,12 @@ class CRNN:
|
|
| 33 |
return charset
|
| 34 |
|
| 35 |
def setBackend(self, backend_id):
|
| 36 |
-
self.
|
|
|
|
| 37 |
|
| 38 |
def setTarget(self, target_id):
|
| 39 |
-
self.
|
|
|
|
| 40 |
|
| 41 |
def _preprocess(self, image, rbbox):
|
| 42 |
# Remove conf, reshape and ensure all is np.float32
|
|
@@ -81,4 +90,5 @@ class CRNN:
|
|
| 81 |
for i in range(len(text)):
|
| 82 |
if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
|
| 83 |
char_list.append(text[i])
|
| 84 |
-
return ''.join(char_list)
|
|
|
|
|
|
| 8 |
import cv2 as cv
|
| 9 |
|
| 10 |
class CRNN:
|
| 11 |
+
def __init__(self, modelPath, charsetPath, backendId=0, targetId=0):
|
| 12 |
self._model_path = modelPath
|
| 13 |
+
self._charsetPath = charsetPath
|
| 14 |
+
self._backendId = backendId
|
| 15 |
+
self._targetId = targetId
|
| 16 |
+
|
| 17 |
self._model = cv.dnn.readNet(self._model_path)
|
| 18 |
+
self._model.setPreferableBackend(self._backendId)
|
| 19 |
+
self._model.setPreferableTarget(self._targetId)
|
| 20 |
+
|
| 21 |
+
self._charset = self._load_charset(self._charsetPath)
|
| 22 |
self._inputSize = [100, 32] # Fixed
|
| 23 |
self._targetVertices = np.array([
|
| 24 |
[0, self._inputSize[1] - 1],
|
|
|
|
| 40 |
return charset
|
| 41 |
|
| 42 |
def setBackend(self, backend_id):
|
| 43 |
+
self._backendId = backend_id
|
| 44 |
+
self._model.setPreferableBackend(self._backendId)
|
| 45 |
|
| 46 |
def setTarget(self, target_id):
|
| 47 |
+
self._targetId = target_id
|
| 48 |
+
self._model.setPreferableTarget(self._targetId)
|
| 49 |
|
| 50 |
def _preprocess(self, image, rbbox):
|
| 51 |
# Remove conf, reshape and ensure all is np.float32
|
|
|
|
| 90 |
for i in range(len(text)):
|
| 91 |
if text[i] != '-' and (not (i > 0 and text[i] == text[i - 1])):
|
| 92 |
char_list.append(text[i])
|
| 93 |
+
return ''.join(char_list)
|
| 94 |
+
|
models/text_recognition_crnn/demo.py
CHANGED
|
@@ -23,10 +23,24 @@ def str2bool(v):
|
|
| 23 |
else:
|
| 24 |
raise NotImplementedError
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
parser = argparse.ArgumentParser(
|
| 27 |
description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
|
| 28 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 29 |
parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
|
|
|
|
|
|
|
| 30 |
parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
|
| 31 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
| 32 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
|
@@ -50,7 +64,9 @@ if __name__ == '__main__':
|
|
| 50 |
binaryThreshold=0.3,
|
| 51 |
polygonThreshold=0.5,
|
| 52 |
maxCandidates=200,
|
| 53 |
-
unclipRatio=2.0
|
|
|
|
|
|
|
| 54 |
)
|
| 55 |
|
| 56 |
# If input is an image
|
|
@@ -118,4 +134,5 @@ if __name__ == '__main__':
|
|
| 118 |
print(texts)
|
| 119 |
|
| 120 |
# Visualize results in a new Window
|
| 121 |
-
cv.imshow('{} Demo'.format(recognizer.name), frame)
|
|
|
|
|
|
| 23 |
else:
|
| 24 |
raise NotImplementedError
|
| 25 |
|
| 26 |
+
backends = [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_BACKEND_CUDA]
|
| 27 |
+
targets = [cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16]
|
| 28 |
+
help_msg_backends = "Choose one of the computation backends: {:d}: OpenCV implementation (default); {:d}: CUDA"
|
| 29 |
+
help_msg_targets = "Choose one of the target computation devices: {:d}: CPU (default); {:d}: CUDA; {:d}: CUDA fp16"
|
| 30 |
+
try:
|
| 31 |
+
backends += [cv.dnn.DNN_BACKEND_TIMVX]
|
| 32 |
+
targets += [cv.dnn.DNN_TARGET_NPU]
|
| 33 |
+
help_msg_backends += "; {:d}: TIMVX"
|
| 34 |
+
help_msg_targets += "; {:d}: NPU"
|
| 35 |
+
except:
|
| 36 |
+
print('This version of OpenCV does not support TIM-VX and NPU. Visit https://gist.github.com/fengyuentau/5a7a5ba36328f2b763aea026c43fa45f for more information.')
|
| 37 |
+
|
| 38 |
parser = argparse.ArgumentParser(
|
| 39 |
description="An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition (https://arxiv.org/abs/1507.05717)")
|
| 40 |
parser.add_argument('--input', '-i', type=str, help='Path to the input image. Omit for using default camera.')
|
| 41 |
parser.add_argument('--model', '-m', type=str, default='text_recognition_CRNN_EN_2021sep.onnx', help='Path to the model.')
|
| 42 |
+
parser.add_argument('--backend', '-b', type=int, default=backends[0], help=help_msg_backends.format(*backends))
|
| 43 |
+
parser.add_argument('--target', '-t', type=int, default=targets[0], help=help_msg_targets.format(*targets))
|
| 44 |
parser.add_argument('--charset', '-c', type=str, default='charset_36_EN.txt', help='Path to the charset file corresponding to the selected model.')
|
| 45 |
parser.add_argument('--save', '-s', type=str, default=False, help='Set true to save results. This flag is invalid when using camera.')
|
| 46 |
parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
|
|
|
|
| 64 |
binaryThreshold=0.3,
|
| 65 |
polygonThreshold=0.5,
|
| 66 |
maxCandidates=200,
|
| 67 |
+
unclipRatio=2.0,
|
| 68 |
+
backendId=args.backend,
|
| 69 |
+
targetId=args.target
|
| 70 |
)
|
| 71 |
|
| 72 |
# If input is an image
|
|
|
|
| 134 |
print(texts)
|
| 135 |
|
| 136 |
# Visualize results in a new Window
|
| 137 |
+
cv.imshow('{} Demo'.format(recognizer.name), frame)
|
| 138 |
+
|