|
|
import cv2
|
|
|
import copy
|
|
|
import re
|
|
|
import torch
|
|
|
import numpy as np
|
|
|
import axengine as ort
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from utils.general import (
|
|
|
non_max_suppression_face,
|
|
|
scale_coords,
|
|
|
scale_coords_landmarks,
|
|
|
letterbox,
|
|
|
)
|
|
|
|
|
|
|
|
|
def isListempty(inList):
    """Return True when *inList* is a list containing no non-list leaves.

    An empty list, or a list made up solely of (recursively) empty lists,
    counts as empty. Anything that is not a ``list`` instance -- including
    tuples, strings and ``None`` -- is reported as not empty (False).
    """
    if not isinstance(inList, list):
        return False
    for element in inList:
        # A single non-empty element makes the whole structure non-empty.
        if not isListempty(element):
            return False
    return True
|
|
|
|
|
|
class YoloDetector:
    """Face detector that runs a YOLO face model through an inference session.

    Instances are callable; calling an instance is equivalent to calling
    :meth:`detect_faces`.
    """

    def __init__(
        self,
        model_path='yolov5l-face.onnx',
        min_face=10,
        target_size=None,
    ):
        """
        model_path: path to the model file handed to ``ort.InferenceSession``.
        min_face : minimal face height in pixels; shorter boxes are dropped.
        target_size : target size of smaller image axis (choose lower for faster work). e.g. 480, 720, 1080.
                    None for original resolution.
        """
        self._class_path = Path(__file__).parent.absolute()
        self.target_size = target_size
        self.min_face = min_face
        self.session = ort.InferenceSession(model_path)
        # Cache the tensor names once; they are needed on every run() call.
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [x.name for x in self.session.get_outputs()]

    def _preprocess(self, imgs):
        """
        Prepare images for the network.

        Each image is optionally downscaled so that its smaller side equals
        ``self.target_size`` (images are never upscaled), then passed through
        ``letterbox`` with ``new_shape=(640, 640)``.

        Params:
            imgs: iterable of image arrays (height and width are read from
                  the first two axes).
        Returns:
            A single ``np.float32`` array stacking all processed images.
            No channel transpose or value scaling is applied here.
        """
        imgsz = (640, 640)
        pp_imgs = []
        for img in imgs:
            h0, w0 = img.shape[:2]
            if self.target_size:
                r = self.target_size / min(h0, w0)
                if r < 1:  # only shrink, never enlarge
                    img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_LINEAR)

            img = letterbox(img, new_shape=imgsz)[0]
            pp_imgs.append(img)

        return np.array(pp_imgs).astype(np.float32)

    def _postprocess(self, imgs, origimgs, pred, conf_thres, iou_thres):
        """
        Postprocessing of raw model output.

        Runs ``non_max_suppression_face`` on ``pred``, maps the surviving
        detections from network-input coordinates back to the original
        images, and drops boxes whose height is below ``self.min_face``.

        Params:
            imgs: preprocessed batch as returned by ``_preprocess``.
            origimgs: list of the original images (same order as ``imgs``).
            pred: torch tensor with the raw predictions.
            conf_thres: confidence threshold forwarded to NMS.
            iou_thres: IoU threshold forwarded to NMS.
        Returns:
            bboxes: per-image list of ``[x1, y1, x2, y2]`` integer boxes.
            landmarks: per-image list of five ``[x, y]`` integer keypoints
                       per kept box.
        """
        bboxes = [[] for _ in range(len(origimgs))]
        landmarks = [[] for _ in range(len(origimgs))]

        pred = non_max_suppression_face(pred, conf_thres, iou_thres)

        for image_id, origimg in enumerate(origimgs):
            img_shape = origimg.shape
            image_height, image_width = img_shape[:2]
            # Normalisation gains laid out as (w, h, w, h, ...) to match
            # the x/y interleaving of the box and landmark columns.
            gn = torch.tensor(img_shape)[[1, 0, 1, 0]]
            gn_lks = torch.tensor(img_shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]]
            det = pred[image_id].cpu()
            # NOTE(review): the return values are discarded, so this relies on
            # the helpers rescaling the passed slices of `det` in place, and
            # the trailing .round() has no effect on `det` -- confirm against
            # utils.general.
            scale_coords(imgs[image_id].shape[0:], det[:, :4], img_shape).round()
            scale_coords_landmarks(imgs[image_id].shape[0:], det[:, 5:15], img_shape).round()

            for j in range(det.size()[0]):
                # Columns 0-3 hold the box; normalise, then convert to
                # integer pixel coordinates of the original image.
                box = (det[j, :4].view(1, 4) / gn).view(-1).tolist()
                box = list(
                    map(int, [box[0] * image_width, box[1] * image_height, box[2] * image_width, box[3] * image_height])
                )
                if box[3] - box[1] < self.min_face:
                    continue
                # Columns 5-14 hold five interleaved (x, y) landmarks.
                lm = (det[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist()
                lm = list(
                    map(int, [v * image_width if k % 2 == 0 else v * image_height for k, v in enumerate(lm)])
                )
                lm = [lm[k : k + 2] for k in range(0, len(lm), 2)]
                bboxes[image_id].append(box)
                landmarks[image_id].append(lm)
        return bboxes, landmarks

    @staticmethod
    def _pack_detections(bboxes, points):
        """Flatten per-image boxes/landmarks into one (N, 15) array, or None.

        Detections are flattened across images *before* building the arrays,
        so images with differing numbers of detections (including none) do
        not produce a ragged nested list that NumPy cannot convert.

        Row layout: ``x1, y1, x2, y2, <copy of x1>, 10 landmark coordinates``.
        """
        flat_boxes = [box for per_image in bboxes for box in per_image]
        flat_points = [lm for per_image in points for lm in per_image]
        if not flat_points:
            return None

        boxes_arr = np.array(flat_boxes).reshape(-1, 4)
        points_arr = np.array(flat_points).reshape(-1, 10)
        # Filler fifth column; it duplicates x1, as the original code did.
        padding = boxes_arr[:, 0].reshape(-1, 1)
        return np.concatenate((boxes_arr, padding, points_arr), axis=1)

    def detect_faces(self, imgs, conf_thres=0.7, iou_thres=0.5):
        """
        Get bbox coordinates and keypoints of faces on original image.

        Params:
            imgs: image or list of images to detect faces on with BGR order (convert to RGB order for inference)
            conf_thres: confidence threshold for each prediction
            iou_thres: threshold for NMS (filter of intersecting bboxes)
        Returns:
            ``None`` when no face is kept; otherwise an array of shape
            (N, 15) covering the detections of all input images in order.
            Each row is ``x1, y1, x2, y2``, a filler column equal to ``x1``,
            then the ten coordinates of 5 facial keypoints as x, y pairs.
        """
        images = imgs if isinstance(imgs, list) else [imgs]
        images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in images]
        # Keep untouched copies: their shapes drive the coordinate rescaling.
        origimgs = copy.deepcopy(images)

        images = self._preprocess(images)

        pred = self.session.run(self.output_names, {self.input_name: images})[0]
        pred = torch.from_numpy(pred)

        bboxes, points = self._postprocess(images, origimgs, pred, conf_thres, iou_thres)

        return self._pack_detections(bboxes, points)

    def __call__(self, *args, **kwargs):
        """Alias for :meth:`detect_faces`, so the detector can be called directly."""
        return self.detect_faces(*args, **kwargs)
|
|
|
|