import cv2
import copy
import torch
import numpy as np
import axengine as ort
from pathlib import Path

#from lib.datasets import letterbox
from utils.general import (
    non_max_suppression_face,
    scale_coords,
    scale_coords_landmarks,
    letterbox,
)


def isListempty(inList):
    if isinstance(inList, list):  # Is a list
        return all(map(isListempty, inList))
    return False  # Not a list


class YoloDetector:
    def __init__(
        self,
        model_path='yolov5l-face.onnx',
        min_face=10,
        target_size=None,
    ):
        """
        model_path: path to the model file.
        min_face: minimal face size in pixels; smaller detections are discarded.
        target_size: target size of the smaller image axis (choose a lower value for
            faster inference), e.g. 480, 720, 1080. None keeps the original resolution.
        """
        self._class_path = Path(__file__).parent.absolute()
        self.target_size = target_size
        self.min_face = min_face
        self.session = ort.InferenceSession(model_path)
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [x.name for x in self.session.get_outputs()]

    def _preprocess(self, imgs):
        """
        Preprocess images before passing them through the network: optional downscale,
        letterbox to the 640x640 model input, and conversion to a float32 batch.
        """
        pp_imgs = []
        for img in imgs:
            h0, w0 = img.shape[:2]  # original height and width
            if self.target_size:
                r = self.target_size / min(h0, w0)  # resize ratio for the smaller axis
                if r < 1:  # only downscale, never upscale
                    img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=cv2.INTER_LINEAR)
            #imgsz = check_img_size(max(img.shape[:2]), s=self.detector.stride.max())  # check img_size
            imgsz = (640, 640)
            img = letterbox(img, new_shape=imgsz)[0]
            pp_imgs.append(img)
        pp_imgs = np.array(pp_imgs)
        #pp_imgs = pp_imgs.transpose(0, 3, 1, 2)  # NCHW transpose left disabled: the session consumes NHWC input
        pp_imgs = pp_imgs.astype(np.float32)  # uint8 to fp32
        return pp_imgs

    def _postprocess(self, imgs, origimgs, pred, conf_thres, iou_thres):
        """
        Postprocess the raw model output.

        Returns:
            bboxes: per-image lists of bounding boxes in x1,y1,x2,y2 format.
            landmarks: per-image lists of 5 facial keypoints (eyes, nose, lip corners).
        """
        bboxes = [[] for _ in range(len(origimgs))]
        landmarks = [[] for _ in range(len(origimgs))]

        pred = non_max_suppression_face(pred, conf_thres, iou_thres)

        for image_id, origimg in enumerate(origimgs):
            img_shape = origimg.shape
            image_height, image_width = img_shape[:2]
            gn = torch.tensor(img_shape)[[1, 0, 1, 0]]  # normalization gain whwh
            gn_lks = torch.tensor(img_shape)[[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]]  # normalization gain landmarks
            det = pred[image_id].cpu()
            # Map coordinates from the letterboxed model input back onto the original image.
            scale_coords(imgs[image_id].shape[:2], det[:, :4], img_shape).round()
            scale_coords_landmarks(imgs[image_id].shape[:2], det[:, 5:15], img_shape).round()

            for j in range(det.size()[0]):
                box = (det[j, :4].view(1, 4) / gn).view(-1).tolist()
                box = list(
                    map(int, [box[0] * image_width, box[1] * image_height, box[2] * image_width, box[3] * image_height])
                )
                if box[3] - box[1] < self.min_face:  # skip faces below the minimum height
                    continue
                lm = (det[j, 5:15].view(1, 10) / gn_lks).view(-1).tolist()
                lm = list(map(int, [v * image_width if k % 2 == 0 else v * image_height for k, v in enumerate(lm)]))
                lm = [lm[i:i + 2] for i in range(0, len(lm), 2)]  # group into five (x, y) pairs
                bboxes[image_id].append(box)
                landmarks[image_id].append(lm)

        return bboxes, landmarks
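    # Illustrative helper, not part of the original module: a minimal sketch of how to unpack
    # one row of the (N, 15) array returned by detect_faces() below into a bounding box and
    # five (x, y) landmarks. The name _split_detection_row is an assumption added for clarity.
    @staticmethod
    def _split_detection_row(row):
        """Split one detect_faces() output row into (bbox, landmarks)."""
        bbox = [int(v) for v in row[:4]]  # x1, y1, x2, y2
        # row[4] is a filler value (a copy of x1), not a confidence score; see detect_faces().
        lms = [(int(row[i]), int(row[i + 1])) for i in range(5, 15, 2)]  # five keypoints
        return bbox, lms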
    def detect_faces(self, imgs, conf_thres=0.7, iou_thres=0.5):
        """
        Get bbox coordinates and keypoints of faces on the original image(s).

        Params:
            imgs: image or list of images to detect faces on, in BGR order (converted to RGB
                internally before inference).
            conf_thres: confidence threshold for each prediction.
            iou_thres: IoU threshold for NMS (filters intersecting bboxes).

        Returns:
            An (N, 15) array with one row per detected face across all input images:
            x1, y1, x2, y2, a filler column, then 5 landmark (x, y) pairs
            (eyes, nose, lip corners). None if no faces were found.
        """
        # Pass input images through the face detector.
        images = imgs if isinstance(imgs, list) else [imgs]
        images = [cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for img in images]
        origimgs = copy.deepcopy(images)

        images = self._preprocess(images)

        # Run the model and wrap the raw output in a torch tensor for postprocessing.
        pred = self.session.run(self.output_names, {self.input_name: images})[0]
        pred = torch.from_numpy(pred)

        bboxes, points = self._postprocess(images, origimgs, pred, conf_thres, iou_thres)

        if isListempty(points):
            return None
        bboxes = np.array(bboxes).reshape(-1, 4)
        points = np.array(points).reshape(-1, 10)
        # Filler column between bbox and landmarks: it holds a copy of x1, not a confidence
        # score, and only pads the row out to the x1,y1,x2,y2,score,landmarks layout.
        padding = bboxes[:, 0].reshape(-1, 1)
        return np.concatenate((bboxes, padding, points), axis=1)

    def __call__(self, *args):
        return self.detect_faces(*args)
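

# Minimal usage sketch, assuming a model file at "yolov5l-face.onnx" and a test image at
# "test.jpg"; both paths are hypothetical placeholders, as is the output path "result.jpg".
if __name__ == "__main__":
    detector = YoloDetector(model_path="yolov5l-face.onnx", target_size=640)
    frame = cv2.imread("test.jpg")  # BGR image, as detect_faces expects
    dets = detector(frame)
    if dets is None:
        print("no faces found")
    else:
        for row in dets:
            (x1, y1, x2, y2), lms = YoloDetector._split_detection_row(row)
            cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            for lx, ly in lms:
                cv2.circle(frame, (lx, ly), 2, (0, 0, 255), -1)
        cv2.imwrite("result.jpg", frame)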