opencv
/

handpose_estimation_mediapipe

ONNX

Model card Files Files and versions

xet

Community

Wanli commited on Mar 8, 2023

Commit

576ce22

1 Parent(s): 549df9a

Update hadnpose preprocess (#141)

Browse files

* update preprocess of handpose estimation to handle some conner cases

* back PALM_BOX_ENLARGE_FACTOR to default

Files changed (1) hide show

mp_handpose.py +64 -36

mp_handpose.py CHANGED Viewed

@@ -13,6 +13,8 @@ class MPHandPose:
         self.PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2]
         self.PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0
         self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2
         self.PALM_BOX_SHIFT_VECTOR = [0, -0.4]
         self.PALM_BOX_ENLARGE_FACTOR = 3
         self.HAND_BOX_SHIFT_VECTOR = [0, -0.1]
@@ -34,6 +36,48 @@ class MPHandPose:
         self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
     def _preprocess(self, image, palm):
         '''
         Rotate input for inference.
@@ -43,14 +87,22 @@ class MPHandPose:
           palm_landmarks - 7 landmarks (5 finger base points, 2 palm base points) of shape [7, 2]
         Returns:
           rotated_hand - rotated hand image for inference
           rotation_matrix - matrix for rotation and de-rotation
         '''
-        # Rotate input to have vertically oriented hand image
-        #  compute rotation
         palm_bbox = palm[0:4].reshape(2, 2)
-        palm_landmarks = palm[4:18].reshape(7, 2)
         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
         p1 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_PALM_BASE]
         p2 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]
         radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0])
@@ -72,49 +124,25 @@ class MPHandPose:
             np.amin(rotated_palm_landmarks, axis=1),
             np.amax(rotated_palm_landmarks, axis=1)])  # [top-left, bottom-right]
-        #  shift bounding box
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        shift_vector = self.PALM_BOX_SHIFT_VECTOR * wh_rotated_palm_bbox
-        rotated_palm_bbox = rotated_palm_bbox + shift_vector
-        #  squarify bounding boxx
-        center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        new_half_size = np.amax(wh_rotated_palm_bbox) / 2
-        rotated_palm_bbox = np.array([
-            center_rotated_plam_bbox - new_half_size,
-            center_rotated_plam_bbox + new_half_size])
-        #  enlarge bounding box
-        center_rotated_plam_bbox = np.sum(rotated_palm_bbox, axis=0) / 2
-        wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
-        new_half_size = wh_rotated_palm_bbox * self.PALM_BOX_ENLARGE_FACTOR / 2
-        rotated_palm_bbox = np.array([
-            center_rotated_plam_bbox - new_half_size,
-            center_rotated_plam_bbox + new_half_size])
-        # Crop and resize the rotated image by the bounding box
-        [[x1, y1], [x2, y2]] = rotated_palm_bbox.astype(np.int32)
-        diff = np.maximum([-x1, -y1, x2 - rotated_image.shape[1], y2 - rotated_image.shape[0]], 0)
-        [x1, y1, x2, y2] = [x1, y1, x2, y2] + diff
-        crop = rotated_image[y1:y2, x1:x2, :]
-        crop = cv.copyMakeBorder(crop, diff[1], diff[3], diff[0], diff[2], cv.BORDER_CONSTANT, value=(0, 0, 0))
-        blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32) / 255.0
-        return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix
     def infer(self, image, palm):
         # Preprocess
-        input_blob, rotated_palm_bbox, angle, rotation_matrix = self._preprocess(image, palm)
         # Forward
         self.model.setInput(input_blob)
         output_blob = self.model.forward(self.model.getUnconnectedOutLayersNames())
         # Postprocess
-        results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix)
         return results # [bbox_coords, landmarks_coords, conf]
-    def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix):
         landmarks, conf, handedness, landmarks_word = blob
         conf = conf[0][0]
@@ -127,7 +155,7 @@ class MPHandPose:
         # transform coords back to the input coords
         wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
         scale_factor = wh_rotated_palm_bbox / self.input_size
-        landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * scale_factor
         landmarks[:, 2] = landmarks[:, 2] * max(scale_factor) # depth scaling
         coords_rotation_matrix = cv.getRotationMatrix2D((0, 0), angle, 1.0)
         rotated_landmarks = np.dot(landmarks[:, :2], coords_rotation_matrix[:, :2])
@@ -149,7 +177,7 @@ class MPHandPose:
         original_center = np.array([
             np.dot(center, inverse_rotation_matrix[0]),
             np.dot(center, inverse_rotation_matrix[1])])
-        landmarks[:, :2] = rotated_landmarks[:, :2] + original_center
         # get bounding box from rotated_landmarks
         bbox = np.array([

         self.PALM_LANDMARK_IDS = [0, 5, 9, 13, 17, 1, 2]
         self.PALM_LANDMARKS_INDEX_OF_PALM_BASE = 0
         self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE = 2
+        self.PALM_BOX_PRE_SHIFT_VECTOR = [0, 0]
+        self.PALM_BOX_PRE_ENLARGE_FACTOR = 4
         self.PALM_BOX_SHIFT_VECTOR = [0, -0.4]
         self.PALM_BOX_ENLARGE_FACTOR = 3
         self.HAND_BOX_SHIFT_VECTOR = [0, -0.1]
         self.target_id = targetId
         self.model.setPreferableTarget(self.target_id)
+    def _cropAndPadFromPalm(self, image, palm_bbox, for_rotation = False):
+        # shift bounding box
+        wh_palm_bbox = palm_bbox[1] - palm_bbox[0]
+        if for_rotation:
+            shift_vector = self.PALM_BOX_PRE_SHIFT_VECTOR
+        else:
+            shift_vector = self.PALM_BOX_SHIFT_VECTOR
+        shift_vector = shift_vector * wh_palm_bbox
+        palm_bbox = palm_bbox + shift_vector
+        # enlarge bounding box
+        center_palm_bbox = np.sum(palm_bbox, axis=0) / 2
+        wh_palm_bbox = palm_bbox[1] - palm_bbox[0]
+        if for_rotation:
+            enlarge_scale = self.PALM_BOX_PRE_ENLARGE_FACTOR
+        else:
+            enlarge_scale = self.PALM_BOX_ENLARGE_FACTOR
+        new_half_size = wh_palm_bbox * enlarge_scale / 2
+        palm_bbox = np.array([
+            center_palm_bbox - new_half_size,
+            center_palm_bbox + new_half_size])
+        palm_bbox = palm_bbox.astype(np.int32)
+        palm_bbox[:][0] = np.clip(palm_bbox[:][0], 0, image.shape[0])
+        palm_bbox[:][1] = np.clip(palm_bbox[:][1], 0, image.shape[1])
+        # crop to the size of interest
+        image = image[palm_bbox[0][1]:palm_bbox[1][1], palm_bbox[0][0]:palm_bbox[1][0], :]
+        # pad to ensure conner pixels won't be cropped
+        if for_rotation:
+            side_len = np.linalg.norm(image.shape[:2])
+        else:
+            side_len = max(image.shape[:2])
+        side_len = int(side_len)
+        pad_h = side_len - image.shape[0]
+        pad_w = side_len - image.shape[1]
+        left = pad_w // 2
+        top = pad_h // 2
+        right = pad_w - left
+        bottom = pad_h - top
+        image = cv.copyMakeBorder(image, top, bottom, left, right, cv.BORDER_CONSTANT, None, (0, 0, 0))
+        bias = palm_bbox[0] - [left, top]
+        return image, palm_bbox, bias
     def _preprocess(self, image, palm):
         '''
         Rotate input for inference.
           palm_landmarks - 7 landmarks (5 finger base points, 2 palm base points) of shape [7, 2]
         Returns:
           rotated_hand - rotated hand image for inference
+          rotate_palm_bbox - palm box of interest range
+          angle - rotate angle for hand
           rotation_matrix - matrix for rotation and de-rotation
+          pad_bias - pad pixels of interest range
         '''
+        # crop and pad image to interest range
+        pad_bias = np.array([0, 0], dtype=np.int32)  # left, top
         palm_bbox = palm[0:4].reshape(2, 2)
+        image, palm_bbox, bias = self._cropAndPadFromPalm(image, palm_bbox, True)
         image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
+        pad_bias += bias
+        # Rotate input to have vertically oriented hand image
+        # compute rotation
+        palm_bbox -= pad_bias
+        palm_landmarks = palm[4:18].reshape(7, 2) - pad_bias
         p1 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_PALM_BASE]
         p2 = palm_landmarks[self.PALM_LANDMARKS_INDEX_OF_MIDDLE_FINGER_BASE]
         radians = np.pi / 2 - np.arctan2(-(p2[1] - p1[1]), p2[0] - p1[0])
             np.amin(rotated_palm_landmarks, axis=1),
             np.amax(rotated_palm_landmarks, axis=1)])  # [top-left, bottom-right]
+        crop, rotated_palm_bbox, _ = self._cropAndPadFromPalm(rotated_image, rotated_palm_bbox)
+        blob = cv.resize(crop, dsize=self.input_size, interpolation=cv.INTER_AREA).astype(np.float32)
+        blob = blob / 255.
+        return blob[np.newaxis, :, :, :], rotated_palm_bbox, angle, rotation_matrix, pad_bias
     def infer(self, image, palm):
         # Preprocess
+        input_blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias = self._preprocess(image, palm)
         # Forward
         self.model.setInput(input_blob)
         output_blob = self.model.forward(self.model.getUnconnectedOutLayersNames())
         # Postprocess
+        results = self._postprocess(output_blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias)
         return results # [bbox_coords, landmarks_coords, conf]
+    def _postprocess(self, blob, rotated_palm_bbox, angle, rotation_matrix, pad_bias):
         landmarks, conf, handedness, landmarks_word = blob
         conf = conf[0][0]
         # transform coords back to the input coords
         wh_rotated_palm_bbox = rotated_palm_bbox[1] - rotated_palm_bbox[0]
         scale_factor = wh_rotated_palm_bbox / self.input_size
+        landmarks[:, :2] = (landmarks[:, :2] - self.input_size / 2) * max(scale_factor)
         landmarks[:, 2] = landmarks[:, 2] * max(scale_factor) # depth scaling
         coords_rotation_matrix = cv.getRotationMatrix2D((0, 0), angle, 1.0)
         rotated_landmarks = np.dot(landmarks[:, :2], coords_rotation_matrix[:, :2])
         original_center = np.array([
             np.dot(center, inverse_rotation_matrix[0]),
             np.dot(center, inverse_rotation_matrix[1])])
+        landmarks[:, :2] = rotated_landmarks[:, :2] + original_center + pad_bias
         # get bounding box from rotated_landmarks
         bbox = np.array([