Wanli
committed on
Commit
·
6f815fd
1
Parent(s):
866ff8d
Add gesture classification for handpose estimation (#168)
Browse files
models/handpose_estimation_mediapipe/README.md
CHANGED
|
@@ -4,6 +4,9 @@ This model estimates 21 hand keypoints per detected hand from [palm detector](..
|
|
| 4 |
|
| 5 |

|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
This model is converted from TFLite to ONNX using the following tools:
|
| 8 |
- TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
|
| 9 |
- simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
|
|
|
|
| 4 |
|
| 5 |

|
| 6 |
|
| 7 |
+
Hand gesture classification demo (0-9)
|
| 8 |
+

|
| 9 |
+
|
| 10 |
This model is converted from TFLite to ONNX using the following tools:
|
| 11 |
- TFLite model to ONNX: https://github.com/onnx/tensorflow-onnx
|
| 12 |
- simplified by [onnx-simplifier](https://github.com/daquexian/onnx-simplifier)
|
models/handpose_estimation_mediapipe/demo.py
CHANGED
|
@@ -85,6 +85,9 @@ def visualize(image, hands, print_result=False):
|
|
| 85 |
for p in landmarks:
|
| 86 |
cv.circle(image, p, thickness, (0, 0, 255), -1)
|
| 87 |
|
|
|
|
|
|
|
|
|
|
| 88 |
for idx, handpose in enumerate(hands):
|
| 89 |
conf = handpose[-1]
|
| 90 |
bbox = handpose[0:4].astype(np.int32)
|
|
@@ -96,11 +99,14 @@ def visualize(image, hands, print_result=False):
|
|
| 96 |
landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
|
| 97 |
landmarks_word = handpose[67:130].reshape(21, 3)
|
| 98 |
|
|
|
|
|
|
|
| 99 |
# Print results
|
| 100 |
if print_result:
|
| 101 |
print('-----------hand {}-----------'.format(idx + 1))
|
| 102 |
print('conf: {:.2f}'.format(conf))
|
| 103 |
print('handedness: {}'.format(handedness_text))
|
|
|
|
| 104 |
print('hand box: {}'.format(bbox))
|
| 105 |
print('hand landmarks: ')
|
| 106 |
for l in landmarks_screen:
|
|
@@ -113,6 +119,8 @@ def visualize(image, hands, print_result=False):
|
|
| 113 |
cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
|
| 114 |
# draw handedness
|
| 115 |
cv.putText(display_screen, '{}'.format(handedness_text), (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
|
|
|
|
|
|
|
| 116 |
# Draw line between each key points
|
| 117 |
landmarks_xy = landmarks_screen[:, 0:2]
|
| 118 |
draw_lines(display_screen, landmarks_xy, is_draw_point=False)
|
|
@@ -149,6 +157,118 @@ def visualize(image, hands, print_result=False):
|
|
| 149 |
|
| 150 |
return display_screen, display_3d
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
|
| 153 |
if __name__ == '__main__':
|
| 154 |
backend_id = backend_target_pairs[args.backend_target][0]
|
|
|
|
| 85 |
for p in landmarks:
|
| 86 |
cv.circle(image, p, thickness, (0, 0, 255), -1)
|
| 87 |
|
| 88 |
+
# used for gesture classification
|
| 89 |
+
gc = GestureClassification()
|
| 90 |
+
|
| 91 |
for idx, handpose in enumerate(hands):
|
| 92 |
conf = handpose[-1]
|
| 93 |
bbox = handpose[0:4].astype(np.int32)
|
|
|
|
| 99 |
landmarks_screen = handpose[4:67].reshape(21, 3).astype(np.int32)
|
| 100 |
landmarks_word = handpose[67:130].reshape(21, 3)
|
| 101 |
|
| 102 |
+
gesture = gc.classify(landmarks_screen)
|
| 103 |
+
|
| 104 |
# Print results
|
| 105 |
if print_result:
|
| 106 |
print('-----------hand {}-----------'.format(idx + 1))
|
| 107 |
print('conf: {:.2f}'.format(conf))
|
| 108 |
print('handedness: {}'.format(handedness_text))
|
| 109 |
+
print('gesture: {}'.format(gesture))
|
| 110 |
print('hand box: {}'.format(bbox))
|
| 111 |
print('hand landmarks: ')
|
| 112 |
for l in landmarks_screen:
|
|
|
|
| 119 |
cv.rectangle(display_screen, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2)
|
| 120 |
# draw handedness
|
| 121 |
cv.putText(display_screen, '{}'.format(handedness_text), (bbox[0], bbox[1] + 12), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
|
| 122 |
+
# draw gesture
|
| 123 |
+
cv.putText(display_screen, '{}'.format(gesture), (bbox[0], bbox[1] + 30), cv.FONT_HERSHEY_DUPLEX, 0.5, (0, 0, 255))
|
| 124 |
# Draw line between each key points
|
| 125 |
landmarks_xy = landmarks_screen[:, 0:2]
|
| 126 |
draw_lines(display_screen, landmarks_xy, is_draw_point=False)
|
|
|
|
| 157 |
|
| 158 |
return display_screen, display_3d
|
| 159 |
|
| 160 |
+
class GestureClassification:
    """Rule-based classifier that maps 21 hand landmarks to a digit gesture.

    Each finger is described by two features computed from the 2D (x, y)
    landmark coordinates:

    * a bend angle (see ``_hand_angle``), and
    * an "open" flag telling whether the fingertip lies farther from
      landmark 0 than a reference joint (see ``_finger_status``).

    ``_classify`` compares these features against fixed thresholds and
    returns one of ``"Zero"`` ... ``"Nine"`` or ``"Undefined"``.

    NOTE(review): landmark 0 is used as the origin of all measurements;
    presumably it is the wrist in the MediaPipe landmark ordering -- confirm.
    """

    # Per finger: (base, joint, tip) landmark indices. The bend angle is
    # measured between (landmark 0 - base) and (joint - tip).
    _ANGLE_JOINTS = (
        (2, 3, 4),     # thumb
        (6, 7, 8),     # index
        (10, 11, 12),  # middle
        (14, 15, 16),  # ring
        (18, 19, 20),  # pinky
    )

    # Per finger: (reference, tip) landmark indices used by _finger_status.
    _STATUS_PAIRS = ((5, 4), (6, 8), (10, 12), (14, 16), (18, 20))

    def _vector_2_angle(self, v1, v2):
        """Return the angle between ``v1`` and ``v2`` in degrees (0..180).

        If either vector has zero length the angle is undetermined and NaN is
        returned (without emitting a NumPy warning). Every threshold
        comparison against NaN is False, so such a hand ends up "Undefined".
        """
        v1 = np.asarray(v1, dtype=np.float64)
        v2 = np.asarray(v2, dtype=np.float64)
        norm1 = np.linalg.norm(v1)
        norm2 = np.linalg.norm(v2)
        if norm1 == 0 or norm2 == 0:
            # Coinciding landmarks (common with integer pixel coordinates).
            return float('nan')
        # Rounding can push the cosine of (anti)parallel unit vectors just
        # outside [-1, 1], for which arccos would yield NaN; clamp it.
        cos_angle = np.clip(np.dot(v1 / norm1, v2 / norm2), -1.0, 1.0)
        return np.degrees(np.arccos(cos_angle))

    def _hand_angle(self, hand):
        """Return the five bend angles [thumb, index, middle, ring, pinky].

        ``hand`` is indexable as ``hand[i][0]`` / ``hand[i][1]`` for the 21
        landmarks; only the first two coordinates of each landmark are used.
        """
        points = np.asarray(hand, dtype=np.float64)[:, :2]
        origin = points[0]
        return [
            self._vector_2_angle(origin - points[base], points[joint] - points[tip])
            for base, joint, tip in self._ANGLE_JOINTS
        ]

    def _finger_status(self, lmList):
        """Return five booleans [thumb, index, middle, ring, pinky].

        A finger counts as open when its tip is strictly farther from
        landmark 0 than the finger's reference landmark.
        """
        points = np.asarray(lmList, dtype=np.float64)[:, :2]
        offsets = points - points[0]
        distances = np.hypot(offsets[:, 0], offsets[:, 1])
        return [bool(distances[tip] > distances[ref]) for ref, tip in self._STATUS_PAIRS]

    def _classify(self, hand):
        """Return the gesture label for ``hand`` (21 landmarks, x/y columns).

        The rules are evaluated in order and the first match wins; the order
        matters because some rules overlap (e.g. "Four" is tested before
        "Five"). If no rule matches, "Undefined" is returned.
        """
        thr_angle = 65.
        thr_angle_thumb = 30.
        thr_angle_s = 49.
        gesture_str = "Undefined"

        thumb, index, middle, ring, pinky = self._hand_angle(hand)
        thumbOpen, firstOpen, secondOpen, thirdOpen, fourthOpen = self._finger_status(hand)

        # Number
        if (thumb > thr_angle_thumb and index > thr_angle and middle > thr_angle
                and ring > thr_angle and pinky > thr_angle
                and not firstOpen and not secondOpen and not thirdOpen and not fourthOpen):
            gesture_str = "Zero"
        elif (thumb > thr_angle_thumb and index < thr_angle_s and middle > thr_angle
                and ring > thr_angle and pinky > thr_angle
                and firstOpen and not secondOpen and not thirdOpen and not fourthOpen):
            gesture_str = "One"
        elif (thumb > thr_angle_thumb and index < thr_angle_s and middle < thr_angle_s
                and ring > thr_angle and pinky > thr_angle
                and not thumbOpen and firstOpen and secondOpen
                and not thirdOpen and not fourthOpen):
            gesture_str = "Two"
        elif (thumb > thr_angle_thumb and index < thr_angle_s and middle < thr_angle_s
                and ring < thr_angle_s and pinky > thr_angle
                and not thumbOpen and firstOpen and secondOpen
                and thirdOpen and not fourthOpen):
            gesture_str = "Three"
        elif (thumb > thr_angle_thumb and index < thr_angle_s and middle < thr_angle_s
                and ring < thr_angle_s and pinky < thr_angle
                and firstOpen and secondOpen and thirdOpen and fourthOpen):
            gesture_str = "Four"
        elif (thumb < thr_angle_s and index < thr_angle_s and middle < thr_angle_s
                and ring < thr_angle_s and pinky < thr_angle_s
                and thumbOpen and firstOpen and secondOpen and thirdOpen and fourthOpen):
            gesture_str = "Five"
        elif (thumb < thr_angle_s and index > thr_angle and middle > thr_angle
                and ring > thr_angle and pinky < thr_angle_s
                and thumbOpen and not firstOpen and not secondOpen
                and not thirdOpen and fourthOpen):
            gesture_str = "Six"
        elif (thumb < thr_angle_s and index < thr_angle and middle > thr_angle
                and ring > thr_angle and pinky > thr_angle_s
                and thumbOpen and firstOpen and not secondOpen
                and not thirdOpen and not fourthOpen):
            gesture_str = "Seven"
        elif (thumb < thr_angle_s and index < thr_angle and middle < thr_angle
                and ring > thr_angle and pinky > thr_angle_s
                and thumbOpen and firstOpen and secondOpen
                and not thirdOpen and not fourthOpen):
            gesture_str = "Eight"
        elif (thumb < thr_angle_s and index < thr_angle and middle < thr_angle
                and ring < thr_angle and pinky > thr_angle_s
                and thumbOpen and firstOpen and secondOpen
                and thirdOpen and not fourthOpen):
            gesture_str = "Nine"

        return gesture_str

    def classify(self, landmarks):
        """Classify the gesture shown by ``landmarks``.

        ``landmarks`` is a 2D array with at least 21 rows and 2 columns; only
        the first 21 rows and the first two columns (x, y) are used.
        Returns the gesture label as a string.
        """
        hand = landmarks[:21, :2]
        gesture = self._classify(hand)
        return gesture
|
| 272 |
|
| 273 |
if __name__ == '__main__':
|
| 274 |
backend_id = backend_target_pairs[args.backend_target][0]
|