# NOTE(review): removed web-scrape artifact lines ("Spaces:", "Runtime error").
# Copyright (C) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, check out LICENSE.md
import numpy as np
import random
import importlib
from .common import tensor2im, tensor2label
from .face import draw_edge, interp_points
from imaginaire.model_utils.fs_vid2vid import extract_valid_pose_labels
def draw_openpose_npy(resize_h, resize_w, crop_h, crop_w, original_h,
                      original_w, is_flipped, cfgdata, keypoints_npy):
    r"""Connect the OpenPose keypoints to edges and draw the pose map.

    Args:
        resize_h (int): Height the input image was resized to.
        resize_w (int): Width the input image was resized to.
        crop_h (int): Height the input image was cropped.
        crop_w (int): Width the input image was cropped.
        original_h (int): Original height of the input image.
        original_w (int): Original width of the input image.
        is_flipped (bool): Is the input image flipped.
        cfgdata (obj): Data configuration.
        keypoints_npy (dict): OpenPose keypoint dict.

    Returns:
        (list of HxWxC numpy array): Drawn label map.
    """
    pose_cfg = cfgdata.for_pose_dataset
    # Optional drawing behaviors, all disabled by default.
    basic_points_only = getattr(pose_cfg, 'basic_points_only', False)
    remove_face_labels = getattr(pose_cfg, 'remove_face_labels', False)
    random_drop_prob = getattr(pose_cfg, 'random_drop_prob', 0)

    # Keypoint index groups that form the edges to be drawn.
    edge_lists = define_edge_lists(basic_points_only)

    # Look up the number of label channels from the data configuration.
    op_key = cfgdata.keypoint_data_types[0]
    for input_type in cfgdata.input_types:
        if op_key in input_type:
            nc = input_type[op_key].num_channels

    # Cropped size takes precedence over resized size.
    h, w = (crop_h, crop_w) if crop_h is not None else (resize_h, resize_w)

    outputs = []
    for keypoint_npy in keypoints_npy:
        # Each person has 25 pose + 70 face + 21 + 21 hand keypoints,
        # each stored as (x, y, confidence). Only the first person is used.
        person_keypoints = np.asarray(keypoint_npy).reshape(-1, 137, 3)[0]
        pose_pts = person_keypoints[:25]
        face_pts = person_keypoints[25:95]
        hand_pts_l = person_keypoints[95:116]
        hand_pts_r = person_keypoints[-21:]
        # Zero out keypoints whose detection confidence is too low.
        all_pts = [extract_valid_keypoints(pts, edge_lists)
                   for pts in [pose_pts, face_pts, hand_pts_l, hand_pts_r]]
        # Rasterize the edges into a label map, normalized to [0, 1].
        pose_img = connect_pose_keypoints(all_pts, edge_lists, (h, w, nc),
                                          basic_points_only,
                                          remove_face_labels,
                                          random_drop_prob)
        outputs.append(pose_img.astype(np.float32) / 255.0)
    return outputs
def openpose_to_npy_largest_only(inputs):
    r"""Convert OpenPose dicts to numpy keypoint arrays, keeping only the
    largest (tallest) person found in each dict.

    Args:
        inputs (list of dicts): List of OpenPose dicts.

    Returns:
        (list of numpy arrays): Keypoints.
    """
    return base_openpose_to_npy(inputs, return_largest_only=True)
def openpose_to_npy(inputs):
    r"""Convert OpenPose dicts to numpy arrays of keypoints.

    All detected people are kept (see :func:`openpose_to_npy_largest_only`
    for the variant that keeps only the largest person).

    Args:
        inputs (list of dicts): List of OpenPose dicts.

    Returns:
        (list of numpy arrays): Keypoints.
    """
    return base_openpose_to_npy(inputs, return_largest_only=False)
def base_openpose_to_npy(inputs, return_largest_only=False):
    r"""Convert OpenPose dicts to numpy arrays of keypoints.

    Args:
        inputs (list of dicts): List of OpenPose dicts.
        return_largest_only (bool): Whether to return only the largest person.

    Returns:
        (list of numpy arrays): Keypoints, one (n_ppl, 137, 3) array per
        input dict (n_ppl == 1 when ``return_largest_only`` is set or no
        people were detected).
    """
    outputs_npy = []
    for openpose_dict in inputs:  # renamed: `input` shadowed the builtin
        people_dict = openpose_dict['people']
        # Allocate at least one (all-zero) person so downstream code always
        # receives a valid array even when nobody was detected.
        n_ppl = max(1, len(people_dict))
        output_npy = np.zeros((n_ppl, 25 + 70 + 21 + 21, 3), dtype=np.float32)
        y_len_max = 0
        # Fall back to the first person; previously `max_ind` could be
        # unbound (NameError) if no person produced a positive body length.
        max_ind = 0
        for i, person_dict in enumerate(people_dict):
            # Extract corresponding keypoints from the dict.
            pose_pts = np.array(person_dict["pose_keypoints_2d"]).reshape(25, 3)
            face_pts = np.array(person_dict["face_keypoints_2d"]).reshape(70, 3)
            hand_pts_l = np.array(person_dict["hand_left_keypoints_2d"]
                                  ).reshape(21, 3)
            hand_pts_r = np.array(person_dict["hand_right_keypoints_2d"]
                                  ).reshape(21, 3)
            if return_largest_only:
                # Get the body length from confident pose keypoints only.
                y = pose_pts[pose_pts[:, 2] > 0.01, 1]
                # Guard: `y.max()` raises ValueError on an empty selection.
                if y.size > 0:
                    y_len = y.max() - y.min()
                    if y_len > y_len_max:
                        y_len_max = y_len
                        max_ind = i
            # Concatenate all keypoints together.
            output_npy[i] = np.vstack([pose_pts, face_pts,
                                       hand_pts_l, hand_pts_r])
        if return_largest_only:
            # Only return the largest person in the dict.
            output_npy = output_npy[max_ind: max_ind + 1]
        outputs_npy += [output_npy.astype(np.float32)]
    return outputs_npy
def extract_valid_keypoints(pts, edge_lists):
    r"""Keep only the keypoints with high enough detection confidence.

    For face (70 points) and hand (21 points) inputs, an edge's keypoints
    are kept only when every keypoint on that edge passes the threshold;
    otherwise the whole edge is zeroed. Pose keypoints are filtered
    individually.

    Args:
        pts (Px3 numpy array): Keypoint xy coordinates + confidence.
        edge_lists (nested list of ints): List of keypoint indices for edges.

    Returns:
        (Px2 numpy array): Output keypoints.
    """
    _, _, hand_edge_list, _, face_list = edge_lists
    num_pts = pts.shape[0]
    # Faces use a stricter confidence threshold than hands/body.
    threshold = 0.1 if num_pts == 70 else 0.01
    output = np.zeros((num_pts, 2))

    if num_pts == 70:  # face
        # Flatten the per-feature groups into a single list of edges.
        edges = [edge for group in face_list for edge in group]
    elif num_pts == 21:  # hand
        edges = hand_edge_list
    else:  # pose: filter each keypoint on its own.
        valid = pts[:, 2] > threshold
        output[valid, :] = pts[valid, :2]
        return output

    for edge in edges:
        if (pts[edge, 2] > threshold).all():
            output[edge, :] = pts[edge, :2]
    return output
def connect_pose_keypoints(pts, edge_lists, size, basic_points_only,
                           remove_face_labels, random_drop_prob):
    r"""Draw edges by connecting the keypoints onto the label map.

    Args:
        pts (Px3 numpy array): Keypoint xy coordinates + confidence.
        edge_lists (nested list of ints): List of keypoint indices for edges.
        size (tuple of int): Output size.
        basic_points_only (bool): Whether to use only the basic keypoints.
        remove_face_labels (bool): Whether to remove face labels.
        random_drop_prob (float): Probability to randomly drop keypoints.

    Returns:
        (HxWxC numpy array): Output label map.
    """
    pose_pts, face_pts, hand_pts_l, hand_pts_r = pts
    h, w, c = size
    body_edges = np.zeros((h, w, c), np.uint8)
    # With more than 3 channels, each body part is drawn one-hot onto its
    # own channel rather than as colors on an RGB image.
    use_one_hot = c > 3
    if use_one_hot:
        assert c == 27
    (pose_edge_list, pose_color_list, hand_edge_list, hand_color_list,
     face_list) = edge_lists

    # Stroke width scales with the person's vertical extent.
    # NOTE: `h` is rebound here from canvas height to body height.
    h = int(pose_pts[:, 1].max() - pose_pts[:, 1].min())
    bw = max(1, h // 150)
    body_edges = draw_edges(body_edges, pose_pts, [pose_edge_list], bw,
                            use_one_hot, random_drop_prob,
                            colors=pose_color_list, draw_end_points=True)

    if not basic_points_only:
        # Hands use a thinner stroke than the body.
        bw = max(1, h // 450)
        for i, hand_pts in enumerate([hand_pts_l, hand_pts_r]):
            if use_one_hot:
                # Channels 24 and 25 hold the left / right hand.
                k = 24 + i
                body_edges[:, :, k] = draw_edges(
                    body_edges[:, :, k], hand_pts, [hand_edge_list], bw,
                    False, random_drop_prob, colors=[255] * len(hand_pts))
            else:
                body_edges = draw_edges(body_edges, hand_pts,
                                        [hand_edge_list], bw, False,
                                        random_drop_prob,
                                        colors=hand_color_list)
        # Face edges are optional to avoid overfitting to faces.
        if not remove_face_labels:
            if use_one_hot:
                # Channel 26 holds the face.
                k = 26
                body_edges[:, :, k] = draw_edges(
                    body_edges[:, :, k], face_pts, face_list, bw, False,
                    random_drop_prob)
            else:
                body_edges = draw_edges(body_edges, face_pts, face_list,
                                        bw, False, random_drop_prob)
    return body_edges
def draw_edges(canvas, keypoints, edges_list, bw, use_one_hot,
               random_drop_prob=0, edge_len=2, colors=None,
               draw_end_points=False):
    r"""Draw all the edges in the edge list on the canvas.

    Args:
        canvas (HxWxK numpy array): Canvas to draw.
        keypoints (Px2 numpy array): Keypoints.
        edges_list (nested list of ints): List of keypoint indices for edges.
        bw (int): Stroke width.
        use_one_hot (bool): Use one-hot encoding or not.
        random_drop_prob (float): Probability to randomly drop keypoints.
        edge_len (int): Number of keypoints in an edge.
        colors (tuple of int): Color to draw.
        draw_end_points (bool): Whether to draw end points for edges.

    Returns:
        (HxWxK numpy array): Output.
    """
    channel_idx = 0  # One-hot channel; advances by one for every edge.
    for edge_list in edges_list:
        for edge_idx, edge in enumerate(edge_list):
            # Walk the edge in overlapping segments of `edge_len` keypoints.
            for start in range(0, max(1, len(edge) - 1), edge_len - 1):
                # Randomly drop segments to reduce overfitting.
                if random.random() <= random_drop_prob:
                    continue
                sub_edge = edge[start:start + edge_len]
                x = keypoints[sub_edge, 0]
                y = keypoints[sub_edge, 1]
                # An x coordinate of exactly 0 marks an invalid keypoint.
                if 0 in x:
                    continue
                curve_x, curve_y = interp_points(x, y)
                if use_one_hot:
                    # One-hot: each edge is drawn to its own channel.
                    draw_edge(canvas[:, :, channel_idx], curve_x, curve_y,
                              bw=bw, color=255,
                              draw_end_points=draw_end_points)
                else:
                    color = (colors[edge_idx] if colors is not None
                             else (255, 255, 255))
                    draw_edge(canvas, curve_x, curve_y, bw=bw, color=color,
                              draw_end_points=draw_end_points)
            channel_idx += 1
    return canvas
def define_edge_lists(basic_points_only):
    r"""Define the lists of keypoints that are connected to form the edges.

    Args:
        basic_points_only (bool): Whether to use only the basic keypoints.

    Returns:
        (tuple): Pose edge/color lists, hand edge/color lists, and the
        face edge list.
    """
    # Body skeleton edges (OpenPose BODY_25 indexing) and their colors.
    pose_edge_list = [
        [17, 15], [15, 0], [0, 16], [16, 18],   # head
        [0, 1], [1, 8],                          # body
        [1, 2], [2, 3], [3, 4],                  # right arm
        [1, 5], [5, 6], [6, 7],                  # left arm
        [8, 9], [9, 10], [10, 11],               # right leg
        [8, 12], [12, 13], [13, 14],             # left leg
    ]
    pose_color_list = [
        [153, 0, 153], [153, 0, 102], [102, 0, 153], [51, 0, 153],
        [153, 0, 51], [153, 0, 0],
        [153, 51, 0], [153, 102, 0], [153, 153, 0],
        [102, 153, 0], [51, 153, 0], [0, 153, 0],
        [0, 153, 51], [0, 153, 102], [0, 153, 153],
        [0, 102, 153], [0, 51, 153], [0, 0, 153],
    ]
    if not basic_points_only:
        # Feet are only drawn with the full keypoint set.
        pose_edge_list.extend([
            [11, 24], [11, 22], [22, 23],   # right foot
            [14, 21], [14, 19], [19, 20],   # left foot
        ])
        pose_color_list.extend([
            [0, 153, 153], [0, 153, 153], [0, 153, 153],
            [0, 0, 153], [0, 0, 153], [0, 0, 153],
        ])
    # Hands: one edge per finger, all starting at the wrist (keypoint 0).
    hand_edge_list = [[0] + list(range(4 * finger + 1, 4 * finger + 5))
                      for finger in range(5)]
    hand_color_list = [
        [204, 0, 0], [163, 204, 0], [0, 204, 82], [0, 82, 204], [163, 0, 204]
    ]
    # Face edges, grouped per facial feature.
    face_list = [
        [range(0, 17)],                                  # face contour
        [range(17, 22)],                                 # left eyebrow
        [range(22, 27)],                                 # right eyebrow
        [[28, 31], range(31, 36), [35, 28]],             # nose
        [[36, 37, 38, 39], [39, 40, 41, 36]],            # left eye
        [[42, 43, 44, 45], [45, 46, 47, 42]],            # right eye
        [range(48, 55), [54, 55, 56, 57, 58, 59, 48]],   # mouth
    ]
    return (pose_edge_list, pose_color_list, hand_edge_list,
            hand_color_list, face_list)
def tensor2pose(cfg, label_tensor):
    r"""Convert output tensor to a numpy pose map.

    Args:
        cfg (obj): Global configuration (reads ``cfg.dis`` and ``cfg.data``).
        label_tensor (3D/4D/5D tensor): Label tensor.

    Returns:
        (HxWx3 numpy array or list of numpy arrays): Pose map.
    """
    # Recurse over batch/time dimensions until a single 3D label remains.
    if label_tensor.dim() == 5 or label_tensor.dim() == 4:
        return [tensor2pose(cfg, label_tensor[idx])
                for idx in range(label_tensor.size(0))]

    # If adding additional discriminators, draw the bbox for the regions
    # (e.g. faces) too.
    add_dis_cfg = getattr(cfg.dis, 'additional_discriminators', None)
    if add_dis_cfg is not None:
        crop_coords = []
        for name in add_dis_cfg:
            # `vis` is "module.path::function"; import and resolve it.
            v = add_dis_cfg[name].vis
            file, crop_func = v.split('::')
            file = importlib.import_module(file)
            crop_func = getattr(file, crop_func)
            crop_coord = crop_func(cfg.data, label_tensor)
            if len(crop_coord) > 0:
                # A nested list means multiple crop regions were returned.
                # (was: type(...) == list — use isinstance for type checks)
                if isinstance(crop_coord[0], list):
                    crop_coords.extend(crop_coord)
                else:
                    crop_coords.append(crop_coord)

    pose_cfg = cfg.data.for_pose_dataset
    pose_type = getattr(pose_cfg, 'pose_type', 'both')
    remove_face_labels = getattr(pose_cfg, 'remove_face_labels', False)
    label_tensor = extract_valid_pose_labels(label_tensor, pose_type,
                                             remove_face_labels)

    # If using both DensePose and OpenPose, overlay one image onto the other
    # to get the visualization map.
    dp_key = 'pose_maps-densepose'
    op_key = 'poses-openpose'
    use_densepose = use_openpose = False
    for input_type in cfg.data.input_types:
        if dp_key in input_type:
            dp_ch = input_type[dp_key].num_channels
            use_densepose = True
        elif op_key in input_type:
            op_ch = input_type[op_key].num_channels
            use_openpose = True
    if use_densepose:
        label_img = tensor2im(label_tensor[:dp_ch])
    if use_openpose:
        openpose = label_tensor[-op_ch:]
        openpose = tensor2im(openpose) if op_ch == 3 else \
            tensor2label(openpose, op_ch)
        if use_densepose:
            # Non-zero OpenPose pixels overwrite the DensePose image.
            label_img[openpose != 0] = openpose[openpose != 0]
        else:
            label_img = openpose

    # Draw the bbox for the regions for the additional discriminator.
    if add_dis_cfg is not None:
        for crop_coord in crop_coords:
            ys, ye, xs, xe = crop_coord
            label_img[ys, xs:xe, :] = label_img[ye - 1, xs:xe, :] \
                = label_img[ys:ye, xs, :] = label_img[ys:ye, xe - 1, :] = 255

    # Replicate a grayscale map to 3 channels for visualization.
    if len(label_img.shape) == 2:
        label_img = np.repeat(label_img[:, :, np.newaxis], 3, axis=2)
    return label_img