# Grounded-Segment-Anything/grounded-sam-osx/transformer_utils/mmpose/models/detectors/poseur.py
import warnings

import mmcv
import numpy as np
import torch
from mmcv.image import imwrite
from mmcv.visualization.image import imshow

from mmpose.core import imshow_keypoints
from .. import builder
from ..builder import POSENETS
from .base import BasePose
from config import cfg
try:
    from mmcv.runner import auto_fp16
except ImportError:
    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
                  'Please install mmcv>=1.1.4')
    from mmpose.core import auto_fp16

from .top_down import TopDown

@POSENETS.register_module()
class Poseur(TopDown):

    def __init__(self, *args, **kwargs):
        # 'filp_fuse_type' keeps its upstream (misspelled) name for config
        # compatibility; ``forward_test`` accepts 'default', 'type1', 'type2'.
        self.filp_fuse_type = kwargs.pop('filp_fuse_type', 'default')
        super().__init__(*args, **kwargs)

    def init_weights(self, pretrained=None):
        """Weight initialization for model."""
        self.backbone.init_weights(pretrained)
        if self.with_neck:
            self.neck.init_weights()
        if self.with_keypoint:
            self.keypoint_head.init_weights()

    def forward(self,
                img,
                coord_target=None,
                coord_target_weight=None,
                bbox_target=None,
                bbox_target_weight=None,
                hp_target=None,
                hp_target_weight=None,
                img_metas=None,
                return_loss=True,
                return_heatmap=False,
                coord_init=None,
                query_init=None,
                **kwargs):
| """Calls either forward_train or forward_test depending on whether | |
| return_loss=True. Note this setting will change the expected inputs. | |
| When `return_loss=True`, img and img_meta are single-nested (i.e. | |
| Tensor and List[dict]), and when `resturn_loss=False`, img and img_meta | |
| should be double nested (i.e. List[Tensor], List[List[dict]]), with | |
| the outer list indicating test time augmentations. | |
| Note: | |
| batch_size: N | |
| num_keypoints: K | |
| num_img_channel: C (Default: 3) | |
| img height: imgH | |
| img weight: imgW | |
| heatmaps height: H | |
| heatmaps weight: W | |
| Args: | |
| img (torch.Tensor[NxCximgHximgW]): Input images. | |
| target (torch.Tensor[NxKxHxW]): Target heatmaps. | |
| target_weight (torch.Tensor[NxKx1]): Weights across | |
| different joint types. | |
| img_metas (list(dict)): Information about data augmentation | |
| By default this includes: | |
| - "image_file: path to the image file | |
| - "center": center of the bbox | |
| - "scale": scale of the bbox | |
| - "rotation": rotation of the bbox | |
| - "bbox_score": score of bbox | |
| return_loss (bool): Option to `return loss`. `return loss=True` | |
| for training, `return loss=False` for validation & test. | |
| return_heatmap (bool) : Option to return heatmap. | |
| Returns: | |
| dict|tuple: if `return loss` is true, then return losses. | |
| Otherwise, return predicted poses, boxes, image paths | |
| and heatmaps. | |
| """ | |
        # In this grounded-sam-osx adaptation, ``forward`` always runs the
        # mesh-feature recovery path; the original train/test dispatch is
        # kept below for reference.
        return self.forward_mesh_recovery(img, coord_init=coord_init,
                                          query_init=query_init, **kwargs)
        # if return_loss:
        #     return self.forward_train(img,
        #                               coord_target, coord_target_weight,
        #                               hp_target, hp_target_weight, img_metas,
        #                               **kwargs)
        # return self.forward_test(
        #     img, img_metas, return_heatmap=return_heatmap, **kwargs)
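
    # A minimal usage sketch of this adapted ``forward`` (hypothetical
    # feature shapes; assumes a 4-level backbone pyramid, since
    # ``forward_mesh_recovery`` treats ``img`` as a list of feature maps
    # rather than raw images):
    #
    #   feats = [torch.randn(2, 256, 64, 48),
    #            torch.randn(2, 512, 32, 24),
    #            torch.randn(2, 1024, 16, 12),
    #            torch.randn(2, 2048, 8, 6)]
    #   token_feat = poseur(feats)   # == forward_mesh_recovery(feats)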

    def forward_train(self, img, coord_target, coord_target_weight,
                      hp_target, hp_target_weight, img_metas, **kwargs):
        """Defines the computation performed at every call when training.

        Example shapes (batch of 2, 17 keypoints):
            coord_target: [2, 17, 2]
            coord_target_weight: [2, 17, 2]
            hp_target: [2, 4, 17, 64, 48]
            hp_target_weight: [2, 4, 17, 1]
        """
        output = self.backbone(img)
        img_feat = output[-1]
        if self.with_neck:
            output = self.neck(output)
        if self.with_keypoint:
            # output = self.keypoint_head(output, img_metas)
            enc_output, dec_output = self.keypoint_head(output)
        return img_feat, enc_output, dec_output, None

    def seperate_sigma_from_score(self, score):
        """Split a [N, K, 3] prediction into score [N, K, 1] and
        sigma [N, K, 2]. (Method name keeps its upstream spelling.)
        """
        if score.shape[2] == 3:
            sigma = score[:, :, [1, 2]]
            score = score[:, :, [0]]
            return score, sigma
        elif score.shape[2] == 1:
            return score, None
        else:
            raise ValueError(
                f'expected score.shape[2] to be 1 or 3, got {score.shape[2]}')
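
    # Sketch of the split performed above (hypothetical values; 1 image,
    # 2 keypoints, last axis = [score, sigma_x, sigma_y]):
    #
    #   score = np.array([[[0.9, 0.05, 0.07],
    #                      [0.8, 0.10, 0.12]]])            # [1, 2, 3]
    #   s, sigma = self.seperate_sigma_from_score(score)
    #   # s     -> [[[0.9], [0.8]]]                        # [1, 2, 1]
    #   # sigma -> [[[0.05, 0.07], [0.10, 0.12]]]          # [1, 2, 2]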

    def forward_mesh_recovery(self, output, coord_init=None, query_init=None,
                              **kwargs):
        """Run the neck and keypoint head on precomputed backbone features.

        :param output: multi-level backbone feature maps; ``output[-1]`` is
            the last (deepest) level.
        :param coord_init: optional initial keypoint coordinates for the head.
        :param query_init: optional initial decoder queries for the head.
        :return: the last decoder layer's query features
            (``dec_output.feat[-1]``).
        """
        img_feat = output[-1]
        if self.with_neck:
            output = self.neck(output)
        if self.with_keypoint:
            # output = self.keypoint_head(output, img_metas)
            enc_output, dec_output = self.keypoint_head(
                output, coord_init=coord_init, query_init=query_init)
        return dec_output.feat[-1]
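
    # Shape walk-through for ``forward_mesh_recovery`` (an assumption about
    # the head's output layout, not verified here): with L decoder layers,
    # K keypoint queries and embedding dim D, ``dec_output.feat`` stacks the
    # per-layer query features, so ``dec_output.feat[-1]`` is the last
    # layer's [N, K, D] keypoint tokens consumed by the OSX mesh branch.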

    def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
        """Defines the computation performed at every call when testing."""
        assert img.size(0) == len(img_metas)
        batch_size, _, img_height, img_width = img.shape
        if batch_size > 1:
            assert 'bbox_id' in img_metas[0]

        result = {}

        features = self.backbone(img)
        if self.with_neck:
            features = self.neck(features)
        if self.with_keypoint:
            output_regression, output_regression_score = \
                self.keypoint_head.inference_model(features, flip_pairs=None)
            output_regression_score, output_regression_sigma = \
                self.seperate_sigma_from_score(output_regression_score)
        if self.test_cfg['flip_test']:
            img_flipped = img.flip(3)
            features_flipped = self.backbone(img_flipped)
            if self.with_neck:
                features_flipped = self.neck(features_flipped)
            if self.with_keypoint:
                output_regression_flipped, output_regression_score_flipped = \
                    self.keypoint_head.inference_model(
                        features_flipped, img_metas[0]['flip_pairs'])
                output_regression_score_flipped, output_regression_sigma_flipped = \
                    self.seperate_sigma_from_score(
                        output_regression_score_flipped)
                if self.filp_fuse_type == 'default':
                    # Plain average of the original and flipped predictions.
                    output_regression = (output_regression +
                                         output_regression_flipped) * 0.5
                    output_regression_score = (
                        output_regression_score +
                        output_regression_score_flipped) * 0.5
                elif self.filp_fuse_type == 'type1':
                    # Score-weighted average of the two coordinate sets.
                    output_regression, output_regression_flipped = \
                        torch.from_numpy(output_regression), \
                        torch.from_numpy(output_regression_flipped)
                    output_regression_score, output_regression_score_flipped = \
                        torch.from_numpy(output_regression_score), \
                        torch.from_numpy(output_regression_score_flipped)

                    output_regression = (
                        output_regression * output_regression_score +
                        output_regression_flipped *
                        output_regression_score_flipped
                    ) / (output_regression_score +
                         output_regression_score_flipped + 1e-9)
                    diff = 1 - (output_regression_score -
                                output_regression_score_flipped).abs()
                    output_regression_score = (
                        output_regression_score *
                        output_regression_score_flipped * diff) ** 2

                    output_regression = output_regression.numpy()
                    output_regression_score = output_regression_score.numpy()
                elif self.filp_fuse_type == 'type2':
                    # Sigma-based confidence weighting via ``get_p``.
                    output_regression, output_regression_flipped = \
                        torch.from_numpy(output_regression), \
                        torch.from_numpy(output_regression_flipped)
                    output_regression_sigma, output_regression_sigma_flipped = \
                        torch.from_numpy(output_regression_sigma), \
                        torch.from_numpy(output_regression_sigma_flipped)
                    output_regression_p, output_regression_p_flipped = \
                        self.get_p(output_regression_sigma), \
                        self.get_p(output_regression_sigma_flipped)

                    p_to_coord_index = 5
                    output_regression = (
                        output_regression *
                        output_regression_p ** p_to_coord_index +
                        output_regression_flipped *
                        output_regression_p_flipped ** p_to_coord_index
                    ) / (output_regression_p ** p_to_coord_index +
                         output_regression_p_flipped ** p_to_coord_index +
                         1e-10)
                    output_regression_score = (
                        output_regression_p +
                        output_regression_p_flipped) * 0.5

                    output_regression = output_regression.numpy()
                    output_regression_score = output_regression_score.numpy()
                else:
                    raise NotImplementedError(
                        f'unknown filp_fuse_type: {self.filp_fuse_type}')
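        # Worked numbers for the 'type1' fusion above (hypothetical scores):
        # with scores s = 0.9 and s' = 0.7 for coordinates x and x',
        #   fused x = (0.9*x + 0.7*x') / (0.9 + 0.7 + 1e-9)
        #   diff    = 1 - |0.9 - 0.7| = 0.8
        #   fused s = (0.9 * 0.7 * 0.8) ** 2 ≈ 0.254
        # so the fused confidence drops sharply when the two views disagree.
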
        if self.with_keypoint:
            keypoint_result = self.keypoint_head.decode_keypoints(
                img_metas, output_regression, output_regression_score,
                [img_width, img_height])
            result.update(keypoint_result)

            if not return_heatmap:
                output_heatmap = None
            result['output_heatmap'] = output_heatmap
        return result

    def get_p(self, output_regression_sigma, p_x=0.2):
        """Map per-axis sigma to a confidence in (0, 1):
        p = 1 - exp(-p_x / sigma). The x- and y-axis confidences are
        multiplied, then scaled by 0.7.
        """
        output_regression_p = (1 - np.exp(-(p_x / output_regression_sigma)))
        output_regression_p = \
            output_regression_p[:, :, 0] * output_regression_p[:, :, 1]
        output_regression_p = output_regression_p[:, :, None]
        return output_regression_p * 0.7
        # Tuning notes kept from upstream (p_x, scale -> metric):
        # 0.2  0.7  7421
        # 0.2  0.7  7610
        # 0.17 0.7
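
    # Numeric sketch of ``get_p`` (hypothetical sigmas):
    #   sigma = (0.1, 0.2), p_x = 0.2
    #   p_x_axis = 1 - exp(-0.2 / 0.1) = 1 - exp(-2) ≈ 0.865
    #   p_y_axis = 1 - exp(-0.2 / 0.2) = 1 - exp(-1) ≈ 0.632
    #   p        = 0.865 * 0.632 * 0.7              ≈ 0.383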

    def forward_dummy(self, img):
        """Used for computing network FLOPs.

        See ``tools/get_flops.py``.

        Args:
            img (torch.Tensor): Input image.

        Returns:
            Tensor: Output heatmaps.
        """
        output = self.backbone(img)
        if self.with_neck:
            output = self.neck(output)
        if self.with_keypoint:
            img_h, img_w = 256, 192
            img_metas = [{}]
            img_metas[0]['batch_input_shape'] = (img_h, img_w)
            img_metas[0]['img_shape'] = (img_h, img_w, 3)
            # output = self.keypoint_head(output, img_metas)
            output = self.keypoint_head(output)
        return output
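

# Minimal dummy-forward sketch for FLOPs-style checks. ``build_posenet`` is
# mmpose's model builder; treating ``cfg.model`` as a Poseur config is an
# assumption about the host setup, not verified here:
#
#   from mmpose.models import build_posenet
#   model = build_posenet(cfg.model)
#   model.eval()
#   with torch.no_grad():
#       out = model.forward_dummy(torch.randn(1, 3, 256, 192))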