# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import random
import numpy as np
import math
import torch
import torchvision
from torchvision.transforms import functional as F

from maskrcnn_benchmark.structures.bounding_box import BoxList

def matrix_iou(a, b, relative=False):
    """
    Return the IoU matrix of boxes a and b (numpy version, for data
    augmentation). With relative=True, return intersection / area(b) instead.
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    if relative:
        # Broadcast area_b across rows of area_i; the original
        # area_b[:, np.newaxis] gives the wrong shape whenever len(a) != len(b).
        ious = area_i / (area_b[np.newaxis, :] + 1e-12)
    else:
        ious = area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
    return ious
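
# A quick sanity check for matrix_iou (illustrative numbers of my own): two
# 2x2 boxes offset by (1, 1) share a 1x1 intersection, so IoU = 1 / (4+4-1):
#   >>> matrix_iou(np.array([[0., 0., 2., 2.]]), np.array([[1., 1., 3., 3.]]))
#   array([[0.14285714]])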

class RACompose(object):
    def __init__(self, pre_transforms, rand_transforms, post_transforms, concurrent=2):
        self.preprocess = pre_transforms
        self.transforms = post_transforms
        self.rand_transforms = rand_transforms
        self.concurrent = concurrent

    def __call__(self, image, target):
        for t in self.preprocess:
            image, target = t(image, target)
        # Pick `concurrent` random transforms (with replacement) and apply them
        # on the numpy representation of the image.
        for t in random.choices(self.rand_transforms, k=self.concurrent):
            image = np.array(image)
            image, target = t(image, target)
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.preprocess:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\nRandomly select {0} from: (".format(self.concurrent)
        for t in self.rand_transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += ")\nThen, apply:"
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        for t in self.transforms:
            image, target = t(image, target)
        if target is None:
            return image
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string
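
# Sketch of a typical composition (an illustration, not config pulled from this
# repo; the mean/std values are the usual ImageNet statistics):
#   transform = Compose([
#       Resize(800, 1333),
#       RandomHorizontalFlip(0.5),
#       ToTensor(),
#       Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#   ])
#   image, target = transform(image, target)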

class Resize(object):
    def __init__(self, min_size, max_size, restrict=False):
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size
        self.max_size = max_size
        self.restrict = restrict

    # modified from torchvision to add support for max size
    def get_size(self, image_size):
        w, h = image_size
        size = random.choice(self.min_size)
        max_size = self.max_size
        if self.restrict:
            return (size, max_size)
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(round(max_size * min_original_size / max_original_size))
        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)
        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)
        return (oh, ow)

    def __call__(self, image, target):
        if isinstance(image, np.ndarray):
            # image.shape[:2] is (h, w), but get_size expects (w, h).
            oh, ow = self.get_size(image.shape[:2][::-1])
            # cv2.resize takes dsize as (w, h).
            image = cv2.resize(image, (ow, oh))
            # BoxList.resize expects (w, h) as well.
            new_size = (ow, oh)
        else:
            image = F.resize(image, self.get_size(image.size))
            new_size = image.size
        if target is not None:
            target = target.resize(new_size)
        return image, target
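
# Worked example for get_size (numbers of my own): min_size=800, max_size=1333,
# input (w, h) = (1920, 1080). Scaling the short side to 800 would push the
# long side to 1920/1080*800 ≈ 1422 > 1333, so size is reduced to
# round(1333*1080/1920) = 750 and get_size returns (oh, ow) = (750, 1333).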

class RandomHorizontalFlip(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                # copy() drops the negative strides fliplr introduces, which
                # torch.from_numpy (inside F.to_tensor) cannot handle.
                image = np.fliplr(image).copy()
            else:
                image = F.hflip(image)
            if target is not None:
                target = target.transpose(0)
        return image, target

class RandomVerticalFlip(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                image = np.flipud(image).copy()
            else:
                image = F.vflip(image)
            # Guard against target=None, as the horizontal flip above does.
            if target is not None:
                target = target.transpose(1)
        return image, target
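
# BoxList.transpose uses the PIL method codes: 0 == FLIP_LEFT_RIGHT and
# 1 == FLIP_TOP_BOTTOM, hence the 0 and 1 passed by the two flip classes above.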

class ToTensor(object):
    def __call__(self, image, target):
        return F.to_tensor(image), target

class Normalize(object):
    def __init__(self, mean, std, format='rgb'):
        self.mean = mean
        self.std = std
        # format is a flag string such as 'rgb', 'bgr' or 'bgr255': 'bgr'
        # reverses the channel order, '255' rescales [0, 1] tensors to [0, 255].
        self.format = format.lower()

    def __call__(self, image, target):
        if 'bgr' in self.format:
            image = image[[2, 1, 0]]
        if '255' in self.format:
            image = image * 255
        image = F.normalize(image, mean=self.mean, std=self.std)
        return image, target
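
# For Caffe-trained backbones this is typically configured with per-channel
# BGR pixel means and unit std (a usage sketch, not config taken from this file):
#   Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.], format='bgr255')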

class ColorJitter(object):
    def __init__(self,
                 brightness=0.0,
                 contrast=0.0,
                 saturation=0.0,
                 hue=0.0,
                 ):
        self.color_jitter = torchvision.transforms.ColorJitter(
            brightness=brightness,
            contrast=contrast,
            saturation=saturation,
            hue=hue,)

    def __call__(self, image, target):
        image = self.color_jitter(image)
        return image, target

class RandomCrop(object):
    def __init__(self, prob=0.5, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        self.prob = prob
        # Sampling modes: 1 returns the original image, 0 places no IoU
        # constraint on the crop, the rest are minimum-IoU thresholds.
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, target):
        if random.random() > self.prob:
            return img, target
        h, w, c = img.shape
        boxes = target.bbox.numpy()
        labels = target.get_field('labels')
        if len(boxes) == 0:
            # overlaps.min() below would raise on an empty box set
            return img, target
        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return img, target
            min_iou = mode
            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)
            # keep the aspect ratio h / w within [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue
            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)
            patch = np.array([left, top, left + new_w, top + new_h])
            overlaps = matrix_iou(patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue
            # the center of each kept box must lie inside the cropped image
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = (center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) * \
                   (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])
            if not mask.any():
                continue
            boxes = boxes[mask]
            labels = labels[mask]
            # crop the image, then clip the boxes and shift them into the patch frame
            img = img[int(patch[1]):int(patch[3]), int(patch[0]):int(patch[2])]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)
            new_target = BoxList(boxes, (img.shape[1], img.shape[0]), mode='xyxy')
            new_target.add_field('labels', labels)
            return img, new_target
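
# Note on the crop extents: with min_crop_size=0.3 the sampled patch can shrink
# to 0.3*w by 0.3*h, i.e. as little as 9% of the original image area (subject
# to the aspect-ratio and IoU checks above).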

class RandomAffine(object):
    def __init__(self, prob=0.5, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5)):
        self.prob = prob
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue

    def __call__(self, img, targets=None):
        if random.random() > self.prob:
            return img, targets
        # cf. torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)
        height, width, _ = img.shape

        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(width / 2, height / 2), scale=s)

        # Translation: x offsets scale with width, y offsets with height
        T = np.eye(3)
        T[0, 2] = (random.random() * 2 - 1) * self.translate[0] * width + border  # x translation (pixels)
        T[1, 2] = (random.random() * 2 - 1) * self.translate[1] * height + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # combined matrix: rotate/scale, then translate, then shear. ORDER IS IMPORTANT!
        imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
                                  borderValue=self.borderValue)  # BGR-order borderValue

        # Warp the boxes along with the image
        if targets is not None and len(targets) > 0:
            points = targets.bbox[:, 0:4].numpy()
            n = points.shape[0]
            # all four corners of each box: x1y1, x2y2, x1y2, x2y1
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
            xy = (xy @ M.T)[:, :2].reshape(n, 8)
            # axis-aligned hull of the warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
            # shrink the hull to compensate for the inflation a rotation causes
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
            # clip warped boxes to the image
            x1 = np.clip(xy[:, 0], 0, width)
            y1 = np.clip(xy[:, 1], 0, height)
            x2 = np.clip(xy[:, 2], 0, width)
            y2 = np.clip(xy[:, 3], 0, height)
            new_bbox = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
            targets.bbox = torch.as_tensor(new_bbox, dtype=torch.float32)
        return imw, targets
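
# On the reduction factor above (worked values of my own): for a rotation of
# a = 10 degrees, reduction = max(|sin a|, |cos a|) ** 0.5 = cos(10°) ** 0.5
# ≈ 0.992, so the hulls shrink by under 1% per side; the factor only bites for
# rotations approaching 45 degrees, where it reaches ≈ 0.841.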

class RandomErasing:
    def __init__(self, prob=0.5, era_l=0.02, era_h=1/3, min_aspect=0.3,
                 mode='const', max_count=1, max_overlap=0.3, max_value=255):
        self.prob = prob
        self.era_l = era_l
        self.era_h = era_h
        self.min_aspect = min_aspect
        self.min_count = 1
        self.max_count = max_count
        self.max_overlap = max_overlap
        self.max_value = max_value
        self.mode = mode.lower()
        assert self.mode in ['const', 'rand', 'pixel'], 'invalid erase mode: %s' % self.mode

    def _get_pixels(self, patch_size):
        if self.mode == 'pixel':
            # independent random value per erased pixel
            return np.random.random(patch_size) * self.max_value
        elif self.mode == 'rand':
            # one random color per erased patch
            return np.random.random((1, 1, patch_size[-1])) * self.max_value
        else:
            # constant (zero) fill
            return np.zeros((1, 1, patch_size[-1]))

    def __call__(self, image, target):
        if random.random() > self.prob:
            return image, target
        ih, iw, ic = image.shape
        ia = ih * iw
        count = self.min_count if self.min_count == self.max_count else \
            random.randint(self.min_count, self.max_count)
        erase_boxes = []
        for _ in range(count):
            for try_idx in range(10):
                erase_area = random.uniform(self.era_l, self.era_h) * ia / count
                aspect_ratio = math.exp(random.uniform(math.log(self.min_aspect), math.log(1 / self.min_aspect)))
                eh = int(round(math.sqrt(erase_area * aspect_ratio)))
                ew = int(round(math.sqrt(erase_area / aspect_ratio)))
                if eh < ih and ew < iw:
                    x = random.randint(0, iw - ew)
                    y = random.randint(0, ih - eh)
                    image[y:y + eh, x:x + ew, :] = self._get_pixels((eh, ew, ic))
                    erase_boxes.append([x, y, x + ew, y + eh])
                    break
        # drop boxes that are mostly covered by an erased region
        if target is not None and len(erase_boxes) > 0:
            boxes = target.bbox.numpy()
            labels = target.get_field('labels')
            overlap = matrix_iou(np.array(erase_boxes), boxes, relative=True)
            mask = overlap.max(axis=0) < self.max_overlap
            boxes = boxes[mask]
            labels = labels[mask]
            target.bbox = torch.as_tensor(boxes, dtype=torch.float32)
            target.add_field('labels', labels)
        return image, target
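

if __name__ == "__main__":
    # Minimal smoke test (my own addition, not part of the original module):
    # run the numpy-based transforms on a random image with one dummy box.
    img = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    target = BoxList(torch.tensor([[100., 120., 300., 360.]]), (640, 480), mode='xyxy')
    target.add_field('labels', torch.tensor([1]))
    for t in [RandomCrop(prob=1.0), RandomAffine(prob=1.0), RandomErasing(prob=1.0)]:
        img, target = t(img, target)
        print(type(t).__name__, img.shape, tuple(target.bbox.shape))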