# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import random
import numpy as np
import math
import torch
import torchvision
from torchvision.transforms import functional as F

from maskrcnn_benchmark.structures.bounding_box import BoxList

def matrix_iou(a, b, relative=False):
    """
    Return the IoU matrix of boxes a and b (numpy version, for data
    augmentation). With relative=True, return intersection / area(b) instead.
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    if relative:
        # Broadcast area_b across rows of area_i; the original
        # area_b[:, np.newaxis] gives the wrong shape whenever len(a) != len(b).
        ious = area_i / (area_b[np.newaxis, :] + 1e-12)
    else:
        ious = area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
    return ious
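
# A quick sanity check for matrix_iou (illustrative numbers of my own): two
# 2x2 boxes offset by (1, 1) share a 1x1 intersection, so IoU = 1 / (4+4-1):
#   >>> matrix_iou(np.array([[0., 0., 2., 2.]]), np.array([[1., 1., 3., 3.]]))
#   array([[0.14285714]])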

class RACompose(object):
    def __init__(self, pre_transforms, rand_transforms, post_transforms, concurrent=2):
        self.preprocess = pre_transforms
        self.transforms = post_transforms
        self.rand_transforms = rand_transforms
        self.concurrent = concurrent

    def __call__(self, image, target):
        for t in self.preprocess:
            image, target = t(image, target)
        # Pick `concurrent` random transforms (with replacement) and apply them
        # on the numpy representation of the image.
        for t in random.choices(self.rand_transforms, k=self.concurrent):
            image = np.array(image)
            image, target = t(image, target)
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.preprocess:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\nRandomly select {0} from: (".format(self.concurrent)
        for t in self.rand_transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += ")\nThen, apply:"
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        for t in self.transforms:
            image, target = t(image, target)
        if target is None:
            return image
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string
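
# Sketch of a typical composition (an illustration, not config pulled from this
# repo; the mean/std values are the usual ImageNet statistics):
#   transform = Compose([
#       Resize(800, 1333),
#       RandomHorizontalFlip(0.5),
#       ToTensor(),
#       Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#   ])
#   image, target = transform(image, target)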

class Resize(object):
    def __init__(self, min_size, max_size, restrict=False):
        if not isinstance(min_size, (list, tuple)):
            min_size = (min_size,)
        self.min_size = min_size
        self.max_size = max_size
        self.restrict = restrict

    # modified from torchvision to add support for max size
    def get_size(self, image_size):
        w, h = image_size
        size = random.choice(self.min_size)
        max_size = self.max_size
        if self.restrict:
            return (size, max_size)
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(round(max_size * min_original_size / max_original_size))
        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)
        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)
        return (oh, ow)

    def __call__(self, image, target):
        if isinstance(image, np.ndarray):
            # image.shape[:2] is (h, w), but get_size expects (w, h).
            oh, ow = self.get_size(image.shape[:2][::-1])
            # cv2.resize takes dsize as (w, h).
            image = cv2.resize(image, (ow, oh))
            # BoxList.resize expects (w, h) as well.
            new_size = (ow, oh)
        else:
            image = F.resize(image, self.get_size(image.size))
            new_size = image.size
        if target is not None:
            target = target.resize(new_size)
        return image, target
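
# Worked example for get_size (numbers of my own): min_size=800, max_size=1333,
# input (w, h) = (1920, 1080). Scaling the short side to 800 would push the
# long side to 1920/1080*800 ≈ 1422 > 1333, so size is reduced to
# round(1333*1080/1920) = 750 and get_size returns (oh, ow) = (750, 1333).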

class RandomHorizontalFlip(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                # copy() drops the negative strides fliplr introduces, which
                # torch.from_numpy (inside F.to_tensor) cannot handle.
                image = np.fliplr(image).copy()
            else:
                image = F.hflip(image)
            if target is not None:
                target = target.transpose(0)
        return image, target

class RandomVerticalFlip(object):
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                image = np.flipud(image).copy()
            else:
                image = F.vflip(image)
            # Guard against target=None, as the horizontal flip above does.
            if target is not None:
                target = target.transpose(1)
        return image, target
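
# BoxList.transpose uses the PIL method codes: 0 == FLIP_LEFT_RIGHT and
# 1 == FLIP_TOP_BOTTOM, hence the 0 and 1 passed by the two flip classes above.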

class ToTensor(object):
    def __call__(self, image, target):
        return F.to_tensor(image), target

class Normalize(object):
    def __init__(self, mean, std, format='rgb'):
        self.mean = mean
        self.std = std
        # format is a flag string such as 'rgb', 'bgr' or 'bgr255': 'bgr'
        # reverses the channel order, '255' rescales [0, 1] tensors to [0, 255].
        self.format = format.lower()

    def __call__(self, image, target):
        if 'bgr' in self.format:
            image = image[[2, 1, 0]]
        if '255' in self.format:
            image = image * 255
        image = F.normalize(image, mean=self.mean, std=self.std)
        return image, target
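
# For Caffe-trained backbones this is typically configured with per-channel
# BGR pixel means and unit std (a usage sketch, not config taken from this file):
#   Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1., 1., 1.], format='bgr255')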

class ColorJitter(object):
    def __init__(self,
                 brightness=0.0,
                 contrast=0.0,
                 saturation=0.0,
                 hue=0.0,
                 ):
        self.color_jitter = torchvision.transforms.ColorJitter(
            brightness=brightness,
            contrast=contrast,
            saturation=saturation,
            hue=hue,)

    def __call__(self, image, target):
        image = self.color_jitter(image)
        return image, target

class RandomCrop(object):
    def __init__(self, prob=0.5, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        self.prob = prob
        # Sampling modes: 1 returns the original image, 0 places no IoU
        # constraint on the crop, the rest are minimum-IoU thresholds.
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, target):
        if random.random() > self.prob:
            return img, target
        h, w, c = img.shape
        boxes = target.bbox.numpy()
        labels = target.get_field('labels')
        if len(boxes) == 0:
            # overlaps.min() below would raise on an empty box set
            return img, target
        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return img, target
            min_iou = mode
            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)
            # keep the aspect ratio h / w within [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue
            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)
            patch = np.array([left, top, left + new_w, top + new_h])
            overlaps = matrix_iou(patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue
            # the center of each kept box must lie inside the cropped image
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = (center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) * \
                   (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])
            if not mask.any():
                continue
            boxes = boxes[mask]
            labels = labels[mask]
            # crop the image, then clip the boxes and shift them into the patch frame
            img = img[int(patch[1]):int(patch[3]), int(patch[0]):int(patch[2])]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            boxes -= np.tile(patch[:2], 2)
            new_target = BoxList(boxes, (img.shape[1], img.shape[0]), mode='xyxy')
            new_target.add_field('labels', labels)
            return img, new_target
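
# Note on the crop extents: with min_crop_size=0.3 the sampled patch can shrink
# to 0.3*w by 0.3*h, i.e. as little as 9% of the original image area (subject
# to the aspect-ratio and IoU checks above).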

class RandomAffine(object):
    def __init__(self, prob=0.5, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5)):
        self.prob = prob
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue

    def __call__(self, img, targets=None):
        if random.random() > self.prob:
            return img, targets
        # cf. torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)
        height, width, _ = img.shape

        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(width / 2, height / 2), scale=s)

        # Translation: x offsets scale with width, y offsets with height
        T = np.eye(3)
        T[0, 2] = (random.random() * 2 - 1) * self.translate[0] * width + border  # x translation (pixels)
        T[1, 2] = (random.random() * 2 - 1) * self.translate[1] * height + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # combined matrix: rotate/scale, then translate, then shear. ORDER IS IMPORTANT!
        imw = cv2.warpPerspective(img, M, dsize=(width, height), flags=cv2.INTER_LINEAR,
                                  borderValue=self.borderValue)  # BGR-order borderValue

        # Warp the boxes along with the image
        if targets is not None and len(targets) > 0:
            points = targets.bbox[:, 0:4].numpy()
            n = points.shape[0]
            # all four corners of each box: x1y1, x2y2, x1y2, x2y1
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)
            xy = (xy @ M.T)[:, :2].reshape(n, 8)
            # axis-aligned hull of the warped corners
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
            # shrink the hull to compensate for the inflation a rotation causes
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
            # clip warped boxes to the image
            x1 = np.clip(xy[:, 0], 0, width)
            y1 = np.clip(xy[:, 1], 0, height)
            x2 = np.clip(xy[:, 2], 0, width)
            y2 = np.clip(xy[:, 3], 0, height)
            new_bbox = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
            targets.bbox = torch.as_tensor(new_bbox, dtype=torch.float32)
        return imw, targets
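
# On the reduction factor above (worked values of my own): for a rotation of
# a = 10 degrees, reduction = max(|sin a|, |cos a|) ** 0.5 = cos(10°) ** 0.5
# ≈ 0.992, so the hulls shrink by under 1% per side; the factor only bites for
# rotations approaching 45 degrees, where it reaches ≈ 0.841.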

class RandomErasing:
    def __init__(self, prob=0.5, era_l=0.02, era_h=1/3, min_aspect=0.3,
                 mode='const', max_count=1, max_overlap=0.3, max_value=255):
        self.prob = prob
        self.era_l = era_l
        self.era_h = era_h
        self.min_aspect = min_aspect
        self.min_count = 1
        self.max_count = max_count
        self.max_overlap = max_overlap
        self.max_value = max_value
        self.mode = mode.lower()
        assert self.mode in ['const', 'rand', 'pixel'], 'invalid erase mode: %s' % self.mode

    def _get_pixels(self, patch_size):
        if self.mode == 'pixel':
            # independent random value per erased pixel
            return np.random.random(patch_size) * self.max_value
        elif self.mode == 'rand':
            # one random color per erased patch
            return np.random.random((1, 1, patch_size[-1])) * self.max_value
        else:
            # constant (zero) fill
            return np.zeros((1, 1, patch_size[-1]))

    def __call__(self, image, target):
        if random.random() > self.prob:
            return image, target
        ih, iw, ic = image.shape
        ia = ih * iw
        count = self.min_count if self.min_count == self.max_count else \
            random.randint(self.min_count, self.max_count)
        erase_boxes = []
        for _ in range(count):
            for try_idx in range(10):
                erase_area = random.uniform(self.era_l, self.era_h) * ia / count
                aspect_ratio = math.exp(random.uniform(math.log(self.min_aspect), math.log(1 / self.min_aspect)))
                eh = int(round(math.sqrt(erase_area * aspect_ratio)))
                ew = int(round(math.sqrt(erase_area / aspect_ratio)))
                if eh < ih and ew < iw:
                    x = random.randint(0, iw - ew)
                    y = random.randint(0, ih - eh)
                    image[y:y + eh, x:x + ew, :] = self._get_pixels((eh, ew, ic))
                    erase_boxes.append([x, y, x + ew, y + eh])
                    break
        # drop boxes that are mostly covered by an erased region
        if target is not None and len(erase_boxes) > 0:
            boxes = target.bbox.numpy()
            labels = target.get_field('labels')
            overlap = matrix_iou(np.array(erase_boxes), boxes, relative=True)
            mask = overlap.max(axis=0) < self.max_overlap
            boxes = boxes[mask]
            labels = labels[mask]
            target.bbox = torch.as_tensor(boxes, dtype=torch.float32)
            target.add_field('labels', labels)
        return image, target
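

if __name__ == "__main__":
    # Minimal smoke test (my own addition, not part of the original module):
    # run the numpy-based transforms on a random image with one dummy box.
    img = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    target = BoxList(torch.tensor([[100., 120., 300., 360.]]), (640, 480), mode='xyxy')
    target.add_field('labels', torch.tensor([1]))
    for t in [RandomCrop(prob=1.0), RandomAffine(prob=1.0), RandomErasing(prob=1.0)]:
        img, target = t(img, target)
        print(type(t).__name__, img.shape, tuple(target.bbox.shape))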