Spaces:

An-619
/

FastSAM

Runtime error

App Files Files Community

FastSAM / ultralytics /tracker /utils /gmc.py

zxairdeep

Upload 161 files

e5dd705 verified over 1 year ago

raw

history blame

12.2 kB

	# Ultralytics YOLO 🚀, AGPL-3.0 license

	import copy

	import cv2
	import numpy as np

	from ultralytics.yolo.utils import LOGGER


	class GMC:
	    """
	    Global motion compensation (GMC) between consecutive video frames.

	    Each call to `apply` returns a 2x3 warp matrix for the current frame, computed with the back-end selected by
	    `method`: ORB/SIFT feature matching, ECC image alignment, sparse Lucas-Kanade optical flow, or values read
	    from a pre-computed GMC text file. The identity warp is returned for the first frame and whenever no estimate
	    can be produced.

	    Attributes:
	        method (str): Name of the selected back-end ('none'/'None' are normalised to 'none').
	        downscale (int): Integer factor (>= 1) by which frames are shrunk before processing.
	        prevFrame: Grayscale, possibly downscaled, frame kept from the previous call.
	        prevKeyPoints: Keypoints detected in `prevFrame`.
	        prevDescriptors: Descriptors of `prevKeyPoints` (feature back-ends only).
	        initializedFirstFrame (bool): Whether a first frame has already been processed.
	    """

	    def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):
	        """
	        Configure the back-end used to estimate camera motion.

	        Args:
	            method (str): One of 'orb', 'sift', 'ecc', 'sparseOptFlow', 'file'/'files' or 'none'/'None'.
	            downscale (int): Frame downscale factor; values below 1 are clamped to 1.
	            verbose (Sequence | None): Only read by the file back-end, where `verbose[0]` is the sequence name
	                and a truthy `verbose[1]` selects the ablation directory.

	        Raises:
	            ValueError: If `method` is unknown, or the file back-end is selected without a usable `verbose`.
	            OSError: If the GMC file of the file back-end cannot be opened.
	        """
	        super().__init__()

	        self.method = method
	        self.downscale = max(1, int(downscale))

	        if self.method == 'orb':
	            self.detector = cv2.FastFeatureDetector_create(20)
	            self.extractor = cv2.ORB_create()
	            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)

	        elif self.method == 'sift':
	            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
	            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
	            self.matcher = cv2.BFMatcher(cv2.NORM_L2)

	        elif self.method == 'ecc':
	            number_of_iterations = 5000
	            termination_eps = 1e-6
	            self.warp_mode = cv2.MOTION_EUCLIDEAN
	            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

	        elif self.method == 'sparseOptFlow':
	            self.feature_params = dict(maxCorners=1000,
	                                       qualityLevel=0.01,
	                                       minDistance=1,
	                                       blockSize=3,
	                                       useHarrisDetector=False,
	                                       k=0.04)

	        elif self.method in ('file', 'files'):
	            if verbose is None or len(verbose) < 2:
	                raise ValueError("Error: GMC method 'file' requires verbose=(sequence_name, ablation)")
	            seqName, ablation = verbose[0], verbose[1]
	            filePath = r'tracker/GMC_files/MOT17_ablation' if ablation else r'tracker/GMC_files/MOTChallenge'

	            # Strip the detector suffix from the sequence name before building the file name.
	            if '-FRCNN' in seqName:
	                seqName = seqName[:-6]
	            elif '-DPM' in seqName or '-SDP' in seqName:
	                seqName = seqName[:-4]

	            # open() raises OSError on failure and never returns None, so no extra check is required.
	            # The handle is kept open on purpose: applyFile() consumes one line per frame.
	            self.gmcFile = open(f'{filePath}/GMC-{seqName}.txt')

	        elif self.method in ('none', 'None'):
	            self.method = 'none'
	        else:
	            raise ValueError(f'Error: Unknown CMC method:{method}')

	        self.prevFrame = None
	        self.prevKeyPoints = None
	        self.prevDescriptors = None

	        self.initializedFirstFrame = False

	    def apply(self, raw_frame, detections=None):
	        """
	        Estimate the warp for `raw_frame` with the configured back-end.

	        Args:
	            raw_frame (np.ndarray): Current frame; the image back-ends convert it with cv2.COLOR_BGR2GRAY.
	            detections (Iterable | None): Optional detections, forwarded to the selected back-end.

	        Returns:
	            (np.ndarray): 2x3 warp matrix; identity for method 'none' or any unrecognised method.
	        """
	        if self.method in ('orb', 'sift'):
	            return self.applyFeatures(raw_frame, detections)
	        if self.method == 'ecc':
	            return self.applyEcc(raw_frame, detections)
	        if self.method == 'sparseOptFlow':
	            return self.applySparseOptFlow(raw_frame, detections)
	        # __init__ accepts both spellings, so both must be dispatched here.
	        if self.method in ('file', 'files'):
	            return self.applyFile(raw_frame, detections)
	        return np.eye(2, 3)

	    def _storePrevious(self, frame, keypoints=None, descriptors=None):
	        """Remember the current frame and its features as the reference for the next call."""
	        self.prevFrame = frame.copy()
	        self.prevKeyPoints = copy.copy(keypoints)
	        self.prevDescriptors = copy.copy(descriptors)

	    def _estimateAffine(self, prevPoints, currPoints):
	        """Fit a partial-affine warp to matched points, returning identity when no estimate is possible."""
	        H = np.eye(2, 3)
	        if len(prevPoints) <= 4 or len(prevPoints) != len(currPoints):
	            LOGGER.warning('WARNING: not enough matching points')
	            return H

	        # `method` is passed by keyword: the third positional slot of the Python binding is `inliers`.
	        estimate, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, method=cv2.RANSAC)
	        if estimate is None:
	            LOGGER.warning('WARNING: affine estimation failed. Set warp as identity')
	            return H

	        if self.downscale > 1:
	            # Points were measured on the downscaled image; express the translation in full-size pixels.
	            estimate[0, 2] *= self.downscale
	            estimate[1, 2] *= self.downscale
	        return estimate

	    def applyEcc(self, raw_frame, detections=None):
	        """
	        Estimate the warp between the previous and the current frame with cv2.findTransformECC.

	        Args:
	            raw_frame (np.ndarray): Current frame with shape (height, width, channels).
	            detections (Iterable | None): Unused by this back-end.

	        Returns:
	            (np.ndarray): 2x3 float32 warp matrix; identity on the first frame or when ECC fails.
	        """
	        height, width, _ = raw_frame.shape
	        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
	        H = np.eye(2, 3, dtype=np.float32)

	        # Downscale image (TODO: consider using pyramids)
	        if self.downscale > 1:
	            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
	            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))

	        # Handle first frame
	        if not self.initializedFirstFrame:
	            self.prevFrame = frame.copy()
	            self.initializedFirstFrame = True
	            return H

	        try:
	            (_, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
	        except Exception as e:
	            LOGGER.warning(f'WARNING: find transform failed. Set warp as identity {e}')
	            # The warp buffer may have been touched before the failure; really fall back to identity.
	            H = np.eye(2, 3, dtype=np.float32)
	        else:
	            if self.downscale > 1:
	                # Same translation rescaling as the other back-ends.
	                H[0, 2] *= self.downscale
	                H[1, 2] *= self.downscale

	        # Store to next iteration so the next warp is frame-to-frame, like the other back-ends.
	        self.prevFrame = frame.copy()

	        return H

	    def applyFeatures(self, raw_frame, detections=None):
	        """
	        Estimate the warp by matching keypoint descriptors between the previous and the current frame.

	        Args:
	            raw_frame (np.ndarray): Current frame with shape (height, width, channels).
	            detections (Iterable | None): Optional boxes whose first four values are x1, y1, x2, y2; their area
	                is excluded from keypoint detection.

	        Returns:
	            (np.ndarray): 2x3 warp matrix; identity on the first frame or when too few matches survive.
	        """
	        height, width, _ = raw_frame.shape
	        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
	        H = np.eye(2, 3)

	        # Downscale image (TODO: consider using pyramids)
	        if self.downscale > 1:
	            width //= self.downscale
	            height //= self.downscale
	            frame = cv2.resize(frame, (width, height))

	        # Search for keypoints away from the image border and outside of the detection boxes.
	        mask = np.zeros_like(frame)
	        mask[int(0.02 * height):int(0.98 * height), int(0.02 * width):int(0.98 * width)] = 255
	        if detections is not None:
	            for det in detections:
	                tlbr = (det[:4] / self.downscale).astype(np.int_)
	                mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0

	        keypoints = self.detector.detect(frame, mask)

	        # Compute the descriptors
	        keypoints, descriptors = self.extractor.compute(frame, keypoints)

	        # Handle first frame
	        if not self.initializedFirstFrame:
	            self._storePrevious(frame, keypoints, descriptors)
	            self.initializedFirstFrame = True
	            return H

	        # The matcher cannot run when either frame produced no descriptors.
	        if self.prevDescriptors is None or descriptors is None:
	            LOGGER.warning('WARNING: not enough matching points')
	            self._storePrevious(frame, keypoints, descriptors)
	            return H

	        # Match descriptors.
	        knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)

	        # Handle empty matches case
	        if len(knnMatches) == 0:
	            self._storePrevious(frame, keypoints, descriptors)
	            return H

	        # Keep matches that pass the ratio test and moved less than a quarter of the image size.
	        matches = []
	        spatialDistances = []
	        maxSpatialDistance = 0.25 * np.array([width, height])

	        for pair in knnMatches:
	            if len(pair) < 2:  # fewer than two neighbours: the ratio test is undefined
	                continue
	            m, n = pair
	            if m.distance < 0.9 * n.distance:
	                prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
	                currKeyPointLocation = keypoints[m.trainIdx].pt

	                spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
	                                   prevKeyPointLocation[1] - currKeyPointLocation[1])

	                if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \
	                        (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):
	                    spatialDistances.append(spatialDistance)
	                    matches.append(m)

	        if matches:
	            spatialDistances = np.asarray(spatialDistances)
	            # NOTE(review): one-sided test (no abs()); kept unchanged to preserve existing results.
	            inliers = (spatialDistances - spatialDistances.mean(0)) < 2.5 * spatialDistances.std(0)
	            kept = [m for m, ok in zip(matches, inliers) if ok[0] and ok[1]]
	        else:
	            kept = []

	        prevPoints = np.array([self.prevKeyPoints[m.queryIdx].pt for m in kept])
	        currPoints = np.array([keypoints[m.trainIdx].pt for m in kept])

	        # Find rigid matrix
	        H = self._estimateAffine(prevPoints, currPoints)

	        # Store to next iteration
	        self._storePrevious(frame, keypoints, descriptors)

	        return H

	    def applySparseOptFlow(self, raw_frame, detections=None):
	        """
	        Estimate the warp by tracking corners from the previous frame with pyramidal Lucas-Kanade flow.

	        Args:
	            raw_frame (np.ndarray): Current frame with shape (height, width, channels).
	            detections (Iterable | None): Unused by this back-end.

	        Returns:
	            (np.ndarray): 2x3 warp matrix; identity on the first frame or when too few points are tracked.
	        """
	        height, width, _ = raw_frame.shape
	        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
	        H = np.eye(2, 3)

	        # Downscale image
	        if self.downscale > 1:
	            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))

	        # Find the keypoints
	        keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)

	        # Handle first frame; also restart when the previous frame produced no corners to track.
	        if not self.initializedFirstFrame or self.prevKeyPoints is None:
	            self._storePrevious(frame, keypoints)
	            self.initializedFirstFrame = True
	            return H

	        # Find correspondences
	        matchedKeypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)

	        # Leave good correspondences only
	        tracked = np.asarray(status).reshape(-1).astype(bool)
	        prevPoints = self.prevKeyPoints[tracked]
	        currPoints = matchedKeypoints[tracked]

	        # Find rigid matrix
	        H = self._estimateAffine(prevPoints, currPoints)

	        # Store to next iteration
	        self._storePrevious(frame, keypoints)

	        return H

	    def applyFile(self, raw_frame, detections=None):
	        """
	        Return the warp stored on the next line of the GMC file.

	        A line holds tab-separated values; fields 1-6 are read row by row into the 2x3 matrix and field 0 is
	        ignored.

	        Args:
	            raw_frame (np.ndarray): Unused; present for a uniform back-end signature.
	            detections (Iterable | None): Unused by this back-end.

	        Returns:
	            (np.ndarray): 2x3 float64 warp matrix; identity once the file is exhausted or if the line has fewer
	                than seven fields.

	        Raises:
	            ValueError: If one of the six matrix fields is not a valid float.
	        """
	        # np.float64 instead of the np.float_ alias, which no longer exists in NumPy 2.
	        H = np.eye(2, 3, dtype=np.float64)
	        tokens = self.gmcFile.readline().split('\t')
	        if len(tokens) < 7:
	            # End of file (readline() returned '') or a truncated line: no motion information available.
	            return H

	        H[0, :] = [float(t) for t in tokens[1:4]]
	        H[1, :] = [float(t) for t in tokens[4:7]]

	        return H