Spaces:

hanquansanren
/

DvD

Sleeping

App Files Files Community

DvD / utils_data /io.py

hanquansanren

Add application file

05fb4ab about 1 month ago

raw

history blame contribute delete

12.5 kB

	#!/usr/bin/env python3.7

	import os
	import re
	import sys
	import uuid

	import imageio
	import numpy as np
	import h5py
	import cv2
	# from scipy.io import loadmat
	# import hdf5storage as h5
	import torch
	import torch.nn.functional as F
	import random

	# import kornia.augmentation as KA
	# import kornia.geometry.transform as KG

	def tight_crop(img, mask, bm):
	    """Crop ``img``/``mask`` around the mask's bounding box with a random margin.

	    The image and mask are cut to the bounding box of the non-zero pixels of
	    ``mask[:, :, 0]``, zero-padded by 45 pixels on every side, and then trimmed
	    by a random amount on each side so that a random margin remains. ``bm`` is
	    shifted and rescaled to follow the crop.

	    Args:
	        img: H x W x C image array (original note: [512, 512, 3] uint8).
	        mask: H x W x C mask array; channel 0 selects the foreground.
	        bm: array whose last axis holds (x, y) coordinates (original note:
	            [448, 448, 2] float64). It is NOT modified; a copy is adjusted.

	    Returns:
	        Tuple ``(img, mask, bm)`` with the cropped image, cropped mask and the
	        re-expressed coordinate map.

	    Raises:
	        ValueError: if ``mask[:, :, 0]`` has no non-zero pixel.
	    """
	    size = mask.shape
	    ys, xs = mask[:, :, 0].nonzero()
	    if ys.size == 0:
	        raise ValueError('mask has no non-zero pixels to crop around')
	    minx, maxx = xs.min(), xs.max()
	    miny, maxy = ys.min(), ys.max()
	    img = img[miny:maxy + 1, minx:maxx + 1, :]
	    mask = mask[miny:maxy + 1, minx:maxx + 1, :]

	    # The aspect ratio is deliberately ignored: at test time the images are
	    # already cropped.

	    s = 45
	    img = np.pad(img, ((s, s), (s, s), (0, 0)), 'constant')
	    mask = np.pad(mask, ((s, s), (s, s), (0, 0)), 'constant')
	    # Random trim per side; the draw order (x1, x2, y1, y2) is kept so that
	    # seeded runs stay reproducible.
	    cx1 = random.randint(5, s - 5)
	    cx2 = random.randint(5, s - 5) + 1
	    cy1 = random.randint(5, s - 5)
	    cy2 = random.randint(5, s - 5) + 1

	    img = img[cy1:-cy2, cx1:-cx2, :]
	    mask = mask[cy1:-cy2, cx1:-cx2, :]

	    # Net amount removed from each side of the original frame (may be
	    # negative when the padding extends past the original border).
	    t = miny - s + cy1
	    b = size[0] - maxy - s + cy2
	    l = minx - s + cx1
	    r = size[1] - maxx - s + cx2

	    # Work on a copy so the caller's array is not shifted as a side effect.
	    bm = bm.copy()
	    bm[:, :, 1] = bm[:, :, 1] - t
	    bm[:, :, 0] = bm[:, :, 0] - l
	    # NOTE(review): the constant 511 presumably assumes a 512-pixel frame --
	    # confirm against the callers.
	    bm = 511 * bm / np.array([511.0 - l - r, 511.0 - t - b])

	    return img, mask, bm

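	# This function crops an image whose centre contains a photographed document. The existing
	# function over-crops the background because it uses "img[miny : maxy + 1, minx : maxx + 1, :]";
	# how can it be modified so that the complete background is kept after cropping?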
	def tight_crop_new(img, mask, bm):
	    """Crop around the mask's bounding box while keeping real background.

	    The bounding box of the non-zero pixels of ``mask[:, :, 0]`` is enlarged
	    by a random margin of 5 to 25 pixels per side, clamped to the frame, and
	    the image and mask are sliced to that window. ``bm`` is shifted and
	    rescaled to follow the crop.

	    Args:
	        img: H x W x C image array (original note: [512, 512, 3] uint8).
	        mask: H x W x C mask array; channel 0 selects the foreground.
	        bm: array whose last axis holds (x, y) coordinates (original note:
	            [448, 448, 2] float64). It is NOT modified; a copy is adjusted.

	    Returns:
	        Tuple ``(cropped_img, cropped_mask / 255., bm)``.

	    Raises:
	        ValueError: if ``mask[:, :, 0]`` has no non-zero pixel.
	    """
	    size = mask.shape
	    ys, xs = mask[:, :, 0].nonzero()
	    if ys.size == 0:
	        raise ValueError('mask has no non-zero pixels to crop around')
	    minx, maxx = xs.min(), xs.max()
	    miny, maxy = ys.min(), ys.max()

	    # Random margin kept around the document; the draw order (x1, x2, y1, y2)
	    # is preserved so seeded runs stay reproducible.
	    offset = 25
	    cx1 = random.randint(5, offset)
	    cx2 = random.randint(5, offset)
	    cy1 = random.randint(5, offset)
	    cy2 = random.randint(5, offset)

	    # Enlarge the box by the margins without leaving the frame.
	    final_minx = max(0, minx - cx1)
	    final_maxx = min(size[1], maxx + cx2)
	    final_miny = max(0, miny - cy1)
	    final_maxy = min(size[0], maxy + cy2)

	    cropped_img = img[final_miny:final_maxy, final_minx:final_maxx, :]
	    cropped_mask = mask[final_miny:final_maxy, final_minx:final_maxx, :]

	    # Amount removed from each side of the original frame.
	    t = final_miny
	    b = size[0] - final_maxy
	    l = final_minx
	    r = size[1] - final_maxx

	    # Work on a copy so the caller's array is not shifted as a side effect.
	    bm = bm.copy()
	    bm[:, :, 1] = bm[:, :, 1] - t
	    bm[:, :, 0] = bm[:, :, 0] - l
	    # NOTE(review): the constant 511 presumably assumes a 512-pixel frame --
	    # confirm against the callers.
	    bm = 511 * bm / np.array([511.0 - l - r, 511.0 - t - b])

	    return cropped_img, cropped_mask / 255., bm


	def augmentation(img, mask, bm, bg=None):
	    """Crop a sample and randomly replace its background.

	    The sample is first cropped with ``tight_crop_new``. Then one of three
	    things happens, chosen by a single random draw:

	    * draw > 0.3: ``bg`` is resized to 200x200, tiled 3x3 and used as the
	      background outside the mask;
	    * 0.2 < draw < 0.3: a random constant colour is used as the background;
	    * otherwise: the image is left unchanged.

	    Finally image and mask are resized to 512x512.

	    Args:
	        img: image array (original note: [512, 512, 3] uint8).
	        mask: mask array (original note: [512, 512, 1] uint8).
	        bm: coordinate map (original note: [448, 448, 2] float64).
	        bg: optional background texture (original note: [512, 512, 3] uint8).
	            When it is ``None`` the texture branch cannot run, so the constant
	            colour branch is used in its place.

	    Returns:
	        Tuple ``(img, mask, bm)``.
	    """
	    # Tight crop.
	    img, mask, bm = tight_crop_new(img, mask, bm)

	    # Replace the background.
	    fh, fw, _ = img.shape
	    chance = random.random()
	    use_texture = chance > 0.3
	    use_solid = 0.2 < chance < 0.3
	    if use_texture and bg is None:
	        # No texture was supplied: fall back to a solid colour rather than
	        # handing None to cv2.resize.
	        use_texture, use_solid = False, True

	    if use_texture:
	        bg = cv2.resize(bg, (200, 200))
	        bg = np.tile(bg, (3, 3, 1))  # 600 x 600 tiling of the texture
	        bg = bg[:fh, :fw, :]
	        msk = mask
	    elif use_solid:
	        # NOTE(review): the colour components are in [0, 1) while img is
	        # presumably 0-255 -- confirm the intended scale.
	        c = np.array([random.random(), random.random(), random.random()])
	        bg = np.ones((fh, fw, 3)) * c
	        msk = mask
	    else:
	        # An all-ones blend mask keeps the original image untouched.
	        bg = np.zeros((fh, fw, 3))
	        msk = np.ones((fh, fw, 3))

	    img = bg * (1 - msk) + img * msk
	    mask = cv2.resize(mask, (512, 512))
	    img = cv2.resize(img, (512, 512))
	    return img, mask, bm





	# Argument parsing
	def boolean_string(s):
	    """Convert the literal string 'True' or 'False' to the matching bool.

	    Raises:
	        ValueError: if ``s`` is neither 'True' nor 'False'.
	    """
	    literals = {'True': True, 'False': False}
	    if s not in literals:
	        raise ValueError('Not a valid boolean string')
	    return literals[s]


	def read(file):
	    """Load ``file`` with the reader that matches its extension.

	    Dispatches to ``readFloat`` (.float3), ``readFlow`` (.flo), ``readImage``
	    (.ppm/.pgm/.png/.jpg) or ``readPFM`` (.pfm, data only, scale dropped).

	    Raises:
	        Exception: if the extension is not one of the above.
	    """
	    if file.endswith('.float3'):
	        return readFloat(file)
	    if file.endswith('.flo'):
	        return readFlow(file)
	    if file.endswith(('.ppm', '.pgm', '.png', '.jpg')):
	        return readImage(file)
	    if file.endswith('.pfm'):
	        return readPFM(file)[0]
	    raise Exception('don\'t know how to read %s' % file)


	def write(file, data):
	    """Save ``data`` to ``file`` with the writer that matches its extension.

	    Dispatches to ``writeFloat`` (.float3), ``writeFlow`` (.flo),
	    ``writeImage`` (.ppm/.pgm/.png/.jpg) or ``writePFM`` (.pfm).

	    Raises:
	        Exception: if the extension is not one of the above.
	    """
	    if file.endswith('.float3'):
	        return writeFloat(file, data)
	    if file.endswith('.flo'):
	        # writeFlow takes (flow, name_to_save, save_dir) and joins the two
	        # path parts itself, so split the target path for it.
	        save_dir, name_to_save = os.path.split(file)
	        return writeFlow(data, name_to_save, save_dir)
	    if file.endswith(('.ppm', '.pgm', '.png', '.jpg')):
	        return writeImage(file, data)
	    if file.endswith('.pfm'):
	        return writePFM(file, data)
	    raise Exception('don\'t know how to write %s' % file)

	def load_gt_flow_npz(bm_path):
	    """Load the 'warped_BM' map from an .npz file and resample it to 512x512.

	    The stored map is cut to its first 447 rows and columns, scaled by 511
	    and offset by 0.4, each channel is resized to 512x512, and the two
	    channels are swapped.

	    Args:
	        bm_path: path to an .npz archive holding a 'warped_BM' array
	            (original note: values in [0, 1], stored y first then x,
	            row-major).

	    Returns:
	        Array of shape (512, 512, 2); per the original note, x first then y,
	        row-major, absolute positions.

	    Raises:
	        Exception: any error from loading or resizing is re-raised after the
	            offending path has been printed.
	    """
	    try:
	        # Close the archive as soon as the array has been read.
	        with np.load(bm_path) as archive:
	            bm = archive['warped_BM'][:447, :447, :] * 511 + 0.4
	        bm0 = cv2.resize(bm[:, :, 0], (512, 512))
	        bm1 = cv2.resize(bm[:, :, 1], (512, 512))
	        bm = np.stack([bm0, bm1], axis=-1)
	        # Rolling the 2-element last axis by one swaps the channels.
	        bm = np.roll(bm, shift=1, axis=-1)
	    except Exception:
	        # Report which file failed, then surface the real error instead of
	        # an unrelated UnboundLocalError at the return statement.
	        print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
	        print(bm_path)
	        raise
	    return bm

	def load_gt_flow_mat(bm_path):
	    """Load the 'bm' dataset from an HDF5 .mat file and resample it to 512x512.

	    The dataset is read, its axes are reversed, it is cut to the first 447
	    rows and columns, scaled by 511/447 and offset by -1.2, and each channel
	    is resized to 512x512.

	    Args:
	        bm_path: path to an HDF5 file containing a 'bm' dataset.

	    Returns:
	        Array of shape (512, 512, 2); per the original note, x first then y,
	        row-major, absolute positions.

	    Raises:
	        Exception: any error from loading or resizing is re-raised after the
	            offending path has been printed.
	    """
	    try:
	        with h5py.File(bm_path, 'r') as f:
	            bm = f['bm'][:].transpose((2, 1, 0))[:447, :447, :] * (511 / 447) - 1.2
	        bm0 = cv2.resize(bm[:, :, 0], (512, 512))
	        bm1 = cv2.resize(bm[:, :, 1], (512, 512))
	        bm = np.stack([bm0, bm1], axis=-1)
	    except Exception:
	        # Report which file failed, then surface the real error instead of
	        # an unrelated UnboundLocalError at the return statement.
	        print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
	        print(bm_path)
	        raise
	    return bm




	def load_flo(path):
	    """Read a .flo optical-flow file.

	    The file layout is: one float32 magic number (202021.25), int32 width,
	    int32 height, then ``2 * width * height`` float32 values.

	    Args:
	        path: path of the .flo file.

	    Returns:
	        Array of shape (height, width, 2).

	    Raises:
	        AssertionError: if the magic number is missing or wrong.
	    """
	    with open(path, 'rb') as f:
	        magic = np.fromfile(f, np.float32, count=1)
	        # Check the size first so an empty file fails the same way as a
	        # wrong magic number.
	        assert magic.size == 1 and 202021.25 == magic[0], 'Magic number incorrect. Invalid .flo file'
	        w = int(np.fromfile(f, np.int32, count=1)[0])
	        h = int(np.fromfile(f, np.int32, count=1)[0])
	        data = np.fromfile(f, np.float32, count=2 * w * h)

	    # Arrange the flat data as (rows, columns, bands).
	    data2D = np.resize(data, (h, w, 2))
	    return data2D


	def readPFM(file):
	    """Read a PFM image.

	    Args:
	        file: path of the PFM file.

	    Returns:
	        Tuple ``(data, scale)``. ``data`` has shape (height, width, 3) for a
	        'PF' file and (height, width) for a 'Pf' file, flipped vertically
	        relative to the stored row order. ``scale`` is the absolute value of
	        the scale field in the header.

	    Raises:
	        Exception: if the first line is neither 'PF' nor 'Pf', or the
	            dimension line is malformed.
	    """
	    # The context manager closes the handle even when the header is bad.
	    with open(file, 'rb') as stream:
	        header = stream.readline().rstrip().decode("ascii")
	        if header == 'PF':
	            color = True
	        elif header == 'Pf':
	            color = False
	        else:
	            raise Exception('Not a PFM file.')

	        dim_match = re.match(r'^(\d+)\s(\d+)\s$', stream.readline().decode("ascii"))
	        if not dim_match:
	            raise Exception('Malformed PFM header.')
	        width, height = map(int, dim_match.groups())

	        # The sign of the scale field encodes the byte order.
	        scale = float(stream.readline().decode("ascii").rstrip())
	        if scale < 0:  # little-endian
	            endian = '<'
	            scale = -scale
	        else:  # big-endian
	            endian = '>'

	        data = np.fromfile(stream, endian + 'f')

	    shape = (height, width, 3) if color else (height, width)
	    data = np.reshape(data, shape)
	    data = np.flipud(data)
	    return data, scale


	def writePFM(file, image, scale=1):
	    """Write a float32 image as a PFM file.

	    Args:
	        file: destination path.
	        image: float32 array of shape H x W x 3 (written as 'PF'), or
	            H x W x 1 / H x W (written as 'Pf').
	        scale: scale value stored in the header; it is written negated when
	            the data is little-endian.

	    Raises:
	        Exception: if the dtype is not float32 or the shape is unsupported.
	    """
	    # Validate before opening so a bad call does not truncate the target.
	    if image.dtype.name != 'float32':
	        raise Exception('Image dtype must be float32.')

	    image = np.flipud(image)

	    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
	        color = True
	    elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1:  # greyscale
	        color = False
	    else:
	        raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')

	    endian = image.dtype.byteorder
	    if endian == '<' or endian == '=' and sys.byteorder == 'little':
	        scale = -scale

	    with open(file, 'wb') as stream:
	        # Both tags must be bytes: the handle is binary.
	        stream.write(b'PF\n' if color else b'Pf\n')
	        stream.write(b'%d %d\n' % (image.shape[1], image.shape[0]))
	        stream.write(b'%f\n' % scale)
	        image.tofile(stream)


	def readFlow(path):
	    """Read a .flo optical-flow file as float32.

	    The file layout is: one float32 magic number (202021.25), int32 width,
	    int32 height, then ``2 * width * height`` float32 values.

	    Args:
	        path: path of the .flo file.

	    Returns:
	        float32 array of shape (height, width, 2).

	    Raises:
	        AssertionError: if the magic number is missing or wrong.
	    """
	    with open(path, 'rb') as f:
	        magic = np.fromfile(f, np.float32, count=1)
	        # Check the size first so an empty file fails the same way as a
	        # wrong magic number.
	        assert magic.size == 1 and 202021.25 == magic[0], 'Magic number incorrect. Invalid .flo file'
	        w = int(np.fromfile(f, np.int32, count=1)[0])
	        h = int(np.fromfile(f, np.int32, count=1)[0])
	        data = np.fromfile(f, np.float32, count=2 * w * h)

	    # Arrange the flat data as (rows, columns, bands).
	    data2D = np.resize(data, (h, w, 2))
	    return data2D.astype(np.float32)


	def readImage(name):
	    """Read an image file, using ``readPFM`` for .pfm/.PFM and imageio otherwise.

	    For PFM input only the data is returned (the scale is dropped); a
	    three-dimensional result is limited to its first three channels.
	    """
	    if not name.endswith(('.pfm', '.PFM')):
	        return imageio.imread(name)

	    pixels = readPFM(name)[0]
	    if len(pixels.shape) != 3:
	        return pixels
	    return pixels[:, :, 0:3]


	def writeImage(name, data):
	    """Write ``data`` to ``name``: ``writePFM`` (scale 1) for .pfm/.PFM, imageio otherwise."""
	    is_pfm = name.endswith(('.pfm', '.PFM'))
	    if not is_pfm:
	        return imageio.imwrite(name, data)
	    return writePFM(name, data, 1)


	def writeFlow(flow, name_to_save, save_dir):
	    """Write a flow array to ``save_dir/name_to_save`` in .flo layout.

	    The file holds a float32 magic number (202021.25), then int32 width
	    (``flow.shape[1]``) and height (``flow.shape[0]``), then the flow values
	    as float32.

	    Args:
	        flow: array whose first two axes are (height, width).
	        name_to_save: file name inside ``save_dir``.
	        save_dir: destination directory.
	    """
	    name = os.path.join(save_dir, name_to_save)
	    magic = 202021.25
	    # The context manager flushes and closes the file on every exit path.
	    with open(name, 'wb') as f:
	        np.array([magic], dtype=np.float32).tofile(f)
	        np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f)
	        flow.astype(np.float32).tofile(f)


	def writeMask(mask, name_to_save, save_dir):
	    """Save ``mask`` as a uint8 image at ``save_dir/name_to_save``.

	    The mask is converted to uint8; unless its maximum is already 255 it is
	    multiplied by 255 before being written with imageio.
	    """
	    target = os.path.join(save_dir, name_to_save)
	    as_bytes = mask.astype(np.uint8)
	    already_scaled = as_bytes.max() == 255
	    if not already_scaled:
	        as_bytes *= 255
	    imageio.imwrite(target, as_bytes)


	def readFloat(name):
	    """Read an array from a 'float' file.

	    The file starts with the line 'float', then the number of dimensions,
	    then one size per line, followed by raw float32 values.

	    Args:
	        name: path of the file.

	    Returns:
	        float32 array. The sizes are applied in reverse order; for more than
	        two dimensions the axes are then permuted with (2, 1, 0) followed by
	        (1, 0, 2).

	    Raises:
	        Exception: if the first line is not 'float'.
	    """
	    # The context manager closes the handle even when the header is bad.
	    with open(name, 'rb') as f:
	        if f.readline().decode("utf-8") != 'float\n':
	            raise Exception('float file %s did not contain <float> keyword' % name)

	        dim = int(f.readline())

	        dims = []
	        count = 1
	        for _ in range(dim):
	            d = int(f.readline())
	            dims.append(d)
	            count *= d

	        data = np.fromfile(f, np.float32, count).reshape(dims[::-1])

	    if dim > 2:
	        data = np.transpose(data, (2, 1, 0))
	        data = np.transpose(data, (1, 0, 2))

	    return data


	def writeFloat(name, data):
	    """Write an array of up to three dimensions as a 'float' file.

	    The header is the line 'float', the number of dimensions, then the sizes
	    (for two or more dimensions: ``shape[1]``, ``shape[0]``, then the rest).
	    The values follow as raw float32; three-dimensional data is written with
	    its axes permuted by (2, 0, 1).

	    Args:
	        name: destination path.
	        data: array with at most three dimensions.

	    Raises:
	        Exception: if ``data`` has more than three dimensions.
	    """
	    dim = len(data.shape)
	    # Validate before opening so a bad call does not create or truncate
	    # the target file.
	    if dim > 3:
	        raise Exception('bad float file dimension: %d' % dim)

	    with open(name, 'wb') as f:
	        f.write(('float\n').encode('ascii'))
	        f.write(('%d\n' % dim).encode('ascii'))

	        if dim == 1:
	            f.write(('%d\n' % data.shape[0]).encode('ascii'))
	        else:
	            f.write(('%d\n' % data.shape[1]).encode('ascii'))
	            f.write(('%d\n' % data.shape[0]).encode('ascii'))
	            for i in range(2, dim):
	                f.write(('%d\n' % data.shape[i]).encode('ascii'))

	        data = data.astype(np.float32)
	        if dim == 3:
	            np.transpose(data, (2, 0, 1)).tofile(f)
	        else:
	            # One- and two-dimensional data is stored as-is; the axis
	            # permutation only exists for three dimensions.
	            data.tofile(f)