DvD / utils_data /io.py
hanquansanren's picture
Add application file
05fb4ab
#!/usr/bin/env python3.7
import os
import re
import sys
import uuid
import imageio
import numpy as np
import h5py
import cv2
# from scipy.io import loadmat
# import hdf5storage as h5
import torch
import torch.nn.functional as F
import random
# import kornia.augmentation as KA
# import kornia.geometry.transform as KG
def tight_crop(img, mask, bm): # [512,512,3]unit8 [512,512]unit8 [448,448,2] float64
# msk=((img[:,:,0]!=0)&(img[:,:,1]!=0)&(img[:,:,2]!=0)).astype(np.uint8)
size=mask.shape
[y, x] = (mask[:,:,0]).nonzero()
minx = min(x)
maxx = max(x)
miny = min(y)
maxy = max(y)
img = img[miny : maxy + 1, minx : maxx + 1, :]
mask = mask[miny : maxy + 1, minx : maxx + 1, :]
# hw_rate = (maxy-miny)/(maxx-minx) # 不需要考虑长宽比,因为测试时都是裁剪好的图片
s = 45
img = np.pad(img, ((s, s), (s, s), (0, 0)), 'constant')
mask = np.pad(mask, ((s, s), (s, s), (0, 0)), 'constant')
cx1 = random.randint(5, s - 5)
cx2 = random.randint(5, s - 5) + 1
cy1 = random.randint(5, s - 5)
cy2 = random.randint(5, s - 5) + 1
img = img[cy1 : -cy2, cx1 : -cx2, :]
mask = mask[cy1 : -cy2, cx1 : -cx2, :]
t=miny-s+cy1
b=size[0]-maxy-s+cy2
l=minx-s+cx1
r=size[1]-maxx-s+cx2
bm[:,:,1]=bm[:,:,1]-t
bm[:,:,0]=bm[:,:,0]-l
bm=511*bm/np.array([511.0-l-r, 511.0-t-b]) # 0~1
# bm0=cv2.resize(bm[:,:,0],(512,512))
# bm1=cv2.resize(bm[:,:,1],(512,512))
# bm=np.stack([bm0,bm1],axis=-1)
return img, mask, bm
# 这是一个用于裁剪图片的函数,图片中间是一个拍照文档,现有的函数
# 因为使用了“img[miny : maxy + 1, minx : maxx + 1, :]” 背景被过度裁剪了,我想在裁剪后保留完整的背景,如何修改函数
def tight_crop_new(img, mask, bm):
# img [512,512,3]unit8
# mask [512,512]unit8
# bm [448,448,2] float64
size = mask.shape
[y, x] = (mask[:, :, 0]).nonzero()
minx = min(x)
maxx = max(x)
miny = min(y)
maxy = max(y)
# # 为了保留背景,直接操作原图,不裁剪图像尺寸
# new_img = img.copy()
# new_mask = mask.copy()
# 随机添加边界内偏移(确保不超出图像边界)
offset = 25
cx1 = random.randint(5, offset)
cx2 = random.randint(5, offset)
cy1 = random.randint(5, offset)
cy2 = random.randint(5, offset)
# 调整裁剪范围并保持图像背景完整
final_minx = max(0, minx - cx1)
final_maxx = min(size[1], maxx + cx2)
final_miny = max(0, miny - cy1)
final_maxy = min(size[0], maxy + cy2)
# 裁剪出包含文档的区域,但保留背景尺寸
cropped_img = img[final_miny:final_maxy, final_minx:final_maxx, :]
cropped_mask = mask[final_miny:final_maxy, final_minx:final_maxx, :]
# 更新 bm 的坐标
t = final_miny
b = size[0] - final_maxy
l = final_minx
r = size[1] - final_maxx
bm[:, :, 1] = bm[:, :, 1] - t
bm[:, :, 0] = bm[:, :, 0] - l
bm = 511 * bm / np.array([511.0 - l - r, 511.0 - t - b]) # 0~1
return cropped_img, cropped_mask/255., bm
def augmentation(img, mask, bm, bg=None): # [512,512,3]unit8 [512,512,1]unit8 [448,448,2] float64 [512,512,3] unit8
# tight crop
img, mask, bm = tight_crop_new(img, mask, bm)
# replace bg
[fh, fw, _] = img.shape
chance=random.random()
# chance = 0.25
if chance > 0.3:
bg = cv2.resize(bg, (200, 200))
bg = np.tile(bg, (3, 3, 1)) # (600, 600, 3)
bg = bg[: fh, : fw, :]
msk = mask
elif chance < 0.3 and chance> 0.2:
c = np.array([random.random(), random.random(), random.random()])
bg = np.ones((fh, fw, 3)) * c
msk = mask
# cv2.imwrite("vis_hp/debug_vis/tex2.png", bg)
else:
bg=np.zeros((fh, fw, 3))
msk=np.ones((fh, fw, 3))
img = bg * (1 - msk) + img * msk
# cv2.imwrite("vis_hp/debug_vis/replace.png", img)
mask = cv2.resize(mask, (512, 512))
img = cv2.resize(img, (512, 512))
# msk=((bm[:,:,0]!=0)&(bm[:,:,1]!=0)&(bm[:,:,2]!=0)).astype(np.uint8)
return img, mask, bm
# Argument parsing
def boolean_string(s):
if s not in {'False', 'True'}:
raise ValueError('Not a valid boolean string')
return s == 'True'
def read(file):
if file.endswith('.float3'): return readFloat(file)
elif file.endswith('.flo'): return readFlow(file)
elif file.endswith('.ppm'): return readImage(file)
elif file.endswith('.pgm'): return readImage(file)
elif file.endswith('.png'): return readImage(file)
elif file.endswith('.jpg'): return readImage(file)
elif file.endswith('.pfm'): return readPFM(file)[0]
else: raise Exception('don\'t know how to read %s' % file)
def write(file, data):
if file.endswith('.float3'): return writeFloat(file, data)
elif file.endswith('.flo'): return writeFlow(file, data)
elif file.endswith('.ppm'): return writeImage(file, data)
elif file.endswith('.pgm'): return writeImage(file, data)
elif file.endswith('.png'): return writeImage(file, data)
elif file.endswith('.jpg'): return writeImage(file, data)
elif file.endswith('.pfm'): return writePFM(file, data)
else: raise Exception('don\'t know how to write %s' % file)
def load_gt_flow_npz(bm_path):
# # bm = np.transpose(h5py.File(bm_path,'r',libver='latest', swmr=True)["bm"])
# try:
# bm = h5.loadmat(bm_path)['bm'] # (1024, 1024, 2) from 0~1024
# except:
# print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
# print(bm_path)
# bm = (bm/np.array([1024,1024])).astype(np.float32) # (1024, 1024, 2) from 0~1
# bm[:,:,0] = bm[:,:,0]*512 # (1024, 1024, 2) from 0~512
# bm[:,:,1] = bm[:,:,1]*384
# bm = torch.from_numpy(bm.transpose(2,0,1)).unsqueeze(0) # (1,2,384,512)
# bm = F.interpolate(bm,size=(384,512),mode='bilinear',
# align_corners=True) # (1,2,384,512)
try:
bm = np.load(bm_path)['warped_BM'][:447,:447,:]*511 + 0.4# (448, 448, 2) range[0-1] # 先y后x,行序优先
# bm[:,:,0] = bm[:,:,0]*447 # (448, 448, 2) from 0~448
# bm[:,:,1] = bm[:,:,1]*447
bm0=cv2.resize(bm[:,:,0],(512,512))
bm1=cv2.resize(bm[:,:,1],(512,512))
bm=np.stack([bm0,bm1],axis=-1)
bm = np.roll(bm, shift=1, axis=-1) # # 先x后y,行序优先, 绝对位置bm
# bm = bm.transpose((2,0,1))
except:
print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
print(bm_path)
# bm = (bm/np.array([1024,1024])).astype(np.float32) # (1024, 1024, 2) from 0~1
# bm[:,:,0] = bm[:,:,0]*520 # (1024, 1024, 2) from 0~512
# bm[:,:,1] = bm[:,:,1]*520
# bm = torch.from_numpy(bm.transpose(2,0,1)).unsqueeze(0) # [1, 2, 1024, 1024]
# bm = F.interpolate(bm,size=(384,512),mode='bilinear',
# align_corners=True) # (1,2,384,512)
return bm
def load_gt_flow_mat(bm_path):
try:
# bm = h5.loadmat(bm_path)['bm']# (448, 448, 2) range[0-1] # 先y后x,行序优先
with h5py.File(bm_path, 'r') as f:
bm = f['bm'][:].transpose((2,1,0))[:447,:447,:]*(511/447) - 1.2 # (447, 447, 2)
bm0=cv2.resize(bm[:,:,0],(512,512))
bm1=cv2.resize(bm[:,:,1],(512,512))
bm=np.stack([bm0,bm1],axis=-1)
# bm[:,:,0] = bm[:,:,0]*448 # (448, 448, 2) from 0~448
# bm[:,:,1] = bm[:,:,1]*448
# bm = np.roll(bm, shift=1, axis=-1)
except:
print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
print(bm_path)
return bm # 先x后y,行序优先, 绝对位置bm (448, 448, 2) from 0~448
def load_flo(path):
with open(path, 'rb') as f:
magic = np.fromfile(f, np.float32, count=1)
assert(202021.25 == magic),'Magic number incorrect. Invalid .flo file'
w = np.fromfile(f, np.int32, count=1)[0]
h = np.fromfile(f, np.int32, count=1)[0]
data = np.fromfile(f, np.float32, count=2*w*h)
# Reshape data into 3D array (columns, rows, bands)
data2D = np.resize(data, (h, w, 2))
return data2D
def readPFM(file):
file = open(file, 'rb')
color = None
width = None
height = None
scale = None
endian = None
header = file.readline().rstrip()
if header.decode("ascii") == 'PF':
color = True
elif header.decode("ascii") == 'Pf':
color = False
else:
raise Exception('Not a PFM file.')
dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode("ascii"))
if dim_match:
width, height = list(map(int, dim_match.groups()))
else:
raise Exception('Malformed PFM header.')
scale = float(file.readline().decode("ascii").rstrip())
if scale < 0: # little-endian
endian = '<'
scale = -scale
else:
endian = '>' # big-endian
data = np.fromfile(file, endian + 'f')
shape = (height, width, 3) if color else (height, width)
data = np.reshape(data, shape)
data = np.flipud(data)
return data, scale
def writePFM(file, image, scale=1):
file = open(file, 'wb')
color = None
if image.dtype.name != 'float32':
raise Exception('Image dtype must be float32.')
image = np.flipud(image)
if len(image.shape) == 3 and image.shape[2] == 3: # color image
color = True
elif len(image.shape) == 2 or len(image.shape) == 3 and image.shape[2] == 1: # greyscale
color = False
else:
raise Exception('Image must have H x W x 3, H x W x 1 or H x W dimensions.')
file.write('PF\n' if color else 'Pf\n'.encode())
file.write('%d %d\n'.encode() % (image.shape[1], image.shape[0]))
endian = image.dtype.byteorder
if endian == '<' or endian == '=' and sys.byteorder == 'little':
scale = -scale
file.write('%f\n'.encode() % scale)
image.tofile(file)
def readFlow(path):
with open(path, 'rb') as f:
magic = np.fromfile(f, np.float32, count=1)
assert(202021.25 == magic),'Magic number incorrect. Invalid .flo file'
w = np.fromfile(f, np.int32, count=1)[0]
h = np.fromfile(f, np.int32, count=1)[0]
data = np.fromfile(f, np.float32, count=2*w*h)
# Reshape data into 3D array (columns, rows, bands)
data2D = np.resize(data, (h, w, 2))
return data2D.astype(np.float32)
def readImage(name):
if name.endswith('.pfm') or name.endswith('.PFM'):
data = readPFM(name)[0]
if len(data.shape)==3:
return data[:,:,0:3]
else:
return data
return imageio.imread(name)
def writeImage(name, data):
if name.endswith('.pfm') or name.endswith('.PFM'):
return writePFM(name, data, 1)
return imageio.imwrite(name, data)
def writeFlow(flow, name_to_save, save_dir):
name=os.path.join(save_dir, name_to_save)
f = open(name, 'wb')
magic=202021.25
np.array([magic], dtype=np.float32).tofile(f)
np.array([flow.shape[1], flow.shape[0]], dtype=np.int32).tofile(f)
flow = flow.astype(np.float32)
flow.tofile(f)
def writeMask(mask, name_to_save, save_dir):
name = os.path.join(save_dir, name_to_save)
mask = mask.astype(np.uint8)
if mask.max() != 255:
mask *= 255
imageio.imwrite(name, mask.astype(np.uint8))
def readFloat(name):
f = open(name, 'rb')
if(f.readline().decode("utf-8")) != 'float\n':
raise Exception('float file %s did not contain <float> keyword' % name)
dim = int(f.readline())
dims = []
count = 1
for i in range(0, dim):
d = int(f.readline())
dims.append(d)
count *= d
dims = list(reversed(dims))
data = np.fromfile(f, np.float32, count).reshape(dims)
if dim > 2:
data = np.transpose(data, (2, 1, 0))
data = np.transpose(data, (1, 0, 2))
return data
def writeFloat(name, data):
f = open(name, 'wb')
dim=len(data.shape)
if dim>3:
raise Exception('bad float file dimension: %d' % dim)
f.write(('float\n').encode('ascii'))
f.write(('%d\n' % dim).encode('ascii'))
if dim == 1:
f.write(('%d\n' % data.shape[0]).encode('ascii'))
else:
f.write(('%d\n' % data.shape[1]).encode('ascii'))
f.write(('%d\n' % data.shape[0]).encode('ascii'))
for i in range(2, dim):
f.write(('%d\n' % data.shape[i]).encode('ascii'))
data = data.astype(np.float32)
if dim==2:
data.tofile(f)
else:
np.transpose(data, (2, 0, 1)).tofile(f)