Spaces:
Build error
Build error
| # このスクリプトのライセンスは、train_dreambooth.pyと同じくApache License 2.0とします | |
| # (c) 2022 Kohya S. @kohya_ss | |
| # 横長の画像から顔検出して正立するように回転し、そこを中心に正方形に切り出す | |
| # v2: extract max face if multiple faces are found | |
| # v3: add crop_ratio option | |
| # v4: add multiple faces extraction and min/max size | |
| import argparse | |
| import math | |
| import cv2 | |
| import glob | |
| import os | |
| from anime_face_detector import create_detector | |
| from tqdm import tqdm | |
| import numpy as np | |
# Row indices into each prediction's 'keypoints' array that this script uses
# as the right-eye / left-eye landmarks.
# NOTE(review): the index meaning is defined by the detector model, not by
# this file -- confirm against the anime_face_detector keypoint layout.
KP_REYE = 11
KP_LEYE = 19

# Predictions whose confidence score is below this value are ignored.
SCORE_THRES = 0.90


def detect_faces(detector, image, min_size):
    """Run the detector on an image and describe each confident face.

    Args:
        detector: callable taking the image and returning an iterable of
            predictions. Each prediction is indexed with 'bbox' (left, top,
            right, bottom, ..., score as the last element) and 'keypoints'
            (indexed as ``[row, 0:2]`` to obtain an x, y pair).
        image: image handed to the detector unchanged (the original code
            notes it is expected in BGR order).
        min_size: accepted for interface compatibility; not used here.

    Returns:
        A list of ``(cx, cy, fw, fh, angle)`` tuples sorted by
        ``max(fw, fh)``, largest first. ``cx``/``cy`` are the integer center
        of the bounding box, ``fw``/``fh`` its integer width/height, and
        ``angle`` the tilt in degrees of the line from the right-eye
        keypoint to the left-eye keypoint.
    """

    def describe(prediction):
        # Convert one raw prediction into the (cx, cy, fw, fh, angle) tuple.
        x0, y0, x1, y1 = prediction['bbox'][:4]
        landmarks = prediction['keypoints']
        left_eye_x, left_eye_y = landmarks[KP_LEYE, 0:2]
        right_eye_x, right_eye_y = landmarks[KP_REYE, 0:2]
        tilt = math.atan2(left_eye_y - right_eye_y, left_eye_x - right_eye_x) / math.pi * 180
        return (int((x0 + x1) / 2), int((y0 + y1) / 2), int(x1 - x0), int(y1 - y0), tilt)

    # A prediction is dropped only when its score compares below the threshold.
    confident = (p for p in detector(image) if not p['bbox'][-1] < SCORE_THRES)

    # Largest face first.
    return sorted((describe(p) for p in confident), key=lambda face: max(face[2], face[3]), reverse=True)
def rotate_image(image, angle, cx, cy):
    """Rotate an image around a point without changing the canvas size.

    Args:
        image: array whose first two dimensions are (height, width).
        angle: rotation angle passed to ``cv2.getRotationMatrix2D``.
        cx, cy: rotation center in pixel coordinates.

    Returns:
        ``(rotated, cx, cy)``. The center is returned unchanged because the
        output keeps the input width and height; pixels that fall outside
        the source are filled by reflecting the border.
    """
    height, width = image.shape[:2]
    matrix = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)

    # Enlarging (padding) the canvas to make room for the rotated corners was
    # tried previously and is currently disabled, so the output size always
    # equals the input size and (cx, cy) needs no offset.
    rotated = cv2.warpAffine(
        image,
        matrix,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REFLECT,
    )
    return rotated, cx, cy
def _parse_pair(spec, convert, error_message):
    """Split an 'a,b' option string and convert both parts with `convert`."""
    tokens = spec.split(',')
    # Kept as an assertion so the exception type raised on bad input is unchanged.
    assert len(tokens) == 2, error_message
    first, second = (convert(token) for token in tokens)
    return first, second


def _crop_window(center, crop_len, img_len):
    """Return (start, new_center) for a crop of `crop_len` centered on `center`, clamped to [0, img_len]."""
    start = center - crop_len // 2
    new_center = crop_len // 2
    if start < 0:
        # Window sticks out on the low side: shift it in, face moves off-center.
        new_center += start
        start = 0
    elif start + crop_len > img_len:
        # Window sticks out on the high side.
        new_center += start + crop_len - img_len
        start = img_len - crop_len
    return start, new_center


def process(args):
    """Detect faces in the images of ``args.src_dir`` and write results to ``args.dst_dir``.

    For every ``*.png`` / ``*.jpg`` / ``*.webp`` file directly inside
    ``args.src_dir`` the faces returned by ``detect_faces`` are filtered by
    ``args.min_size`` (inclusive) and ``args.max_size`` (exclusive). Each
    remaining face is optionally rotated upright (``args.rotate``), resized
    and cropped around the face (``args.crop_size`` / ``args.crop_ratio`` /
    ``args.resize_face_size`` / ``args.resize_fit``), optionally marked with
    a rectangle (``args.debug``), and saved as PNG. The output file name
    encodes the face center and size in the saved image:
    ``{basename}{_NN}_{cx}_{cy}_{fw}_{fh}.png`` where ``_NN`` is only present
    with ``args.multiple_faces``.

    Args:
        args: parsed options as produced by ``setup_parser().parse_args()``.

    Raises:
        AssertionError: on mutually exclusive options or a malformed
            ``crop_size`` / ``crop_ratio`` string.
        ValueError: if a ``crop_size`` / ``crop_ratio`` component is not a number.
    """
    assert (not args.resize_fit) or args.resize_face_size is None, \
        "resize_fit and resize_face_size can't be specified both / resize_fitとresize_face_sizeはどちらか片方しか指定できません"
    assert args.crop_ratio is None or args.resize_face_size is None, \
        "crop_ratio指定時はresize_face_sizeは指定できません"

    # Load the anime face detection model.
    print("loading face detector.")
    detector = create_detector('yolov3')

    # Parse the crop options.
    if args.crop_size is None:
        crop_width = crop_height = None
    else:
        crop_width, crop_height = _parse_pair(
            args.crop_size, int, "crop_size must be 'width,height' / crop_sizeは'幅,高さ'で指定してください")

    if args.crop_ratio is None:
        crop_h_ratio = crop_v_ratio = None
    else:
        crop_h_ratio, crop_v_ratio = _parse_pair(
            args.crop_ratio, float, "crop_ratio must be 'horizontal,vertical' / crop_ratioは'幅,高さ'の倍率で指定してください")

    # Process the images.
    print("processing.")
    output_extension = ".png"

    os.makedirs(args.dst_dir, exist_ok=True)
    paths = glob.glob(os.path.join(args.src_dir, "*.png")) + glob.glob(os.path.join(args.src_dir, "*.jpg")) + \
        glob.glob(os.path.join(args.src_dir, "*.webp"))
    for path in tqdm(paths):
        basename = os.path.splitext(os.path.basename(path))[0]

        # cv2.imread fails on Japanese file names, so read the bytes with
        # numpy and decode them in memory instead.
        data = np.fromfile(path, np.uint8)
        image = cv2.imdecode(data, cv2.IMREAD_UNCHANGED) if data.size > 0 else None
        if image is None:
            # Unreadable/corrupt file: skip it instead of aborting the whole run.
            print(f"failed to decode image. skip / 画像を読み込めないためスキップします: {path}")
            continue
        if len(image.shape) == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        if image.shape[2] == 4:
            print(f"image has alpha. ignore / 画像の透明度が設定されているため無視します: {path}")
            # Copy so the result no longer shares memory with the 4-channel array.
            image = image[:, :, :3].copy()

        img_h, img_w = image.shape[:2]

        # NOTE(review): the third argument is received by detect_faces as
        # `min_size` and ignored there, so every detected face is processed
        # whether or not --multiple_faces is given; the flag only affects
        # the file name suffix. Confirm whether that is intended.
        faces = detect_faces(detector, image, args.multiple_faces)
        for i, (cx, cy, fw, fh, angle) in enumerate(faces):
            face_size = max(fw, fh)
            if face_size <= 0:
                # Degenerate detection: would divide by zero / produce an empty crop.
                continue
            if args.min_size is not None and face_size < args.min_size:
                continue
            if args.max_size is not None and face_size >= args.max_size:
                continue
            face_suffix = f"_{i+1:02d}" if args.multiple_faces else ""

            # Every face starts from the original image, so its working
            # width/height must also start from the original dimensions
            # (they are rescaled below for this face only).
            face_img = image
            w, h = img_w, img_h

            # Rotate if requested.
            if args.rotate:
                face_img, cx, cy = rotate_image(face_img, angle, cx, cy)

            # Crop around the face if requested.
            if crop_width is not None or crop_h_ratio is not None:
                cur_crop_width, cur_crop_height = crop_width, crop_height
                if crop_h_ratio is not None:
                    cur_crop_width = int(face_size * crop_h_ratio + .5)
                    cur_crop_height = int(face_size * crop_v_ratio + .5)

                # Work out whether a resize is needed before cropping.
                scale = 1.0
                if args.resize_face_size is not None:
                    # Resize so that the face gets the requested size.
                    scale = args.resize_face_size / face_size
                    if scale < cur_crop_width / w:
                        print(
                            f"image width too small in face size based resizing / 顔を基準にリサイズすると画像の幅がcrop sizeより小さい(顔が相対的に大きすぎる)ので顔サイズが変わります: {path}")
                        scale = cur_crop_width / w
                    if scale < cur_crop_height / h:
                        print(
                            f"image height too small in face size based resizing / 顔を基準にリサイズすると画像の高さがcrop sizeより小さい(顔が相対的に大きすぎる)ので顔サイズが変わります: {path}")
                        scale = cur_crop_height / h
                elif crop_h_ratio is None:
                    # Explicit crop size (no resizing is done in ratio mode).
                    if w < cur_crop_width:
                        print(f"image width too small/ 画像の幅がcrop sizeより小さいので画質が劣化します: {path}")
                        scale = cur_crop_width / w
                    if h < cur_crop_height:
                        print(f"image height too small/ 画像の高さがcrop sizeより小さいので画質が劣化します: {path}")
                        # Keep the larger factor so that both sides reach the crop size.
                        scale = max(scale, cur_crop_height / h)
                    if args.resize_fit:
                        scale = max(cur_crop_width / w, cur_crop_height / h)

                if scale != 1.0:
                    w = int(w * scale + .5)
                    h = int(h * scale + .5)
                    face_img = cv2.resize(face_img, (w, h),
                                          interpolation=cv2.INTER_AREA if scale < 1.0 else cv2.INTER_LANCZOS4)
                    cx = int(cx * scale + .5)
                    cy = int(cy * scale + .5)
                    fw = int(fw * scale + .5)
                    fh = int(fh * scale + .5)

                # Never ask for more pixels than the image has.
                cur_crop_width = min(cur_crop_width, face_img.shape[1])
                cur_crop_height = min(cur_crop_height, face_img.shape[0])

                # After cropping, cx/cy become the face center inside the crop.
                x, cx = _crop_window(cx, cur_crop_width, face_img.shape[1])
                face_img = face_img[:, x:x + cur_crop_width]

                y, cy = _crop_window(cy, cur_crop_height, face_img.shape[0])
                face_img = face_img[y:y + cur_crop_height]

            # Debug: draw the face rectangle.
            if args.debug:
                # face_img may be the source image itself or a view of it;
                # draw on a copy so other faces of this image stay clean.
                face_img = face_img.copy()
                cv2.rectangle(face_img, (cx - fw // 2, cy - fh // 2), (cx + fw // 2, cy + fh // 2),
                              (255, 0, 255), max(1, fw // 20))

            # Encode in memory and write the bytes ourselves (same non-ASCII
            # path concern as for reading).
            _, buf = cv2.imencode(output_extension, face_img)
            out_name = f"{basename}{face_suffix}_{cx:04d}_{cy:04d}_{fw:04d}_{fh:04d}{output_extension}"
            with open(os.path.join(args.dst_dir, out_name), "wb") as f:
                buf.tofile(f)
def setup_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` with the options ``--src_dir``,
        ``--dst_dir``, ``--rotate``, ``--resize_fit``, ``--resize_face_size``,
        ``--crop_size``, ``--crop_ratio``, ``--min_size``, ``--max_size``,
        ``--multiple_faces`` and ``--debug``. Value options default to
        ``None``; flag options default to ``False``.
    """
    # (option name, add_argument keyword arguments), in registration order.
    option_table = (
        ("--src_dir", dict(type=str, help="directory to load images / 画像を読み込むディレクトリ")),
        ("--dst_dir", dict(type=str, help="directory to save images / 画像を保存するディレクトリ")),
        ("--rotate", dict(action="store_true", help="rotate images to align faces / 顔が正立するように画像を回転する")),
        ("--resize_fit", dict(
            action="store_true",
            help="resize to fit smaller side after cropping / 切り出し後の画像の短辺がcrop_sizeにあうようにリサイズする")),
        ("--resize_face_size", dict(
            type=int, default=None,
            help="resize image before cropping by face size / 切り出し前に顔がこのサイズになるようにリサイズする")),
        ("--crop_size", dict(
            type=str, default=None,
            help="crop images with 'width,height' pixels, face centered / 顔を中心として'幅,高さ'のサイズで切り出す")),
        ("--crop_ratio", dict(
            type=str, default=None,
            help="crop images with 'horizontal,vertical' ratio to face, face centered / 顔を中心として顔サイズの'幅倍率,高さ倍率'のサイズで切り出す")),
        ("--min_size", dict(
            type=int, default=None,
            help="minimum face size to output (included) / 処理対象とする顔の最小サイズ(この値以上)")),
        ("--max_size", dict(
            type=int, default=None,
            help="maximum face size to output (excluded) / 処理対象とする顔の最大サイズ(この値未満)")),
        ("--multiple_faces", dict(
            action="store_true",
            help="output each faces / 複数の顔が見つかった場合、それぞれを切り出す")),
        ("--debug", dict(action="store_true", help="render rect for face / 処理後画像の顔位置に矩形を描画します")),
    )

    cli = argparse.ArgumentParser()
    for option_name, option_kwargs in option_table:
        cli.add_argument(option_name, **option_kwargs)
    return cli
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the pipeline.
    cli_args = setup_parser().parse_args()
    process(cli_args)