Spaces:

ygtxr1997
/

ReliableSwap_Demo

Running

App Files Files Community

gavinyuan committed on Jun 6, 2023

Commit

de68d44

1 Parent(s): d252b8a

add: app.py

Browse files

Files changed (1) hide show

app.py +470 -0

app.py ADDED Viewed

	@@ -0,0 +1,470 @@

+import os
+import uuid
+import glob
+import shutil
+from pathlib import Path
+from multiprocessing.pool import Pool
+import gradio as gr
+import torch
+from torchvision import transforms
+import cv2
+import numpy as np
+from PIL import Image
+import tqdm
+# from modules.networks.faceshifter import FSGenerator
+# from inference.alignment import norm_crop, norm_crop_with_M, paste_back
+# from inference.utils import save, get_5_from_98, get_detector, get_lmk
+# from inference.PIPNet.lib.tools import get_lmk_model, demo_image
+# from inference.landmark_smooth import kalman_filter_landmark, savgol_filter_landmark
+# from tricks import Trick
+# make_abs_path = lambda fn: os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), fn))
+#
+#
+# fs_model_name = 'faceshifter'
+# in_size = 512
+#
+# mouth_net_param = {
+#     "use": True,
+#     "feature_dim": 128,
+#     "crop_param": (28, 56, 84, 112),
+#     "weight_path": "../../modules/third_party/arcface/weights/mouth_net_28_56_84_112.pth",
+# }
+# trick = Trick()
+#
+# T = transforms.Compose(
+#         [
+#             transforms.ToTensor(),
+#             transforms.Normalize(0.5, 0.5),
+#         ]
+#     )
+# tensor2pil_transform = transforms.ToPILImage()
+#
+#
+# def extract_generator(ckpt: str, pt: str):
+#     print(f'[extract_generator] loading ckpt...')
+#     from trainer.faceshifter.faceshifter_pl import FaceshifterPL512, FaceshifterPL
+#     import yaml
+#     with open(make_abs_path('../../trainer/faceshifter/config.yaml'), 'r') as f:
+#         config = yaml.load(f, Loader=yaml.FullLoader)
+#     config['mouth_net'] = mouth_net_param
+#
+#     if in_size == 256:
+#         net = FaceshifterPL(n_layers=3, num_D=3, config=config)
+#     elif in_size == 512:
+#         net = FaceshifterPL512(n_layers=3, num_D=3, config=config, verbose=False)
+#     else:
+#         raise ValueError('Not supported in_size.')
+#     checkpoint = torch.load(ckpt, map_location="cpu", )
+#     net.load_state_dict(checkpoint["state_dict"], strict=False)
+#     net.eval()
+#
+#     G = net.generator
+#     torch.save(G.state_dict(), pt)
+#     print(f'[extract_generator] extracted from {ckpt}, pth saved to {pt}')
+#
+#
+# ''' load model '''
+# if fs_model_name == 'faceshifter':
+#     # pt_path = make_abs_path("../ffplus/extracted_ckpt/G_mouth1_t38.pth")
+#     # pt_path = make_abs_path("../ffplus/extracted_ckpt/G_mouth1_t512_6.pth")
+#     # ckpt_path = "/apdcephfs/share_1290939/gavinyuan/out/triplet512_6/epoch=3-step=128999.ckpt"
+#     pt_path = make_abs_path("../ffplus/extracted_ckpt/G_mouth1_t512_4.pth")
+#     ckpt_path = "/apdcephfs/share_1290939/gavinyuan/out/triplet512_4/epoch=2-step=185999.ckpt"
+#     if not os.path.exists(pt_path) or 't512' in pt_path:
+#         extract_generator(ckpt_path, pt_path)
+#     fs_model = FSGenerator(
+#         make_abs_path("../../modules/third_party/arcface/weights/ms1mv3_arcface_r100_fp16/backbone.pth"),
+#         mouth_net_param=mouth_net_param,
+#         in_size=in_size,
+#         downup=in_size == 512,
+#     )
+#     fs_model.load_state_dict(torch.load(pt_path, "cpu"), strict=True)
+#     fs_model.eval()
+#
+#     @torch.no_grad()
+#     def infer_batch_to_img(i_s, i_t, post: bool = False):
+#         i_r = fs_model(i_s, i_t)[0]  # x, id_vector, att
+#
+#         if post:
+#             target_hair_mask = trick.get_any_mask(i_t, par=[0, 17])
+#             target_hair_mask = trick.smooth_mask(target_hair_mask)
+#             i_r = target_hair_mask * i_t + (target_hair_mask * (-1) + 1) * i_r
+#             i_r = trick.finetune_mouth(i_s, i_t, i_r) if in_size == 256 else i_r
+#
+#         img_r = trick.tensor_to_arr(i_r)[0]
+#         return img_r
+#
+# elif fs_model_name == 'simswap_triplet' or fs_model_name == 'simswap_vanilla':
+#     from modules.networks.simswap import Generator_Adain_Upsample
+#     sw_model = Generator_Adain_Upsample(
+#         input_nc=3, output_nc=3, latent_size=512, n_blocks=9, deep=False,
+#         mouth_net_param=mouth_net_param
+#     )
+#     if fs_model_name == 'simswap_triplet':
+#         pt_path = make_abs_path("../ffplus/extracted_ckpt/G_mouth1_st5.pth")
+#         ckpt_path = make_abs_path("/apdcephfs/share_1290939/gavinyuan/out/"
+#                                   "simswap_triplet_5/epoch=12-step=782999.ckpt")
+#     elif fs_model_name == 'simswap_vanilla':
+#         pt_path = make_abs_path("../ffplus/extracted_ckpt/G_tmp_sv4_off.pth")
+#         ckpt_path = make_abs_path("/apdcephfs/share_1290939/gavinyuan/out/"
+#                                   "simswap_vanilla_4/epoch=694-step=1487999.ckpt")
+#     else:
+#         pt_path = None
+#         ckpt_path = None
+#     sw_model.load_state_dict(torch.load(pt_path, "cpu"), strict=False)
+#     sw_model.eval()
+#     fs_model = sw_model
+#
+#     from trainer.simswap.simswap_pl import SimSwapPL
+#     import yaml
+#     with open(make_abs_path('../../trainer/simswap/config.yaml'), 'r') as f:
+#         config = yaml.load(f, Loader=yaml.FullLoader)
+#     config['mouth_net'] = mouth_net_param
+#     net = SimSwapPL(config=config, use_official_arc='off' in pt_path)
+#
+#     checkpoint = torch.load(ckpt_path, map_location="cpu")
+#     net.load_state_dict(checkpoint["state_dict"], strict=False)
+#     net.eval()
+#     sw_mouth_net = net.mouth_net  # maybe None
+#     sw_netArc = net.netArc
+#     fs_model = fs_model.cuda()
+#     sw_mouth_net = sw_mouth_net.cuda() if sw_mouth_net is not None else sw_mouth_net
+#     sw_netArc = sw_netArc.cuda()
+#
+#     @torch.no_grad()
+#     def infer_batch_to_img(i_s, i_t, post: bool = False):
+#         i_r = fs_model(source=i_s, target=i_t, net_arc=sw_netArc, mouth_net=sw_mouth_net,)
+#         if post:
+#             target_hair_mask = trick.get_any_mask(i_t, par=[0, 17])
+#             target_hair_mask = trick.smooth_mask(target_hair_mask)
+#             i_r = target_hair_mask * i_t + (target_hair_mask * (-1) + 1) * i_r
+#         i_r = i_r.clamp(-1, 1)
+#         i_r = trick.tensor_to_arr(i_r)[0]
+#         return i_r
+#
+# elif fs_model_name == 'simswap_official':
+#     from simswap.image_infer import SimSwapOfficialImageInfer
+#     fs_model = SimSwapOfficialImageInfer()
+#     pt_path = 'Simswap Official'
+#     mouth_net_param = {
+#         "use": False
+#     }
+#
+#     @torch.no_grad()
+#     def infer_batch_to_img(i_s, i_t):
+#         i_r = fs_model.image_infer(source_tensor=i_s, target_tensor=i_t)
+#         i_r = i_r.clamp(-1, 1)
+#         return i_r
+#
+# else:
+#     raise ValueError('Not supported fs_model_name.')
+#
+#
+# print(f'[demo] model loaded from {pt_path}')
+def swap_image(
+    source_image,
+    target_path,
+    out_path,
+    transform,
+    G,
+    align_source="arcface",
+    align_target="set1",
+    gpu_mode=True,
+    paste_back=True,
+    use_post=False,
+    use_gpen=False,
+    in_size=256,
+):
+    name = target_path.split("/")[-1]
+    name = "out_" + name
+    if isinstance(G, torch.nn.Module):
+        G.eval()
+        if gpu_mode:
+            G = G.cuda()
+    source_img = np.array(Image.open(source_image).convert("RGB"))
+    net, detector = get_lmk_model()
+    lmk = get_5_from_98(demo_image(source_img, net, detector)[0])
+    source_img = norm_crop(source_img, lmk, in_size, mode=align_source, borderValue=0.0)
+    source_img = transform(source_img).unsqueeze(0)
+    target = np.array(Image.open(target_path).convert("RGB"))
+    original_target = target.copy()
+    lmk = get_5_from_98(demo_image(target, net, detector)[0])
+    target, M = norm_crop_with_M(target, lmk, in_size, mode=align_target, borderValue=0.0)
+    target = transform(target).unsqueeze(0)
+    if gpu_mode:
+        target = target.cuda()
+        source_img = source_img.cuda()
+    cv2.imwrite('cropped_source.png', trick.tensor_to_arr(source_img)[0, :, :, ::-1])
+    cv2.imwrite('cropped_target.png', trick.tensor_to_arr(target)[0, :, :, ::-1])
+    # both inputs should be 512
+    result = infer_batch_to_img(source_img, target, post=use_post)
+    cv2.imwrite('result.png', result[:, :, ::-1])
+    os.makedirs(out_path, exist_ok=True)
+    Image.fromarray(result.astype(np.uint8)).save(os.path.join(out_path, name))
+    save((result, M, original_target, os.path.join(out_path, "paste_back_" + name), None),
+         trick=trick, use_post=use_gpen)
+def process_video(
+    source_image,
+    target_path,
+    out_path,
+    transform,
+    G,
+    align_source="arcface",
+    align_target="set1",
+    gpu_mode=True,
+    frames=9999999,
+    use_tddfav2=False,
+    landmark_smooth="kalman",
+):
+    if isinstance(G, torch.nn.Module):
+        G.eval()
+        if gpu_mode:
+            G = G.cuda()
+    ''' Target video to frames (.png) '''
+    fps = 25.0
+    if not os.path.isdir(target_path):
+        vidcap = cv2.VideoCapture(target_path)
+        fps = vidcap.get(cv2.CAP_PROP_FPS)
+        try:
+            for match in glob.glob(os.path.join("./tmp/", "*.png")):
+                os.remove(match)
+            for match in glob.glob(os.path.join(out_path, "*.png")):
+                os.remove(match)
+        except Exception as e:
+            print(e)
+        os.makedirs("./tmp/", exist_ok=True)
+        os.system(
+            f"ffmpeg -i {target_path} -qscale:v 1 -qmin 1 -qmax 1 -vsync 0  ./tmp/frame_%05d.png"
+        )
+        target_path = "./tmp/"
+    globbed_images = sorted(glob.glob(os.path.join(target_path, "*.png")))
+    ''' Get target landmarks '''
+    print('[Extracting target landmarks...]')
+    if not use_tddfav2:
+        align_net, align_detector = get_lmk_model()
+    else:
+        align_net, align_detector = get_detector(gpu_mode=gpu_mode)
+    target_lmks = []
+    for frame_path in tqdm.tqdm(globbed_images):
+        target = np.array(Image.open(frame_path).convert("RGB"))
+        lmk = demo_image(target, align_net, align_detector)
+        lmk = lmk[0]
+        target_lmks.append(lmk)
+    ''' Landmark smoothing '''
+    target_lmks = np.array(target_lmks, np.float32)  # (#frames, 98, 2)
+    if landmark_smooth == 'kalman':
+        target_lmks = kalman_filter_landmark(target_lmks,
+                                             process_noise=0.01,
+                                             measure_noise=0.01).astype(np.int)
+    elif landmark_smooth == 'savgol':
+        target_lmks = savgol_filter_landmark(target_lmks).astype(np.int)
+    elif landmark_smooth == 'cancel':
+        target_lmks = target_lmks.astype(np.int)
+    else:
+        raise KeyError('Not supported landmark_smooth choice')
+    ''' Crop source image '''
+    source_img = np.array(Image.open(source_image).convert("RGB"))
+    if not use_tddfav2:
+        lmk = get_5_from_98(demo_image(source_img, align_net, align_detector)[0])
+    else:
+        lmk = get_lmk(source_img, align_net, align_detector)
+    source_img = norm_crop(source_img, lmk, in_size, mode=align_source, borderValue=0.0)
+    source_img = transform(source_img).unsqueeze(0)
+    if gpu_mode:
+        source_img = source_img.cuda()
+    ''' Process by frames '''
+    targets = []
+    t_facial_masks = []
+    Ms = []
+    original_frames = []
+    names = []
+    count = 0
+    for image in tqdm.tqdm(globbed_images):
+        names.append(os.path.join(out_path, Path(image).name))
+        target = np.array(Image.open(image).convert("RGB"))
+        original_frames.append(target)
+        ''' Crop target frames '''
+        lmk = get_5_from_98(target_lmks[count])
+        target, M = norm_crop_with_M(target, lmk, in_size, mode=align_target, borderValue=0.0)
+        target = transform(target).unsqueeze(0)  # in [-1,1]
+        if gpu_mode:
+            target = target.cuda()
+        ''' Finetune paste masks '''
+        target_facial_mask = trick.get_any_mask(target,
+                                                par=[1, 2, 3, 4, 5, 6, 10, 11, 12, 13]).squeeze()  # in [0,1]
+        target_facial_mask = target_facial_mask.cpu().numpy().astype(np.float)
+        target_facial_mask = trick.finetune_mask(target_facial_mask, target_lmks)  # in [0,1]
+        t_facial_masks.append(target_facial_mask)
+        ''' Face swapping '''
+        with torch.no_grad():
+            if 'faceshifter' in fs_model_name:
+                output = G(source_img, target)
+                target_hair_mask = trick.get_any_mask(target, par=[0, 17])
+                target_hair_mask = trick.smooth_mask(target_hair_mask)
+                output = target_hair_mask * target + (target_hair_mask * (-1) + 1) * output
+                output = trick.finetune_mouth(source_img, target, output)
+            elif 'simswap' in fs_model_name and 'official' not in fs_model_name:
+                output = fs_model(source=source_img, target=target,
+                                  net_arc=sw_netArc, mouth_net=sw_mouth_net,)
+                if 'vanilla' not in fs_model_name:
+                    target_hair_mask = trick.get_any_mask(target, par=[0, 17])
+                    target_hair_mask = trick.smooth_mask(target_hair_mask)
+                    output = target_hair_mask * target + (target_hair_mask * (-1) + 1) * output
+                    output = trick.finetune_mouth(source_img, target, output)
+                output = output.clamp(-1, 1)
+            elif 'simswap_official' in fs_model_name:
+                output = fs_model.image_infer(source_tensor=source_img, target_tensor=target)
+                output = output.clamp(-1, 1)
+            if isinstance(output, tuple):
+                target = output[0][0] * 0.5 + 0.5
+            else:
+                target = output[0] * 0.5 + 0.5
+        targets.append(np.array(tensor2pil_transform(target)))
+        Ms.append(M)
+        count += 1
+        if count > frames:
+            break
+    os.makedirs(out_path, exist_ok=True)
+    return targets, t_facial_masks, Ms, original_frames, names, fps
+def swap_image_gr(img1, img2, use_post=False, use_gpen=False, gpu_mode=True):
+    root_dir = make_abs_path("./online_data")
+    req_id = uuid.uuid1().hex
+    data_dir = os.path.join(root_dir, req_id)
+    os.makedirs(data_dir, exist_ok=True)
+    source_path = os.path.join(data_dir, "source.png")
+    target_path = os.path.join(data_dir, "target.png")
+    filename = "paste_back_out_target.png"
+    out_path = os.path.join(data_dir, filename)
+    cv2.imwrite(source_path, img1[:, :, ::-1])
+    cv2.imwrite(target_path, img2[:, :, ::-1])
+    swap_image(
+        source_path,
+        target_path,
+        data_dir,
+        T,
+        fs_model,
+        gpu_mode=gpu_mode,
+        align_target='ffhq',
+        align_source='ffhq',
+        use_post=use_post,
+        use_gpen=use_gpen,
+        in_size=in_size,
+    )
+    out = cv2.imread(out_path)[..., ::-1]
+    return out
+def swap_video_gr(img1, target_path, use_gpu=True, frames=9999999):
+    root_dir = make_abs_path("./online_data")
+    req_id = uuid.uuid1().hex
+    data_dir = os.path.join(root_dir, req_id)
+    os.makedirs(data_dir, exist_ok=True)
+    source_path = os.path.join(data_dir, "source.png")
+    cv2.imwrite(source_path, img1[:, :, ::-1])
+    out_dir = os.path.join(data_dir, "out")
+    out_name = "output.mp4"
+    targets, t_facial_masks, Ms, original_frames, names, fps = process_video(
+        source_path,
+        target_path,
+        out_dir,
+        T,
+        fs_model,
+        gpu_mode=use_gpu,
+        frames=frames,
+        align_target='ffhq',
+        align_source='ffhq',
+        use_tddfav2=False,
+    )
+    pool_process = 170
+    audio = True
+    concat = False
+    if pool_process <= 1:
+        for target, M, original_target, name, t_facial_mask in tqdm.tqdm(
+                zip(targets, Ms, original_frames, names, t_facial_masks)
+        ):
+            if M is None or target is None:
+                Image.fromarray(original_target.astype(np.uint8)).save(name)
+                continue
+            Image.fromarray(paste_back(np.array(target), M, original_target, t_facial_mask)).save(name)
+    else:
+        with Pool(pool_process) as pool:
+            pool.map(save, zip(targets, Ms, original_frames, names, t_facial_masks))
+    video_save_path = os.path.join(out_dir, out_name)
+    if audio:
+        print("use audio")
+        os.system(
+            f"ffmpeg  -y -r {fps} -i {out_dir}/frame_%05d.png -i {target_path}"
+            f" -map 0:v:0 -map 1:a:0? -c:a copy -c:v libx264 -r {fps} -crf 10 -pix_fmt yuv420p  {video_save_path}"
+        )
+    else:
+        print("no audio")
+        os.system(
+            f"ffmpeg  -y -r {fps} -i ./tmp/frame_%05d.png "
+            f"-c:v libx264 -r {fps} -crf 10 -pix_fmt yuv420p {video_save_path}"
+        )
+    # ffmpeg -i left.mp4 -i right.mp4 -filter_complex hstack output.mp4
+    if concat:
+        concat_video_save_path = os.path.join(out_dir, "concat_" + out_name)
+        os.system(
+            f"ffmpeg -y  -i {target_path}  -i {video_save_path} -filter_complex hstack {concat_video_save_path}"
+        )
+    # delete tmp file
+    shutil.rmtree("./tmp/")
+    for match in glob.glob(os.path.join(out_dir, "*.png")):
+        os.remove(match)
+    print(video_save_path)
+    return video_save_path
+# Script entry point: build the two-tab Gradio UI and start the web server.
+if __name__ == "__main__":
+    with gr.Blocks() as demo:
+        gr.Markdown("SuperSwap")
+        # Image tab: two input images and two checkboxes on the left,
+        # result image and trigger button on the right.
+        with gr.Tab("Image"):
+            with gr.Row(equal_height=True):
+                with gr.Column(scale=3):
+                    image1_input = gr.Image()
+                    image2_input = gr.Image()
+                    # Label text means "post-processing".
+                    use_post = gr.Checkbox(label="后处理")
+                    # Label text means "super-resolution enhancement".
+                    use_gpen = gr.Checkbox(label="超分增强")
+                with gr.Column(scale=2):
+                    image_output = gr.Image()
+                    # Button text means "swap face".
+                    image_button = gr.Button("换脸")
+        # Video tab: source image and target video on the left,
+        # result video and trigger button on the right.
+        with gr.Tab("Video"):
+            with gr.Row(equal_height=True):
+                with gr.Column(scale=3):
+                    image3_input = gr.Image()
+                    video_input = gr.Video()
+                with gr.Column(scale=2):
+                    video_output = gr.Video()
+                    # Button text means "swap face".
+                    video_button = gr.Button("换脸")
+        # Only four inputs are wired, so swap_image_gr's gpu_mode keeps its default.
+        image_button.click(
+            swap_image_gr,
+            inputs=[image1_input, image2_input, use_post, use_gpen],
+            outputs=image_output,
+        )
+        # Only two inputs are wired, so swap_video_gr's use_gpu and frames keep their defaults.
+        video_button.click(
+            swap_video_gr,
+            inputs=[image3_input, video_input],
+            outputs=video_output,
+        )
+    # Listen on all network interfaces, port 7860.
+    demo.launch(server_name="0.0.0.0", server_port=7860)