Spaces:
Sleeping
Sleeping
| import os | |
| from io import BytesIO | |
| import gradio as gr | |
| import grpc | |
| from PIL import Image | |
| import pandas as pd | |
| from inference_pb2 import SFERequest, SFEResponse, SFERequestMask, SFEResponseMask | |
| from inference_pb2_grpc import SFEServiceStub | |
| PREDEFINED_EDITINGS_DATA = { | |
| "glasses": ([-20.0, 30.0], False), | |
| "smile": ([-10.0, 10.0], False), | |
| "makeup": ([-10.0, 15.0], False), | |
| "eye_openness": ([-45.0, 30.0], True), | |
| "trimmed_beard": ([-30.0, 30.0], True), | |
| "lipstick": ([-60.0, 60.0], False), # ?? | |
| "face_roundness": ([-20.0, 15.0], False), | |
| "nose_length": ([-30.0, 30.0], True), | |
| "eyebrow_thickness": ([-20.0, 20.0], True), | |
| "displeased": ([-10.0, 10.0], False), | |
| "age": ([-10.0, 10.0], False), | |
| "rotation": ([-7.0, 7.0], False), | |
| "afro": ([0.05, 0.14], False), | |
| "angry": ([0.05, 0.14], False), | |
| "bobcut": ([0.05, 0.18], False), | |
| "bowlcut": ([0.05, 0.14], False), | |
| "mohawk": ([0.05, 0.1], False), | |
| "curly_hair": ([0.05, 0.12], False), | |
| "purple_hair": ([0.05, 0.12], False), | |
| "surprised": ([0.05, 0.1], False), | |
| "beyonce": ([0.05, 0.12], False), | |
| "hilary_clinton": ([0.05, 0.1], False), | |
| "depp": ([0.05, 0.12], False), | |
| "taylor_swift": ([0.05, 0.1], False), | |
| "trump": ([0.05, 0.1], False), | |
| "zuckerberg": ([0.05, 0.1], False), | |
| "black hair": ([-7.0, 10.0], False), | |
| "blond hair": ([-7.0, 10.0], True), | |
| "grey hair": ([-7.0, 7.0], True), | |
| "wavy hair": ([-7.0, 7.0], False), | |
| "receding hairline": ([-10.0, 10.0], True), | |
| "sideburns": ([-7.0, 7.0], True), | |
| "goatee": ([-7.0, 7.0], True), | |
| "earrings": ([-10.0, 15.0], False), | |
| "gender": ([-10.0, 7.0], False) | |
| } | |
| DIRECTIONS_NAME_SWAP = { | |
| "smile" : "fs_smiling", | |
| "glasses": "fs_glasses", | |
| "makeup": "fs_makeup", | |
| } | |
| def denormalize_power(direction_name, directon_power): | |
| if direction_name not in PREDEFINED_EDITINGS_DATA: | |
| return directon_power | |
| original_range, is_reversed = PREDEFINED_EDITINGS_DATA[direction_name] | |
| if directon_power > 0: | |
| normalized = directon_power / 15 * abs(original_range[1]) | |
| else: | |
| normalized = directon_power / 15 * abs(original_range[0]) | |
| if is_reversed: | |
| normalized = -normalized | |
| return normalized | |
| def get_bytes(img): | |
| if img is None: | |
| return img | |
| buffered = BytesIO() | |
| img.save(buffered, format="JPEG") | |
| return buffered.getvalue() | |
| def bytes_to_image(image: bytes) -> Image.Image: | |
| image = Image.open(BytesIO(image)) | |
| return image | |
| def edit_image(orig_image, edit_direction, edit_power, align, mask, progress=gr.Progress(track_tqdm=True)): | |
| if edit_direction in DIRECTIONS_NAME_SWAP: | |
| edit_direction = DIRECTIONS_NAME_SWAP[edit_direction] | |
| if not orig_image: | |
| return gr.update(visible=False), gr.update(visible=False), gr.update(value="Need to upload an input image ❗", visible=True) | |
| orig_image_bytes = get_bytes(orig_image) | |
| mask_bytes = get_bytes(mask) | |
| if mask_bytes is None: | |
| mask_bytes = b"mask" | |
| edit_power = denormalize_power(edit_direction, edit_power) | |
| with grpc.insecure_channel(os.environ["SERVER"]) as channel: | |
| stub = SFEServiceStub(channel) | |
| output: SFEResponse = stub.edit( | |
| SFERequest(orig_image=orig_image_bytes, direction=edit_direction, power=edit_power, align=align, mask=mask_bytes, use_cache=True) | |
| ) | |
| if output.image == b"aligner error": | |
| return gr.update(visible=False), gr.update(visible=False), gr.update(value="Face aligner can not find face in your image 😢 Try to upload another one", visible=True) | |
| output_edited = bytes_to_image(output.image) | |
| output_inv = bytes_to_image(output.inv_image) | |
| return gr.update(value=output_edited, visible=True), gr.update(value=output_inv, visible=True), gr.update(visible=False) | |
| def edit_image_clip(orig_image, neutral_prompt, target_prompt, disentanglement, edit_power, align, mask, progress=gr.Progress(track_tqdm=True)): | |
| edit_direction = "_".join(["styleclip_global", neutral_prompt, target_prompt, str(disentanglement)]) | |
| return edit_image(orig_image, edit_direction, edit_power, align, mask, progress=None) | |
| def get_mask(input_image, align, mask_trashhold, progress=gr.Progress(track_tqdm=True)): | |
| if not input_image: | |
| return gr.update(visible=False), gr.update(value="Need to upload an input image ❗", visible=True) | |
| input_image_bytes = get_bytes(input_image) | |
| with grpc.insecure_channel(os.environ["SERVER"]) as channel: | |
| stub = SFEServiceStub(channel) | |
| output: SFEResponseMask = stub.generate_mask( | |
| SFERequestMask(orig_image=input_image_bytes, trashold=mask_trashhold, align=align, use_cache=True) | |
| ) | |
| if output.mask == b"aligner error": | |
| return gr.update(visible=False), gr.update(value="Face aligner can not find face in your image 😢 Try to upload another one", visible=True) | |
| if output.mask == b"masker face parser error": | |
| return gr.update(visible=False), gr.update(value="Masker's face detector can't find face in your image 😢 Try to upload another one", visible=True) | |
| output_mask = bytes_to_image(output.mask) | |
| return gr.update(value=output_mask, visible=True), gr.update(visible=False) | |
| def get_demo(): | |
| editings_table = pd.read_csv("editings_table.csv") | |
| editings_table = editings_table.style.set_properties(**{"text-align": "center"}) | |
| editings_table = editings_table.set_table_styles([dict(selector="th", props=[("text-align", "center")])]) | |
| with gr.Blocks() as demo: | |
| gr.Markdown("## StyleFeatureEditor") | |
| gr.Markdown( | |
| '<div style="display: flex; align-items: center; gap: 10px;">' | |
| '<span>Official Gradio demo for StyleFeatureEditor:</span>' | |
| '<a href="https://arxiv.org/abs/2406.10601"><img src="https://img.shields.io/badge/arXiv-2404.01094-b31b1b.svg" height=22.5></a>' | |
| '<a href="https://github.com/AIRI-Institute/StyleFeatureEditor"><img src="https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white" height=22.5></a>' | |
| '<a href="https://huggingface.co/AIRI-Institute/StyleFeatureEditor"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-md.svg" height=22.5></a>' | |
| '<a href="https://colab.research.google.com/#fileId=https://github.com/AIRI-Institute/StyleFeatureEditor/blob/main/notebook/StyleFeatureEditor_inference.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=22.5></a>' | |
| '</div>' | |
| ) | |
| with gr.Row(): | |
| with gr.Column(): | |
| with gr.Accordion("Input Image", open=True): | |
| input_image = gr.Image(label="Input image you want to edit", type="pil", height=300) | |
| align = gr.Checkbox(label="Align (crop and resize) the input image. For SFE to work well, it is necessary to align the input if it is not.", value=True) | |
| with gr.Accordion("Predefined Editings", open=True): | |
| with gr.Accordion("Description", open=False): | |
| gr.Markdown('''A branch of predefined editings gained from InterfaceGAN, Stylespace, GANSpace and StyleClip mappers. Look at the table below to see which direction is responsible for which editings. | |
| **Editing power** -- the greater the absolute value of this parameter, the more the selected edit will appear. Better use values in the range 7 - 13, lower values may not give the desired edit, higher values -- on the contrary -- may apply edit too much and create artefacts. | |
| **Editing effect** -- the effect applied to the image when positive editing power is used. If negative power is used, the effect is reversed. | |
| ''' | |
| ) | |
| gr.Dataframe(value=editings_table, datatype=["markdown","markdown","markdown"], interactive=False, wrap=True, | |
| column_widths=["30px", "35px", "35px"], height=300) | |
| with gr.Row(): | |
| predef_editing_direction = gr.Dropdown(list(PREDEFINED_EDITINGS_DATA.keys()), label="Editing direction", value="smile") | |
| # predef_editing_power = gr.Number(value=7, label="Editing power") | |
| predef_editing_power = gr.Slider(-20, 20, value=10, step=0.1, label="Editing power") | |
| btn_predef = gr.Button("Edit image") | |
| with gr.Accordion("Text Prompt (StyleClip) Editings", open=False): | |
| with gr.Accordion("Description", open=False): | |
| gr.Markdown('''You can alse use editings from text prompts via **StyleClip Global Mapper** (https://arxiv.org/abs/2103.17249). You just need to choose: | |
| **Editing power** -- the greater the absolute value of this parameter, the more the selected edit will appear. | |
| **Neutral prompt** -- some neutral description of the original image (e.g. "a face"). | |
| **Target prompt** -- text that contains the desired edit (e.g. "a smilling face"). | |
| **Disentanglement** -- positive number, the less this attribute -- the more related attributes will also be changed (e.g. for grey hair editing, wrinkle, skin colour and glasses may also be edited) | |
| ''') | |
| neutral_prompt = gr.Textbox(value="face with hair", label="Neutreal prompt (e.g. 'a face')") | |
| target_prompt = gr.Textbox(value="face with fire hair", label="Target prompt (e.g. 'a smilling face')") | |
| styleclip_editing_power = gr.Slider(-50, 50, value=10, step=1, label="Editing power") | |
| disentanglement = gr.Slider(0, 1, value=0.1, step=0.01, label="Disentanglement") | |
| btn_clip = gr.Button("Edit image") | |
| with gr.Accordion("Mask settings (optional)", open=False): | |
| gr.Markdown('''If some artefacts appear during editing (or some details disappear), you can specify an image mask to select which regions of the image should not be edited. The mask must have a size of 1024 x 1024 and represent an inversion of the original image. | |
| ''' | |
| ) | |
| mask = gr.Image(label="Upload mask for editing", type="pil", height=350) | |
| with gr.Accordion("Mask generating", open=False): | |
| gr.Markdown("Here you can generate mask that separates face (with hair) from the background.") | |
| with gr.Row(): | |
| input_mask = gr.Image(label="Input image for mask generating", type="pil", height=240) | |
| output_mask = gr.Image(label="Generated mask", height=240) | |
| error_message_mask = gr.Textbox(label="⚠️ Error ⚠️", visible=False, elem_classes="error-message") | |
| align_mask = gr.Checkbox(label="To align (crop and resize image) or not. Only uncheck this box if the original image has already been aligned.", value=True) | |
| mask_trashhold = gr.Slider(0, 1, value=0.9, step=0.001, label="Mask trashold", | |
| info="The more this parameter, the more is face part, and the less is background part.") | |
| btn_mask = gr.Button("Generate mask") | |
| with gr.Column(): | |
| with gr.Row(): | |
| output_inv = gr.Image(label="Inversion result", visible=True) | |
| output_edit = gr.Image(label="Editing result", visible=True) | |
| error_message = gr.Textbox(label="⚠️ Error ⚠️", visible=False, elem_classes="error-message") | |
| gr.Markdown("If artefacts appear during editing -- try lowering the editing power or using a mask.") | |
| gr.Examples( | |
| label="Input Examples", | |
| examples=[ | |
| ["images/scarlet.jpg", "images/scarlet.jpg"], | |
| ["images/gosling.jpg", "images/gosling.jpg"], | |
| ["images/robert.png", "images/robert.png"], | |
| ["images/smith.jpg", "images/smith.jpg"], | |
| ["images/watson.jpeg", "images/watson.jpeg"], | |
| ], | |
| inputs=[input_image, input_mask] | |
| ) | |
| btn_predef.click( | |
| fn=edit_image, | |
| inputs=[input_image, predef_editing_direction, predef_editing_power, align, mask], | |
| outputs=[output_edit, output_inv, error_message] | |
| ) | |
| btn_clip.click( | |
| fn=edit_image_clip, | |
| inputs=[input_image, neutral_prompt, target_prompt, disentanglement, styleclip_editing_power, align, mask], | |
| outputs=[output_edit, output_inv, error_message] | |
| ) | |
| btn_mask.click( | |
| fn=get_mask, | |
| inputs=[input_mask, align_mask, mask_trashhold], | |
| outputs=[output_mask, error_message_mask] | |
| ) | |
| gr.Markdown('''To cite the paper by the authors | |
| ``` | |
| @InProceedings{Bobkov_2024_CVPR, | |
| author = {Bobkov, Denis and Titov, Vadim and Alanov, Aibek and Vetrov, Dmitry}, | |
| title = {The Devil is in the Details: StyleFeatureEditor for Detail-Rich StyleGAN Inversion and High Quality Image Editing}, | |
| booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, | |
| month = {June}, | |
| year = {2024}, | |
| pages = {9337-9346} | |
| } | |
| ``` | |
| ''') | |
| return demo | |
| if __name__ == "__main__": | |
| demo = get_demo() | |
| demo.launch(server_name="0.0.0.0", server_port=7860) | |