Spaces:

tonyassi
/

fashion-try-on

Running on Zero

File size: 6,763 Bytes

# app.py
import os
import math
import spaces
import gradio as gr
import torch
from PIL import Image, ImageChops
from diffusers import AutoPipelineForInpainting, AutoencoderKL

# =============================
# Helpers (CPU-only; no CUDA)
# =============================

def _round_up(x: int, m: int = 8) -> int:
    """
    Return the smallest multiple of `m` that is greater than or equal to `x`.

    The ceiling is computed with negated floor division so the arithmetic
    stays in exact integers; going through `x / m` would round-trip via a
    float and silently lose precision for values above 2**53.

    Raises:
        ZeroDivisionError: if `m` is 0.
    """
    # -(-x // m) is ceil(x / m) without ever leaving integer arithmetic.
    return int(-(-x // m) * m)

def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol: int = 12) -> Image.Image:
    """
    Trim uniform background margins from `img` and return the cropped image.

    * "RGBA" / "LA" images are cropped to the bounding box of their
      non-zero alpha pixels; `bg_color` and `tol` are not consulted.
    * Every other image is compared against a solid `bg_color` canvas. The
      per-pixel difference is reduced to a single luminance channel, and the
      crop box is the bounding box of pixels whose difference exceeds `tol`.

    When `bg_color` is an RGB triple and the image is stored in a mode that
    cannot be compared against such a triple (single-band "1"/"L"/"I"/"F",
    palette "P", or "CMYK"), the comparison runs on an RGB working copy.
    The crop itself is always applied to the original `img`, so the returned
    image keeps its original mode.

    If no bounding box is found (fully transparent, or nothing differs from
    the background), `img` is returned unchanged.
    """
    if img.mode in ("RGBA", "LA"):
        bbox = img.getchannel("A").getbbox()
        return img.crop(bbox) if bbox else img

    # Passing an RGB triple to Image.new() for a single-band mode raises, and
    # for "P"/"CMYK" it would be compared against palette indices / CMYK
    # channels rather than colours. Measure those modes in RGB space instead.
    probe = img
    is_rgb_triple = isinstance(bg_color, (tuple, list)) and len(bg_color) == 3
    if is_rgb_triple and img.mode in ("1", "L", "P", "I", "F", "CMYK"):
        probe = img.convert("RGB")

    bg = Image.new(probe.mode, probe.size, bg_color)
    diff = ImageChops.difference(probe, bg).convert("L")
    mask = diff.point(lambda p: 255 if p > tol else 0, mode="1")
    bbox = mask.getbbox()
    # Crop the caller's image, not the RGB probe, so the mode is preserved.
    return img.crop(bbox) if bbox else img

def square_pad_meta(
    img: Image.Image, color: str = "white", multiple: int = 8
) -> tuple[Image.Image, int, int, int, int, int]:
    """
    Autocrop `img`, then center it on a square RGB canvas filled with `color`.

    The canvas side is the larger of the cropped width/height, rounded UP to
    a multiple of `multiple`.

    Returns:
        (square_img, left, top, orig_w, orig_h, side) where `left`/`top` are
        the paste offsets of the cropped image inside the canvas,
        `orig_w`/`orig_h` are the cropped image's dimensions, and `side` is
        the canvas edge length. Cropping the canvas with
        (left, top, left + orig_w, top + orig_h) recovers the pasted region.
    """
    img = autocrop_content(img, (255, 255, 255), tol=12)
    orig_w, orig_h = img.size
    side = _round_up(max(orig_w, orig_h), multiple)

    bg = Image.new("RGB", (side, side), color=color)
    left = (side - orig_w) // 2
    top = (side - orig_h) // 2

    # paste() ignores the source's alpha band unless a mask is supplied, which
    # would expose whatever RGB values sit under transparent pixels. Use the
    # alpha channel as the mask so transparent areas show the pad colour.
    alpha = img.getchannel("A") if "A" in img.getbands() else None
    bg.paste(img, (left, top), alpha)
    return bg, left, top, orig_w, orig_h, side

def resize_to_multiple(image: Image.Image, m: int = 8) -> Image.Image:
    """
    Stretch `image` so that both its width and height are multiples of `m`.

    Each dimension is rounded up independently and the image is resampled
    with LANCZOS to the new size. When the image already conforms, the very
    same object is handed back without resampling.
    """
    target = (_round_up(image.width, m), _round_up(image.height, m))
    if target == image.size:
        # Already compliant: skip the resample entirely.
        return image
    return image.resize(target, Image.LANCZOS)

# =============================
# Lazy singletons (created inside GPU context)
# =============================
# Module-wide cache for the inpainting pipeline; stays None until the first
# successful _get_pipeline() call.
PIPELINE = None

def _get_pipeline(device: str):
    """
    Return the shared inpainting pipeline, building it on first use.

    The model repo is read from the MODEL environment variable and the
    IP-Adapter repo from IP_ADAPTER. On every call (first or cached) the
    pipeline is moved to `device` before being returned; the module-level
    cache is only populated after that move has succeeded.

    Raises:
        RuntimeError: if the pipeline has not been built yet and MODEL or
            IP_ADAPTER is unset or empty.
    """
    global PIPELINE

    pipe = PIPELINE
    if pipe is None:
        base_model = os.environ.get("MODEL")
        adapter_repo = os.environ.get("IP_ADAPTER")

        if not base_model:
            raise RuntimeError("Missing env var MODEL (e.g. 'stabilityai/stable-diffusion-xl-base-1.0').")
        if not adapter_repo:
            raise RuntimeError("Missing env var IP_ADAPTER (e.g. 'h94/IP-Adapter').")

        # Everything below loads weights only; the device move comes last.
        fp16_vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix",
            torch_dtype=torch.float16,
        )
        pipe = AutoPipelineForInpainting.from_pretrained(
            base_model,
            vae=fp16_vae,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )
        pipe.load_ip_adapter(
            adapter_repo,
            subfolder="sdxl_models",
            weight_name="ip-adapter_sdxl.bin",
        )

    # Shared tail for both the fresh and the cached pipeline: move first,
    # then (re)publish, so a failed move never caches a half-ready pipeline.
    pipe.to(device)
    PIPELINE = pipe
    return PIPELINE

# =============================
# Main generate (GPU section)
# =============================
@spaces.GPU(duration=180)
def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
    """
    Render `person` wearing the garment shown in `clothing`.

    Steps: validate inputs, obtain the cached diffusion pipeline, square-pad
    the person image, segment the body (inpainting mask) and the garment
    (IP-Adapter reference), run inpainting, then crop the result back to the
    person's padded-in region.

    The function is wrapped by `spaces.GPU`; per the original author's note,
    all CUDA/ONNXRuntime initialization is expected to happen inside this
    call rather than at import time, which is why the segmentation modules
    are imported here.

    Args:
        person: photo of the person; not modified (a copy is processed).
        clothing: photo of the garment; not modified (a copy is processed).

    Returns:
        The generated image, cropped to the autocropped/thumbnail-sized
        person frame.

    Raises:
        gr.Error: if either input image is missing.
    """
    # Gradio passes None for an image slot the user left empty. Report that
    # clearly up front instead of failing later with an AttributeError after
    # the pipeline has already been loaded.
    if person is None or clothing is None:
        raise gr.Error("Please provide both a person image and a clothing image.")

    # Import segmentation modules here so they initialize after GPU exists.
    from SegBody import segment_body
    from SegCloth import segment_clothing

    # Deliberately best-effort: if onnxruntime is importable and no GPU is
    # visible, ask it not to try CUDA. Any failure here is non-fatal.
    # NOTE(review): confirm that onnxruntime actually honours ORT_DISABLE_CUDA.
    try:
        import onnxruntime  # noqa: F401
        if not torch.cuda.is_available():
            os.environ.setdefault("ORT_DISABLE_CUDA", "1")
    except Exception:
        pass

    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = _get_pipeline(device)

    # --- Preprocess (CPU)
    # Work on copies: thumbnail() resizes in place.
    person = person.copy()
    clothing = clothing.copy()

    # Keep person within 1024, then square-pad to /8 and remember offsets.
    person.thumbnail((1024, 1024))
    image, left, top, ow, oh, _side = square_pad_meta(person, color="white", multiple=8)

    # Clothing can be smaller; make dimensions /8 to be safe.
    clothing.thumbnail((1024, 1024))
    clothing = resize_to_multiple(clothing, 8)

    # --- Segmentation (after GPU allocation; modules can use GPU if they choose)
    # Only the mask is needed; the first returned image is discarded.
    _, mask_image = segment_body(image, face=False)
    seg_cloth = segment_clothing(
        clothing,
        clothes=["Upper-clothes", "Skirt", "Pants", "Dress", "Belt"],
    )

    # --- Diffusion
    pipe.set_ip_adapter_scale(1.0)
    result = pipe(
        prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin",
        negative_prompt=(
            "ugly, bad quality, bad anatomy, deformed body, deformed hands, "
            "deformed feet, deformed face, deformed clothing, deformed skin, "
            "bad skin, leggings, tights, stockings"
        ),
        image=image,
        mask_image=mask_image,
        ip_adapter_image=seg_cloth,
        width=image.width,
        height=image.height,
        strength=0.99,
        guidance_scale=7.5,
        num_inference_steps=100,
    ).images[0]

    # Undo the square padding: crop back to where the person was pasted.
    return result.crop((left, top, left + ow, top + oh))

# =============================
# Gradio UI
# =============================
# Gradio front end: two image inputs (delivered to generate() as PIL images)
# and one image output. `description` is raw HTML shown under the title.
iface = gr.Interface(
    fn=generate,
    inputs=[gr.Image(label="Person", type="pil"), gr.Image(label="Clothing", type="pil")],
    outputs=[gr.Image(label="Result")],
    title="Fashion Try-On",
    description="""
    by <a href="https://www.tonyassi.com/">Tony Assi</a><br/>
    Check out <a href="https://huggingface.co/spaces/tonyassi/Virtual-Try-On-Pro">Virtual Try-On Pro</a> !<br/><br/>
    Please ❤️ this Space. I build custom AI apps for companies. <a href="mailto: tony.assi.media@gmail.com">Email me</a> for business inquiries.
    """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    # Each example row is [person_path, clothing_path], matching `inputs` order.
    examples=[
        ["images/person1.jpg", "images/clothing1.jpg"],
        ["images/person1.jpg", "images/clothing2.jpg"],
    ],
)

# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()