# app.py
import os
import math
import spaces
import gradio as gr
import torch
from PIL import Image, ImageChops
from diffusers import AutoPipelineForInpainting, AutoencoderKL

# =============================
# Helpers (CPU-only; no CUDA)
# =============================
def _round_up(x: int, m: int = 8) -> int:
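    # e.g. _round_up(1012) == 1016; _round_up(1024) == 1024 (already a multiple of 8)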
return int(math.ceil(x / m) * m)
def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol: int = 12) -> Image.Image:
"""
Trim uniform white (or near-white) margins before centering/padding.
Handles RGBA via alpha bbox; for RGB compares to a solid background.
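    E.g. a garment shot on a plain white background is trimmed to the garment's bounding box.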
"""
if img.mode in ("RGBA", "LA"):
alpha = img.split()[-1]
bbox = alpha.getbbox()
return img.crop(bbox) if bbox else img
bg = Image.new(img.mode, img.size, bg_color)
diff = ImageChops.difference(img, bg).convert("L")
mask = diff.point(lambda p: 255 if p > tol else 0, mode="1")
bbox = mask.getbbox()
return img.crop(bbox) if bbox else img
def square_pad_meta(
img: Image.Image, color: str = "white", multiple: int = 8
) -> tuple[Image.Image, int, int, int, int, int]:
"""
Autocrop -> center-pad to a square whose side is rounded UP to `multiple`.
Returns (square_img, left, top, orig_w, orig_h, side).
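    Example: a 768x1024 person crop becomes a 1024x1024 square with left=128, top=0.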
"""
img = autocrop_content(img, (255, 255, 255), tol=12)
orig_w, orig_h = img.size
side = _round_up(max(orig_w, orig_h), multiple)
bg = Image.new("RGB", (side, side), color=color)
left = (side - orig_w) // 2
top = (side - orig_h) // 2
bg.paste(img, (left, top))
return bg, left, top, orig_w, orig_h, side
def resize_to_multiple(image: Image.Image, m: int = 8) -> Image.Image:
"""
    Resize **up** so width/height are multiples of m (diffusers rejects dimensions not divisible by 8, e.g. 1012x1012).
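    Example: 1012x760 -> 1016x760 (760 is already a multiple of 8).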
"""
w, h = image.size
nw = _round_up(w, m)
nh = _round_up(h, m)
if (nw, nh) == (w, h):
return image
return image.resize((nw, nh), Image.LANCZOS)

# =============================
# Lazy singletons (created inside GPU context)
# =============================
PIPELINE = None
def _get_pipeline(device: str):
"""
Create & cache the diffusers pipeline once we actually have a GPU (ZeroGPU).
No CUDA calls should happen before this is executed.
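    Subsequent calls reuse the cached pipeline and simply move it to the requested device.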
"""
global PIPELINE
if PIPELINE is not None:
PIPELINE.to(device)
return PIPELINE
model_id = os.environ.get("MODEL")
ip_adapter_repo = os.environ.get("IP_ADAPTER")
if not model_id:
raise RuntimeError("Missing env var MODEL (e.g. 'stabilityai/stable-diffusion-xl-base-1.0').")
if not ip_adapter_repo:
raise RuntimeError("Missing env var IP_ADAPTER (e.g. 'h94/IP-Adapter').")
# Build VAE & pipeline WITHOUT touching CUDA yet.
vae = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix",
torch_dtype=torch.float16,
)
pipe = AutoPipelineForInpainting.from_pretrained(
model_id,
vae=vae,
torch_dtype=torch.float16,
variant="fp16",
use_safetensors=True,
)
# Attach IP-Adapter weights (no CUDA op yet)
pipe.load_ip_adapter(
ip_adapter_repo,
subfolder="sdxl_models",
weight_name="ip-adapter_sdxl.bin",
)
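    # ip-adapter_sdxl.bin is the base SDXL IP-Adapter checkpoint; this assumes IP_ADAPTER points
    # at a repo laid out like h94/IP-Adapter (with an sdxl_models/ subfolder).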
# NOW move the whole pipeline to the device ZeroGPU assigned
pipe.to(device)
PIPELINE = pipe
return PIPELINE

# =============================
# Main generate (GPU section)
# =============================
@spaces.GPU(duration=180)
def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
"""
This function is called *after* ZeroGPU allocates a CUDA device.
All CUDA/ONNXRuntime initializations must happen here (or deeper).
"""
# Import segmentation modules here so they initialize after GPU exists.
from SegBody import segment_body
from SegCloth import segment_clothing
# If onnxruntime is used under the hood, ensure it doesn't try CUDA without a GPU.
try:
import onnxruntime as ort # noqa: F401
if not torch.cuda.is_available():
os.environ.setdefault("ORT_DISABLE_CUDA", "1")
except Exception:
pass
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = _get_pipeline(device)
# --- Preprocess (CPU)
person = person.copy()
clothing = clothing.copy()
# Keep person within 1024, then square-pad to /8 and remember offsets.
person.thumbnail((1024, 1024))
square_img, left, top, ow, oh, side = square_pad_meta(person, color="white", multiple=8)
image = square_img # feed this square to seg & pipeline (already /8-compliant)
# Clothing can be smaller; make dimensions /8 to be safe.
clothing.thumbnail((1024, 1024))
clothing = resize_to_multiple(clothing, 8)
# --- Segmentation (after GPU allocation; modules can use GPU if they choose)
    seg_image, mask_image = segment_body(image, face=False)  # only mask_image is used below
seg_cloth = segment_clothing(
clothing,
clothes=["Upper-clothes", "Skirt", "Pants", "Dress", "Belt"],
)
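    # seg_cloth is the garment isolated from its background; it is passed to the pipeline
    # below as the IP-Adapter reference image.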
# --- Diffusion
pipe.set_ip_adapter_scale(1.0)
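    # Scale 1.0 gives the clothing reference full weight alongside the text prompt;
    # strength=0.99 below repaints nearly the entire masked body region.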
result = pipe(
prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin",
negative_prompt=(
"ugly, bad quality, bad anatomy, deformed body, deformed hands, "
"deformed feet, deformed face, deformed clothing, deformed skin, "
"bad skin, leggings, tights, stockings"
),
image=image,
mask_image=mask_image,
ip_adapter_image=seg_cloth,
width=image.width,
height=image.height,
strength=0.99,
guidance_scale=7.5,
num_inference_steps=100,
).images[0]
# Crop back to the original (post-thumbnail) person frame using the paste offsets.
final = result.crop((left, top, left + ow, top + oh))
return final
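# Note: running this app requires the MODEL and IP_ADAPTER environment variables (see _get_pipeline)
# as well as the local SegBody / SegCloth modules imported inside generate().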

# =============================
# Gradio UI
# =============================
iface = gr.Interface(
fn=generate,
inputs=[gr.Image(label="Person", type="pil"), gr.Image(label="Clothing", type="pil")],
outputs=[gr.Image(label="Result")],
title="Fashion Try-On",
description="""
by <a href="https://www.tonyassi.com/">Tony Assi</a><br/>
    Check out <a href="https://huggingface.co/spaces/tonyassi/Virtual-Try-On-Pro">Virtual Try-On Pro</a>!<br/><br/>
    Please ❤️ this Space. I build custom AI apps for companies. <a href="mailto:tony.assi.media@gmail.com">Email me</a> for business inquiries.
""",
theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
examples=[
["images/person1.jpg", "images/clothing1.jpg"],
["images/person1.jpg", "images/clothing2.jpg"],
],
)
if __name__ == "__main__":
iface.launch()