# app.py
import os
import math

import spaces
import gradio as gr
import torch
from PIL import Image, ImageChops
from diffusers import AutoPipelineForInpainting, AutoencoderKL


# =============================
# Helpers (CPU-only; no CUDA)
# =============================
def _round_up(x: int, m: int = 8) -> int:
    return int(math.ceil(x / m) * m)


def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol: int = 12) -> Image.Image:
    """
    Trim uniform white (or near-white) margins before centering/padding.
    Handles RGBA via the alpha bbox; for RGB, compares against a solid background.
    """
    if img.mode in ("RGBA", "LA"):
        alpha = img.split()[-1]
        bbox = alpha.getbbox()
        return img.crop(bbox) if bbox else img
    bg = Image.new(img.mode, img.size, bg_color)
    diff = ImageChops.difference(img, bg).convert("L")
    mask = diff.point(lambda p: 255 if p > tol else 0, mode="1")
    bbox = mask.getbbox()
    return img.crop(bbox) if bbox else img


def square_pad_meta(
    img: Image.Image, color: str = "white", multiple: int = 8
) -> tuple[Image.Image, int, int, int, int, int]:
    """
    Autocrop -> center-pad to a square whose side is rounded UP to `multiple`.
    Returns (square_img, left, top, orig_w, orig_h, side).
    """
    img = autocrop_content(img, (255, 255, 255), tol=12)
    orig_w, orig_h = img.size
    side = _round_up(max(orig_w, orig_h), multiple)
    bg = Image.new("RGB", (side, side), color=color)
    left = (side - orig_w) // 2
    top = (side - orig_h) // 2
    bg.paste(img, (left, top))
    return bg, left, top, orig_w, orig_h, side


def resize_to_multiple(image: Image.Image, m: int = 8) -> Image.Image:
    """
    Resize **up** so width/height are multiples of m (avoids errors from sizes
    like 1012x1012 that are not divisible by 8).
    """
    w, h = image.size
    nw = _round_up(w, m)
    nh = _round_up(h, m)
    if (nw, nh) == (w, h):
        return image
    return image.resize((nw, nh), Image.LANCZOS)


# =============================
# Lazy singletons (created inside GPU context)
# =============================
PIPELINE = None


def _get_pipeline(device: str):
    """
    Create & cache the diffusers pipeline once we actually have a GPU (ZeroGPU).
    No CUDA calls should happen before this is executed.
    """
    global PIPELINE
    if PIPELINE is not None:
        PIPELINE.to(device)
        return PIPELINE

    model_id = os.environ.get("MODEL")
    ip_adapter_repo = os.environ.get("IP_ADAPTER")
    if not model_id:
        raise RuntimeError("Missing env var MODEL (e.g. 'stabilityai/stable-diffusion-xl-base-1.0').")
    if not ip_adapter_repo:
        raise RuntimeError("Missing env var IP_ADAPTER (e.g. 'h94/IP-Adapter').")

    # Build VAE & pipeline WITHOUT touching CUDA yet.
    vae = AutoencoderKL.from_pretrained(
        "madebyollin/sdxl-vae-fp16-fix",
        torch_dtype=torch.float16,
    )
    pipe = AutoPipelineForInpainting.from_pretrained(
        model_id,
        vae=vae,
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
    )

    # Attach IP-Adapter weights (no CUDA op yet).
    pipe.load_ip_adapter(
        ip_adapter_repo,
        subfolder="sdxl_models",
        weight_name="ip-adapter_sdxl.bin",
    )

    # NOW move the whole pipeline to the device ZeroGPU assigned.
    pipe.to(device)
    PIPELINE = pipe
    return PIPELINE


# =============================
# Main generate (GPU section)
# =============================
@spaces.GPU(duration=180)
def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
    """
    This function is called *after* ZeroGPU allocates a CUDA device.
    All CUDA/ONNX Runtime initialization must happen here (or deeper).
    """
    # Import segmentation modules here so they initialize after the GPU exists.
    from SegBody import segment_body
    from SegCloth import segment_clothing

    # If onnxruntime is used under the hood, ensure it doesn't try CUDA without a GPU.
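    # NOTE: ORT_DISABLE_CUDA below is a defensive, best-effort setting; whether
    # the installed onnxruntime build actually honors it is not guaranteed.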
    try:
        import onnxruntime as ort  # noqa: F401

        if not torch.cuda.is_available():
            os.environ.setdefault("ORT_DISABLE_CUDA", "1")
    except Exception:
        pass

    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = _get_pipeline(device)

    # --- Preprocess (CPU)
    person = person.copy()
    clothing = clothing.copy()

    # Keep person within 1024, then square-pad to /8 and remember offsets.
    person.thumbnail((1024, 1024))
    square_img, left, top, ow, oh, side = square_pad_meta(person, color="white", multiple=8)
    image = square_img  # feed this square to seg & pipeline (already /8-compliant)

    # Clothing can be smaller; make dimensions /8 to be safe.
    clothing.thumbnail((1024, 1024))
    clothing = resize_to_multiple(clothing, 8)

    # --- Segmentation (after GPU allocation; modules can use the GPU if they choose)
    seg_image, mask_image = segment_body(image, face=False)
    seg_cloth = segment_clothing(
        clothing,
        clothes=["Upper-clothes", "Skirt", "Pants", "Dress", "Belt"],
    )

    # --- Diffusion
    pipe.set_ip_adapter_scale(1.0)
    result = pipe(
        prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin",
        negative_prompt=(
            "ugly, bad quality, bad anatomy, deformed body, deformed hands, "
            "deformed feet, deformed face, deformed clothing, deformed skin, "
            "bad skin, leggings, tights, stockings"
        ),
        image=image,
        mask_image=mask_image,
        ip_adapter_image=seg_cloth,
        width=image.width,
        height=image.height,
        strength=0.99,
        guidance_scale=7.5,
        num_inference_steps=100,
    ).images[0]

    # Crop back to the original (post-thumbnail) person frame using the paste offsets.
    final = result.crop((left, top, left + ow, top + oh))
    return final


# =============================
# Gradio UI
# =============================
iface = gr.Interface(
    fn=generate,
    inputs=[gr.Image(label="Person", type="pil"), gr.Image(label="Clothing", type="pil")],
    outputs=[gr.Image(label="Result")],
    title="Fashion Try-On",
    description="""
by Tony Assi
Check out Virtual Try-On Pro!

Please ❤️ this Space. I build custom AI apps for companies. Email me for business inquiries.
""",
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    examples=[
        ["images/person1.jpg", "images/clothing1.jpg"],
        ["images/person1.jpg", "images/clothing2.jpg"],
    ],
)

if __name__ == "__main__":
    iface.launch()
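

# A minimal, optional sanity check (never called by the app) sketching the
# pad/crop round trip that generate() relies on. The 900x600 size below is a
# hypothetical example, not a size the app itself uses.
def _demo_pad_crop_roundtrip() -> None:
    img = Image.new("RGB", (900, 600), "gray")
    # 900 rounds up to 904 (next multiple of 8); the image is centered on a white square.
    square, left, top, ow, oh, side = square_pad_meta(img, color="white", multiple=8)
    assert side == 904 and (left, top) == (2, 152)
    # Cropping with the saved offsets recovers the original frame exactly.
    restored = square.crop((left, top, left + ow, top + oh))
    assert restored.size == (900, 600)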