Spaces: Running on Zero
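Below is the Space's app.py: a Gradio virtual try-on app that segments the person and the garment, then inpaints the outfit with an SDXL inpainting pipeline conditioned through IP-Adapter, deferring all CUDA work until ZeroGPU allocates a device.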
# app.py
import os
import math
import spaces
import gradio as gr
import torch
from PIL import Image, ImageChops
from diffusers import AutoPipelineForInpainting, AutoencoderKL


# =============================
# Helpers (CPU-only; no CUDA)
# =============================
def _round_up(x: int, m: int = 8) -> int:
    return int(math.ceil(x / m) * m)
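# For example, _round_up(1012) == 1016 and _round_up(1024) == 1024, so every
# dimension handed to the SDXL pipeline ends up divisible by 8.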


def autocrop_content(img: Image.Image, bg_color=(255, 255, 255), tol: int = 12) -> Image.Image:
    """
    Trim uniform white (or near-white) margins before centering/padding.
    Handles RGBA via alpha bbox; for RGB compares to a solid background.
    """
    if img.mode in ("RGBA", "LA"):
        alpha = img.split()[-1]
        bbox = alpha.getbbox()
        return img.crop(bbox) if bbox else img
    bg = Image.new(img.mode, img.size, bg_color)
    diff = ImageChops.difference(img, bg).convert("L")
    mask = diff.point(lambda p: 255 if p > tol else 0, mode="1")
    bbox = mask.getbbox()
    return img.crop(bbox) if bbox else img
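# With the default tol=12, pixels whose per-channel difference from bg_color
# converts to a luminance of 12 or less count as background and get trimmed;
# everything brighter than that threshold is kept as content.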


def square_pad_meta(
    img: Image.Image, color: str = "white", multiple: int = 8
) -> tuple[Image.Image, int, int, int, int, int]:
    """
    Autocrop -> center-pad to a square whose side is rounded UP to `multiple`.
    Returns (square_img, left, top, orig_w, orig_h, side).
    """
    img = autocrop_content(img, (255, 255, 255), tol=12)
    orig_w, orig_h = img.size
    side = _round_up(max(orig_w, orig_h), multiple)
    bg = Image.new("RGB", (side, side), color=color)
    left = (side - orig_w) // 2
    top = (side - orig_h) // 2
    bg.paste(img, (left, top))
    return bg, left, top, orig_w, orig_h, side
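# Worked example: a 900x600 crop pads to a 904x904 white square with left=2,
# top=152; generate() later reuses these offsets to crop the result back to
# the original person frame.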


def resize_to_multiple(image: Image.Image, m: int = 8) -> Image.Image:
    """
    Resize **up** so width/height are multiples of m (avoids 1012x1012 errors).
    """
    w, h = image.size
    nw = _round_up(w, m)
    nh = _round_up(h, m)
    if (nw, nh) == (w, h):
        return image
    return image.resize((nw, nh), Image.LANCZOS)
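# e.g. a 1012x755 clothing image is resized up to 1016x760, while images
# already on an 8-pixel grid are returned unchanged.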


# =============================
# Lazy singletons (created inside GPU context)
# =============================
PIPELINE = None


def _get_pipeline(device: str):
    """
    Create & cache the diffusers pipeline once we actually have a GPU (ZeroGPU).
    No CUDA calls should happen before this is executed.
    """
    global PIPELINE
    if PIPELINE is not None:
        PIPELINE.to(device)
        return PIPELINE

    model_id = os.environ.get("MODEL")
    ip_adapter_repo = os.environ.get("IP_ADAPTER")
    if not model_id:
        raise RuntimeError("Missing env var MODEL (e.g. 'stabilityai/stable-diffusion-xl-base-1.0').")
    if not ip_adapter_repo:
        raise RuntimeError("Missing env var IP_ADAPTER (e.g. 'h94/IP-Adapter').")

    # Build VAE & pipeline WITHOUT touching CUDA yet.
    vae = AutoencoderKL.from_pretrained(
        "madebyollin/sdxl-vae-fp16-fix",
        torch_dtype=torch.float16,
    )
    pipe = AutoPipelineForInpainting.from_pretrained(
        model_id,
        vae=vae,
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True,
    )
    # Attach IP-Adapter weights (no CUDA op yet).
    pipe.load_ip_adapter(
        ip_adapter_repo,
        subfolder="sdxl_models",
        weight_name="ip-adapter_sdxl.bin",
    )
    # NOW move the whole pipeline to the device ZeroGPU assigned.
    pipe.to(device)
    PIPELINE = pipe
    return PIPELINE
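# Note: the Space's process stays alive between ZeroGPU requests, so in practice
# the first call pays the full from_pretrained download/load cost and later calls
# only pay the .to(device) move of the cached pipeline.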


# =============================
# Main generate (GPU section)
# =============================
@spaces.GPU
def generate(person: Image.Image, clothing: Image.Image) -> Image.Image:
    """
    ZeroGPU allocates a CUDA device and then calls this function (hence the
    @spaces.GPU decorator). All CUDA/ONNXRuntime initializations must happen
    here (or deeper).
    """
    # Import segmentation modules here so they initialize after GPU exists.
    from SegBody import segment_body
    from SegCloth import segment_clothing

    # If onnxruntime is used under the hood, ensure it doesn't try CUDA without a GPU.
    try:
        import onnxruntime as ort  # noqa: F401
        if not torch.cuda.is_available():
            os.environ.setdefault("ORT_DISABLE_CUDA", "1")
    except Exception:
        pass

    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = _get_pipeline(device)

    # --- Preprocess (CPU)
    person = person.copy()
    clothing = clothing.copy()

    # Keep person within 1024, then square-pad to /8 and remember offsets.
    person.thumbnail((1024, 1024))
    square_img, left, top, ow, oh, side = square_pad_meta(person, color="white", multiple=8)
    image = square_img  # feed this square to seg & pipeline (already /8-compliant)

    # Clothing can be smaller; make dimensions /8 to be safe.
    clothing.thumbnail((1024, 1024))
    clothing = resize_to_multiple(clothing, 8)

    # --- Segmentation (after GPU allocation; modules can use GPU if they choose)
    seg_image, mask_image = segment_body(image, face=False)
    seg_cloth = segment_clothing(
        clothing,
        clothes=["Upper-clothes", "Skirt", "Pants", "Dress", "Belt"],
    )
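    # seg_image itself is unused below; only the body mask (mask_image) is passed
    # to the inpainting pipeline, and the segmented garment (seg_cloth) conditions
    # it via the IP-Adapter image prompt.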

    # --- Diffusion
    pipe.set_ip_adapter_scale(1.0)
    result = pipe(
        prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin",
        negative_prompt=(
            "ugly, bad quality, bad anatomy, deformed body, deformed hands, "
            "deformed feet, deformed face, deformed clothing, deformed skin, "
            "bad skin, leggings, tights, stockings"
        ),
        image=image,
        mask_image=mask_image,
        ip_adapter_image=seg_cloth,
        width=image.width,
        height=image.height,
        strength=0.99,
        guidance_scale=7.5,
        num_inference_steps=100,
    ).images[0]
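    # Note: strength=0.99 regenerates the masked region almost from scratch rather
    # than lightly retouching it; lowering it keeps more of the original pixels,
    # and fewer num_inference_steps trades quality for speed.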

    # Crop back to the original (post-thumbnail) person frame using the paste offsets.
    final = result.crop((left, top, left + ow, top + oh))
    return final
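

# Hypothetical local smoke test (assumes the MODEL / IP_ADAPTER env vars are set,
# the SegBody/SegCloth modules are importable, and a CUDA GPU is present; the
# image paths come from the Gradio examples below):
#   out = generate(Image.open("images/person1.jpg"), Image.open("images/clothing1.jpg"))
#   out.save("result.png")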


# =============================
# Gradio UI
# =============================
iface = gr.Interface(
    fn=generate,
    inputs=[gr.Image(label="Person", type="pil"), gr.Image(label="Clothing", type="pil")],
    outputs=[gr.Image(label="Result")],
    title="Fashion Try-On",
    description="""
    by <a href="https://www.tonyassi.com/">Tony Assi</a><br/>
    Check out <a href="https://huggingface.co/spaces/tonyassi/Virtual-Try-On-Pro">Virtual Try-On Pro</a>!<br/><br/>
    Please ❤️ this Space. I build custom AI apps for companies. <a href="mailto:tony.assi.media@gmail.com">Email me</a> for business inquiries.
    """,
    theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate"),
    examples=[
        ["images/person1.jpg", "images/clothing1.jpg"],
        ["images/person1.jpg", "images/clothing2.jpg"],
    ],
)

if __name__ == "__main__":
    iface.launch()