my-image-generator

Build error

App Files Files Community

my-image-generator / app.py

i0switch

Update app.py

f47143a verified 4 months ago

raw

history blame

12 kB

	"""InstantID × Beautiful Realistic Asians v7 (ZeroGPU‑friendly, persistent cache)

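	Key points
	----------
	* `import spaces` comes first so that the ZeroGPU patch is reliably applied.
	* Models are loaded on the CPU at module scope; the move to CUDA happens
	  only once, inside the `@spaces.GPU` function.
	* No `.to("cuda")` or `torch.cuda.*` call is placed outside a function, which
	  avoids `RuntimeError: No CUDA GPUs are available`.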
	"""

	# ---------------------------------------------------------------------------
	# 0. Load dependencies (ZeroGPU patch first, then PyTorch)
	# ---------------------------------------------------------------------------
	import spaces # ⭐ ZeroGPU は torch より前に必須

	# --- Monkey-patch: supply `torchvision.transforms.functional_tensor`, which is
	# --- gone in torchvision 0.17+ (presumably still imported by basicsr/realesrgan
	# --- further down — confirm against the installed versions).
	import importlib
	import importlib.util
	import sys
	import types

	from torchvision.transforms import functional as F

	_FT_MODULE = "torchvision.transforms.functional_tensor"
	if importlib.util.find_spec(_FT_MODULE) is None:
	    # The module no longer exists: register a minimal stand-in that exposes
	    # only `rgb_to_grayscale`.
	    mod = types.ModuleType(_FT_MODULE)
	    mod.rgb_to_grayscale = F.rgb_to_grayscale
	    sys.modules[_FT_MODULE] = mod
	else:
	    # This torchvision still ships the real module; keep it instead of
	    # shadowing it with a one-function stub.
	    mod = importlib.import_module(_FT_MODULE)
	# ---------------------------------------------------------------------------

	import os, subprocess, cv2, torch, gradio as gr, numpy as np
	from pathlib import Path
	from PIL import Image
	from diffusers import (
	StableDiffusionPipeline,
	ControlNetModel,
	DPMSolverMultistepScheduler,
	AutoencoderKL,
	)
	from compel import Compel
	from insightface.app import FaceAnalysis

	# ---------------------------------------------------------------------------
	# 1. Cache directories
	# ---------------------------------------------------------------------------
	PERSIST_BASE = Path("/data")
	# Use the /data volume when it exists and is writable; otherwise keep the
	# cache under the current user's home directory.
	if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK):
	    CACHE_ROOT = PERSIST_BASE / "instantid_cache"
	else:
	    CACHE_ROOT = Path.home() / ".cache" / "instantid_cache"
	print("cache →", CACHE_ROOT)

	MODELS_DIR = CACHE_ROOT / "models"
	LORA_DIR = MODELS_DIR / "Lora"  # FaceID LoRA and similar files go here
	EMB_DIR = CACHE_ROOT / "embeddings"
	UPSCALE_DIR = CACHE_ROOT / "realesrgan"
	# Create the whole tree up front so later downloads can write straight into it.
	for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
	    p.mkdir(parents=True, exist_ok=True)


	def dl(url: str, dst: Path, attempts: int = 2):
	    """Download ``url`` to ``dst`` with ``wget``, retrying on failure.

	    Args:
	        url: Address to fetch.
	        dst: Final location of the file. A non-empty file already at this
	            path is treated as a completed download and left untouched.
	        attempts: Number of times ``wget`` is run before giving up.

	    Raises:
	        RuntimeError: If every attempt exits with a non-zero status.
	    """
	    # A zero-byte file is what a failed `wget -O` leaves behind, so it must not
	    # count as an already-downloaded asset.
	    if dst.exists() and dst.stat().st_size > 0:
	        print("✓", dst.relative_to(CACHE_ROOT))
	        return

	    # `wget -O` creates the output file even when the transfer fails. Download
	    # to a side file and promote it only on success, so a failed attempt can
	    # never be mistaken for a finished download on the next call.
	    tmp = dst.with_name(dst.name + ".part")
	    for i in range(1, attempts + 1):
	        print(f"⬇ {dst.name} (try {i}/{attempts})")
	        if subprocess.call(["wget", "-q", "-O", str(tmp), url]) == 0:
	            tmp.replace(dst)
	            return
	        if tmp.exists():
	            tmp.unlink()
	    raise RuntimeError(f"download failed → {url}")

	# ---------------------------------------------------------------------------
	# 2. Download the required assets
	# ---------------------------------------------------------------------------
	print("— asset check —")

	# 2-A. Base checkpoint
	BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
	dl(
	    url="https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
	    dst=BASE_CKPT,
	)

	# 2-B. FaceID LoRA (delta weights only)
	LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
	dl(
	    url="https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
	    dst=LORA_FILE,
	)

	# 2-C. Textual-inversion embeddings
	# Each local file name maps to candidate URLs that are tried in order.
	# Hugging Face mirrors use `/resolve/`: for Git-LFS-tracked files a `/raw/`
	# link returns the small LFS pointer text instead of the actual weights.
	EMB_URLS = {
	    "ng_deepnegative_v1_75t.pt": [
	        "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
	        "https://huggingface.co/mrpxl2/animetarotV51.safetensors/resolve/cc3008c0148061896549a995cc297aef0af4ef1b/ng_deepnegative_v1_75t.pt",
	    ],
	    "badhandv4.pt": [
	        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/badhandv4.pt",
	        "https://huggingface.co/nolanaatama/embeddings/resolve/main/badhandv4.pt",
	    ],
	    "CyberRealistic_Negative-neg.pt": [
	        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/CyberRealistic_Negative-neg.pt",
	        # NOTE(review): this fallback points at a `.civitai.info` file, which
	        # looks like metadata rather than embedding weights — confirm/replace.
	        "https://huggingface.co/wsj1995/embeddings/raw/main/CyberRealistic_Negative-neg.civitai.info",
	    ],
	    "UnrealisticDream.pt": [
	        "https://huggingface.co/datasets/gsdf/ConceptLab/resolve/main/UnrealisticDream.pt",
	        "https://huggingface.co/imagepipeline/UnrealisticDream/resolve/main/f84133b4-aad8-44be-b9ce-7e7e3a8c111f.pt",
	    ],
	}
	for fname, urls in EMB_URLS.items():
	    dst = EMB_DIR / fname
	    for idx, u in enumerate(urls, 1):
	        try:
	            dl(u, dst)
	            break
	        except RuntimeError:
	            # dl() skips any destination that already exists, so a partial or
	            # empty file left by the failed attempt must be removed or the
	            # next URL would be reported as "already downloaded".
	            if dst.exists():
	                dst.unlink()
	            if idx == len(urls):
	                raise
	            print(" ↳ fallback URL …")

	# 2-D. Real-ESRGAN weights (x8)
	RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
	# Candidates are tried in order. The Hugging Face mirror uses `/resolve/`
	# because `/raw/` returns only the Git-LFS pointer for LFS-tracked files.
	# NOTE(review): the fallbacks are differently named weight files stored under
	# the same local name — confirm they fit the network built later on.
	RRG_URLS = [
	    "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
	    "https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x8.pth",
	    "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/8x_NMKD-Superscale_100k.pth",
	]
	for idx, link in enumerate(RRG_URLS, 1):
	    try:
	        dl(link, RRG_WEIGHTS)
	        break
	    except RuntimeError:
	        # dl() skips destinations that already exist; drop whatever the failed
	        # attempt left behind so the next URL is actually fetched.
	        if RRG_WEIGHTS.exists():
	            RRG_WEIGHTS.unlink()
	        if idx == len(RRG_URLS):
	            raise
	        print(" ↳ fallback URL …")

	# ---------------------------------------------------------------------------
	# 3. Model loading (everything on the CPU)
	# ---------------------------------------------------------------------------

	# Module scope stays on the CPU; these two globals are reassigned by the
	# GPU-side generation function.
	device: str = "cpu"
	dtype = torch.float32

	# Face analysis (insightface), restricted to the CPU execution provider
	providers = ["CPUExecutionProvider"]
	face_app = FaceAnalysis(
	    name="buffalo_l",
	    root=str(CACHE_ROOT),
	    providers=providers,
	)
	face_app.prepare(ctx_id=-1, det_size=(640, 640))

	# Stable Diffusion pipeline, assembled on the CPU from the single-file checkpoint.
	# NOTE(review): `clip_skip` is passed at load time here — confirm the installed
	# diffusers version honours it at this call site.
	pipe = StableDiffusionPipeline.from_single_file(
	    BASE_CKPT,
	    torch_dtype=dtype,
	    safety_checker=None,
	    use_safetensors=True,
	    clip_skip=2,
	)

	# Swap in the separately hosted VAE.
	pipe.vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype)

	# Rebuild the scheduler from the checkpoint's scheduler config with overrides.
	_scheduler_overrides = dict(use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
	pipe.scheduler = DPMSolverMultistepScheduler.from_config(
	    pipe.scheduler.config, **_scheduler_overrides
	)

	# IP-Adapter weights, then the LoRA file downloaded into LORA_DIR.
	pipe.load_ip_adapter(
	    "h94/IP-Adapter",
	    subfolder="models",
	    weight_name="ip-adapter-plus-face_sd15.bin",
	)
	pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
	pipe.set_ip_adapter_scale(0.65)

	# Textual-inversion embeddings: register every file in EMB_DIR under a token
	# equal to its file stem.
	# The pattern must be "*.*": pathlib rejects "." as a glob pattern, so with it
	# the files inside EMB_DIR are never enumerated.
	for emb in sorted(EMB_DIR.glob("*.*")):
	    if not emb.is_file():
	        continue
	    try:
	        pipe.load_textual_inversion(emb, token=emb.stem)
	    except Exception as exc:
	        # Best effort: a broken embedding must not stop start-up, but say why
	        # it was skipped.
	        print("emb skip →", emb.name, f"({exc})")
	    else:
	        print("emb loaded →", emb.stem)

	# Real-ESRGAN upscaler, built on the CPU. Any failure in this section only
	# disables upscaling (UPSCALE_OK = False); it does not abort start-up.
	try:
	    from basicsr.archs.rrdb_arch import RRDBNet

	    # The class is imported under whichever of the two names the installed
	    # `realesrgan` package provides.
	    try:
	        from realesrgan import RealESRGAN
	    except ImportError:
	        from realesrgan import RealESRGANer as RealESRGAN

	    rrdb = RRDBNet(3, 3, 64, 23, 32, scale=8)
	    upsampler = RealESRGAN("cpu", rrdb, scale=8)
	    upsampler.load_weights(str(RRG_WEIGHTS))
	except Exception as exc:
	    print("Real-ESRGAN disabled →", exc)
	    UPSCALE_OK = False
	else:
	    UPSCALE_OK = True

	# Compel prompt encoder, sharing the pipeline's tokenizer and text encoder.
	# Long prompts are not truncated.
	_compel_options = dict(
	    tokenizer=pipe.tokenizer,
	    text_encoder=pipe.text_encoder,
	    truncate_long_prompts=False,
	)
	compel_proc = Compel(**_compel_options)
	print("pipeline ready (CPU) ✔")

	# ---------------------------------------------------------------------------
	# 4. Prompt definitions
	# ---------------------------------------------------------------------------
	# Positive template; the `{subject}` placeholder is filled in with str.format.
	BASE_PROMPT = ", ".join(
	    (
	        "Cinematic photo",
	        "(best quality:1.1)",
	        "ultra-realistic",
	        "photorealistic of {subject}",
	        "natural skin texture",
	        "bokeh",
	        "standing",
	        "front view",
	        "full body shot",
	        "thighs",
	        "Canon EOS R5",
	        "85 mm",
	        "f/1.4",
	        "ISO 200",
	        "1/160 s",
	        "RAW",
	    )
	)

	# Default negative prompt.
	# NOTE(review): `BadDream` is named here but has no entry in EMB_URLS, while
	# `CyberRealistic_Negative-neg` is downloaded but not referenced — confirm.
	NEG_PROMPT = ", ".join(
	    (
	        "ng_deepnegative_v1_75t",
	        "BadDream:0.6",
	        "UnrealisticDream:0.8",
	        "badhandv4:0.9",
	        "(worst quality:2)",
	        "(low quality:1.8)",
	        "lowres",
	        "blurry",
	        "jpeg artifacts",
	        "painting",
	        "sketch",
	        "illustration",
	        "cartoon",
	        "anime",
	        "cgi",
	        "render",
	        "3d",
	        "monochrome",
	        "grayscale",
	        "text",
	        "logo",
	        "watermark",
	        "signature",
	        "username",
	        "bad anatomy",
	        "malformed",
	        "deformed",
	        "extra limbs",
	        "fused fingers",
	        "missing fingers",
	        "missing arms",
	        "missing legs",
	        "skin blemishes",
	        "acne",
	        "age spot",
	    )
	)

	# ---------------------------------------------------------------------------
	# 5. Generation function (GPU work)
	# ---------------------------------------------------------------------------
	GPU_INITIALISED = False  # flipped to True after the one-time move to the GPU


	def _ensure_gpu():
	    """Move the models to CUDA on the first call; later calls return at once."""
	    global GPU_INITIALISED, device, dtype

	    if GPU_INITIALISED:
	        return

	    print("\n--- first GPU initialisation ---")
	    device = "cuda"
	    dtype = torch.float16

	    # Cast to float16 while moving. Moving without the dtype would leave the
	    # float32 weights loaded at module scope running in float32 on the GPU.
	    pipe.to(device, dtype)
	    pipe.vae.to(device=device, dtype=dtype)
	    face_app.prepare(ctx_id=0, det_size=(640, 640))
	    if UPSCALE_OK:
	        try:
	            upsampler.model = upsampler.model.to(device)  # RealESRGANer
	            upsampler.device = device  # for newer API
	        except Exception as exc:
	            # Best effort: generation still works if the upscaler cannot be
	            # moved, but the reason should be visible in the logs.
	            print("Real-ESRGAN not moved to GPU →", exc)
	    GPU_INITIALISED = True
	    print("GPU ready ✔")


	def _upscale(image, factor):
	    """Enlarge a PIL image by ``factor``: Real-ESRGAN if loaded, else Lanczos."""
	    if UPSCALE_OK:
	        bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	        up, _ = upsampler.enhance(bgr, outscale=factor)
	        return Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
	    target = (int(image.width * factor), int(image.height * factor))
	    return image.resize(target, Image.LANCZOS)


	@spaces.GPU(duration=60)
	def generate(
	    face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate one image conditioned on a face photo and text prompts.

	    Args:
	        face_np: Face image as a NumPy array; used as the IP-Adapter image.
	        subject: Text substituted into ``BASE_PROMPT``; a default subject is
	            used when it is empty or ``None``.
	        add_prompt: Optional text appended to the positive prompt.
	        add_neg: Optional text appended to ``NEG_PROMPT``.
	        cfg: Guidance scale passed to the pipeline.
	        ip_scale: IP-Adapter scale applied before sampling.
	        steps: Requested step count; the pipeline is run with ``steps + 5``.
	        w: Output width in pixels.
	        h: Output height in pixels.
	        upscale: Whether to enlarge the result afterwards.
	        up_factor: Enlargement factor used when ``upscale`` is true.
	        progress: Gradio progress tracker (follows tqdm output).

	    Returns:
	        The generated (and optionally enlarged) PIL image.

	    Raises:
	        gr.Error: If no face image was supplied.
	    """
	    # Reject bad input before the one-time (and expensive) GPU initialisation.
	    if face_np is None or face_np.size == 0:
	        raise gr.Error("顔画像をアップロードしてください。")

	    _ensure_gpu()

	    # `subject` can be None (e.g. when the value is omitted), so guard .strip().
	    subject_text = (subject or "").strip() or "a beautiful 20yo woman"
	    prompt = BASE_PROMPT.format(subject=subject_text)
	    if add_prompt:
	        prompt += ", " + add_prompt
	    neg = NEG_PROMPT + (", " + add_neg if add_neg else "")

	    pipe.set_ip_adapter_scale(ip_scale)
	    img_in = Image.fromarray(face_np)

	    # Encode both prompts in one Compel call so they come back as one batch of
	    # equal length, then give each its own leading batch dimension again.
	    prompt_embeds, negative_prompt_embeds = compel_proc([prompt, neg])
	    prompt_embeds = prompt_embeds.unsqueeze(0)
	    negative_prompt_embeds = negative_prompt_embeds.unsqueeze(0)

	    result = pipe(
	        prompt_embeds=prompt_embeds,
	        negative_prompt_embeds=negative_prompt_embeds,
	        ip_adapter_image=img_in,
	        # NOTE(review): the fixed +5 on top of the requested steps is kept
	        # as-is; its purpose is not evident from this file — confirm.
	        num_inference_steps=int(steps) + 5,
	        guidance_scale=cfg,
	        width=int(w),
	        height=int(h),
	    ).images[0]

	    if upscale:
	        result = _upscale(result, up_factor)
	    return result

	# ---------------------------------------------------------------------------
	# 6. Gradio UI
	# ---------------------------------------------------------------------------
	with gr.Blocks() as demo:
	    gr.Markdown("# InstantID – Beautiful Realistic Asians v7 (ZeroGPU edition)")
	    with gr.Row():
	        # Left column: all inputs and the trigger button.
	        with gr.Column():
	            face_in = gr.Image(label="顔写真", type="numpy")
	            subj_in = gr.Textbox(
	                label="被写体説明",
	                placeholder="e.g. woman in black suit, smiling",
	            )
	            add_in = gr.Textbox(label="追加プロンプト")
	            addneg_in = gr.Textbox(label="追加ネガティブ")
	            ip_sld = gr.Slider(minimum=0, maximum=1.5, value=0.65, step=0.05, label="IP-Adapter scale")
	            cfg_sld = gr.Slider(minimum=1, maximum=15, value=6, step=0.5, label="CFG")
	            step_sld = gr.Slider(minimum=10, maximum=50, value=20, step=1, label="Steps")
	            w_sld = gr.Slider(minimum=512, maximum=1024, value=512, step=64, label="幅")
	            h_sld = gr.Slider(minimum=512, maximum=1024, value=768, step=64, label="高さ")
	            up_ck = gr.Checkbox(label="アップスケール", value=True)
	            up_fac = gr.Slider(minimum=1, maximum=8, value=2, step=1, label="倍率")
	            btn = gr.Button("生成", variant="primary")
	        # Right column: the generated image.
	        with gr.Column():
	            out_img = gr.Image(label="結果")

	    # Input order must match the positional parameters of `generate`.
	    btn.click(
	        fn=generate,
	        inputs=[
	            face_in,
	            subj_in,
	            add_in,
	            addneg_in,
	            cfg_sld,
	            ip_sld,
	            step_sld,
	            w_sld,
	            h_sld,
	            up_ck,
	            up_fac,
	        ],
	        outputs=out_img,
	        api_name="predict",
	    )

	print("launching …")