Spaces:

i0switch
/

my-image-generation

Running on Zero

App Files Files Community

i0switch committed on Jun 21

Commit

e0bbeb9

verified ·

1 Parent(s): fc0ad0a

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -39

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 # app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
 """Persistent-cache backend for InstantID portrait generation.
-- Caches model assets under /data when writable, else ~/.cache
-- Robust download with retry + multiple fallback URLs per asset
 """
 import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
 from pathlib import Path
@@ -13,22 +13,25 @@ from diffusers import (
 from insightface.app import FaceAnalysis
 ##############################################################################
-# 0. Cache dir & helpers
 ##############################################################################
 PERSIST_BASE = Path("/data")
-CACHE_ROOT = (PERSIST_BASE / "instantid_cache" if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
-              else Path.home() / ".cache" / "instantid_cache")
 print("cache →", CACHE_ROOT)
 MODELS_DIR  = CACHE_ROOT / "models"
-LORA_DIR    = MODELS_DIR / "Lora"
 EMB_DIR     = CACHE_ROOT / "embeddings"
 UPSCALE_DIR = CACHE_ROOT / "realesrgan"
 for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
     p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
     if dst.exists():
         print("✓", dst.relative_to(CACHE_ROOT)); return
     for i in range(1, attempts + 1):
@@ -38,22 +41,25 @@ def dl(url: str, dst: Path, attempts: int = 2):
     raise RuntimeError(f"download failed → {url}")
 ##############################################################################
-# 1. Asset download
 ##############################################################################
 print("— asset check —")
-# 1-A. base ckpt
 BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
-dl("https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16", BASE_CKPT)
-# 1-B. IP-Adapter core + FaceID LoRA
-IP_BIN_FILE = LORA_DIR / "ip-adapter-plus-face_sd15.bin"
-dl("https://huggingface.co/h94/IP-Adapter/resolve/main/models/ip-adapter-plus-face_sd15.bin", IP_BIN_FILE)
 LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
-dl("https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors", LORA_FILE)
-# 1-C. textual-inversion embeddings
 EMB_URLS = {
     "ng_deepnegative_v1_75t.pt": [
         "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
@@ -81,7 +87,7 @@ for fname, urls in EMB_URLS.items():
             if idx == len(urls): raise
             print("    ↳ fallback URL …")
-# 1-D. Real-ESRGAN weights 8×
 RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
 RRG_URLS = [
     "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
@@ -96,38 +102,48 @@ for idx, link in enumerate(RRG_URLS, 1):
         print("    ↳ fallback URL …")
 ##############################################################################
-# 2. Runtime init
 ##############################################################################
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype  = torch.float16 if torch.cuda.is_available() else torch.float32
 print("device:", device, "| dtype:", dtype)
-providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if torch.cuda.is_available() else ["CPUExecutionProvider"]
 face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
 face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
-controlnet = ControlNetModel.from_pretrained("InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype)
-pipe = StableDiffusionPipeline.from_single_file(BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2)
-pipe.vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=dtype).to(device)
 pipe.controlnet = controlnet
-pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++")
-# --- 修正ポイントここから --------------------------------------------------
-# 画像エンコーダは Lora/models/image_encoder/ に格納されている
-IMAGE_ENCODER_DIR = LORA_DIR / "models" / "image_encoder"
 pipe.load_ip_adapter(
-    str(LORA_DIR),                 # ip_adapter.bin の親ディレクトリ
-    subfolder="",                  # ip_adapter.bin は Lora/ 直下
-    weight_name=IP_BIN_FILE.name,  # LoRA 本体
-    image_encoder_path=str(IMAGE_ENCODER_DIR)  # 画像エンコーダの場所を明示
 )
-# --- 修正ポイントここまで --------------------------------------------------
-# FaceID LoRA (差分 LoRA)
 pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
 pipe.set_ip_adapter_scale(0.65)
 for emb in EMB_DIR.glob("*.*"):
     try:
         pipe.load_textual_inversion(emb, token=emb.stem)
@@ -138,7 +154,7 @@ pipe.to(device)
 print("pipeline ready ✔")
 ##############################################################################
-# 3. Upscaler
 ##############################################################################
 try:
     from basicsr.archs.rrdb_arch import RRDBNet
@@ -155,7 +171,7 @@ except Exception as e:
     UPSCALE_OK = False
 ##############################################################################
-# 4. Prompts & generation
 ##############################################################################
 BASE_PROMPT = (
     "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,\n"
@@ -177,7 +193,7 @@ NEG_PROMPT = (
 @spaces.GPU(duration=90)
 def generate(
     face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
-    progress=gr.Progress(track_tqdm=True)
 ):
     if face_np is None or face_np.size == 0:
         raise gr.Error("顔画像をアップロードしてください。")
@@ -204,11 +220,15 @@ def generate(
     if upscale:
         if UPSCALE_OK:
-            up, _ = upsampler.enhance(cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor)
             result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
         else:
-            result = result.resize((int(result.width * up_factor), int(result.height * up_factor)), Image.LANCZOS)
     return result
 ##############################################################################

 # app.py — InstantID × Beautiful Realistic Asians v7 (ZeroGPU-friendly, persistent cache)
 """Persistent-cache backend for InstantID portrait generation.
+   * 依存モデルは /data が書込可ならそこへ、それ以外は ~/.cache に保存
+   * wget を使った簡易リトライ DL
 """
 import os, subprocess, cv2, torch, spaces, gradio as gr, numpy as np
 from pathlib import Path
 from insightface.app import FaceAnalysis
 ##############################################################################
+# 0. キャッシュ用ディレクトリ
 ##############################################################################
 PERSIST_BASE = Path("/data")
+CACHE_ROOT = (
+    PERSIST_BASE / "instantid_cache"
+    if PERSIST_BASE.exists() and os.access(PERSIST_BASE, os.W_OK)
+    else Path.home() / ".cache" / "instantid_cache"
+)
 print("cache →", CACHE_ROOT)
 MODELS_DIR  = CACHE_ROOT / "models"
+LORA_DIR    = MODELS_DIR / "Lora"            # FaceID LoRA などを置く
 EMB_DIR     = CACHE_ROOT / "embeddings"
 UPSCALE_DIR = CACHE_ROOT / "realesrgan"
 for p in (MODELS_DIR, LORA_DIR, EMB_DIR, UPSCALE_DIR):
     p.mkdir(parents=True, exist_ok=True)
 def dl(url: str, dst: Path, attempts: int = 2):
+    """wget + リトライの簡易ダウンローダ"""
     if dst.exists():
         print("✓", dst.relative_to(CACHE_ROOT)); return
     for i in range(1, attempts + 1):
     raise RuntimeError(f"download failed → {url}")
 ##############################################################################
+# 1. 必要アセットのダウンロード
 ##############################################################################
 print("— asset check —")
+# 1-A. ベース checkpoint
 BASE_CKPT = MODELS_DIR / "beautiful_realistic_asians_v7_fp16.safetensors"
+dl(
+    "https://civitai.com/api/download/models/177164?type=Model&format=SafeTensor&size=pruned&fp=fp16",
+    BASE_CKPT,
+)
+# 1-B. FaceID LoRA（Δのみ）
 LORA_FILE = LORA_DIR / "ip-adapter-faceid-plusv2_sd15_lora.safetensors"
+dl(
+    "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid-plusv2_sd15_lora.safetensors",
+    LORA_FILE,
+)
+# 1-C. textual inversion Embeddings
 EMB_URLS = {
     "ng_deepnegative_v1_75t.pt": [
         "https://huggingface.co/datasets/gsdf/EasyNegative/resolve/main/ng_deepnegative_v1_75t.pt",
             if idx == len(urls): raise
             print("    ↳ fallback URL …")
+# 1-D. Real-ESRGAN weights (×8)
 RRG_WEIGHTS = UPSCALE_DIR / "RealESRGAN_x8plus.pth"
 RRG_URLS = [
     "https://huggingface.co/NoCrypt/Superscale_RealESRGAN/resolve/main/RealESRGAN_x8plus.pth",
         print("    ↳ fallback URL …")
 ##############################################################################
+# 2. ランタイム初期化
 ##############################################################################
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 dtype  = torch.float16 if torch.cuda.is_available() else torch.float32
 print("device:", device, "| dtype:", dtype)
+providers = (
+    ["CUDAExecutionProvider", "CPUExecutionProvider"]
+    if torch.cuda.is_available()
+    else ["CPUExecutionProvider"]
+)
 face_app = FaceAnalysis(name="buffalo_l", root=str(CACHE_ROOT), providers=providers)
 face_app.prepare(ctx_id=(0 if torch.cuda.is_available() else -1), det_size=(640, 640))
+# ControlNet + SD パイプライン
+controlnet = ControlNetModel.from_pretrained(
+    "InstantX/InstantID", subfolder="ControlNetModel", torch_dtype=dtype
+)
+pipe = StableDiffusionPipeline.from_single_file(
+    BASE_CKPT, torch_dtype=dtype, safety_checker=None, use_safetensors=True, clip_skip=2
+)
+pipe.vae = AutoencoderKL.from_pretrained(
+    "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
+).to(device)
 pipe.controlnet = controlnet
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+    pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++"
+)
+# --- ここが核心：画像エンコーダ込みで公式レポから直接ロード ------------------
 pipe.load_ip_adapter(
+    "h94/IP-Adapter",               # Hugging Face Hub ID
+    subfolder="models",             # ip-adapter-plus-face_sd15.bin が入っているフォルダ
+    weight_name="ip-adapter-plus-face_sd15.bin",
 )
+# ---------------------------------------------------------------------------
+# FaceID LoRA（差分 LoRA のみ）
 pipe.load_lora_weights(str(LORA_DIR), weight_name=LORA_FILE.name)
 pipe.set_ip_adapter_scale(0.65)
+# textual inversion 読み込み
 for emb in EMB_DIR.glob("*.*"):
     try:
         pipe.load_textual_inversion(emb, token=emb.stem)
 print("pipeline ready ✔")
 ##############################################################################
+# 3. アップスケーラ
 ##############################################################################
 try:
     from basicsr.archs.rrdb_arch import RRDBNet
     UPSCALE_OK = False
 ##############################################################################
+# 4. プロンプト & 生成関数
 ##############################################################################
 BASE_PROMPT = (
     "(masterpiece:1.2), best quality, ultra-realistic, RAW photo, 8k,\n"
 @spaces.GPU(duration=90)
 def generate(
     face_np, subject, add_prompt, add_neg, cfg, ip_scale, steps, w, h, upscale, up_factor,
+    progress=gr.Progress(track_tqdm=True),
 ):
     if face_np is None or face_np.size == 0:
         raise gr.Error("顔画像をアップロードしてください。")
     if upscale:
         if UPSCALE_OK:
+            up, _ = upsampler.enhance(
+                cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR), outscale=up_factor
+            )
             result = Image.fromarray(cv2.cvtColor(up, cv2.COLOR_BGR2RGB))
         else:
+            result = result.resize(
+                (int(result.width * up_factor), int(result.height * up_factor)),
+                Image.LANCZOS,
+            )
     return result
 ##############################################################################