Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -100,9 +100,12 @@ class RetroArtConverter:
|
|
| 100 |
)
|
| 101 |
|
| 102 |
# Determine which controlnets to use
|
| 103 |
-
controlnets = [self.controlnet_depth]
|
| 104 |
if self.instantid_enabled and self.controlnet_instantid is not None:
|
| 105 |
-
controlnets.append(self.controlnet_instantid)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
# Load SDXL checkpoint from HuggingFace Hub
|
| 108 |
print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
|
|
@@ -114,7 +117,7 @@ class RetroArtConverter:
|
|
| 114 |
)
|
| 115 |
self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
|
| 116 |
model_path,
|
| 117 |
-
controlnet=controlnets
|
| 118 |
vae=self.vae,
|
| 119 |
torch_dtype=self.dtype,
|
| 120 |
use_safetensors=True
|
|
@@ -125,7 +128,7 @@ class RetroArtConverter:
|
|
| 125 |
print("Using default SDXL")
|
| 126 |
self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
|
| 127 |
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 128 |
-
controlnet=controlnets
|
| 129 |
vae=self.vae,
|
| 130 |
torch_dtype=self.dtype,
|
| 131 |
use_safetensors=True
|
|
@@ -160,6 +163,10 @@ class RetroArtConverter:
|
|
| 160 |
except Exception as e:
|
| 161 |
print(f"⚠️ xformers not available: {e}")
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
print("Model initialization complete!")
|
| 164 |
|
| 165 |
def get_depth_map(self, image):
|
|
@@ -250,27 +257,25 @@ class RetroArtConverter:
|
|
| 250 |
depth_image = self.get_depth_map(resized_image)
|
| 251 |
depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
|
| 252 |
|
|
|
|
|
|
|
|
|
|
| 253 |
# Extract face embeddings if InstantID is enabled
|
| 254 |
face_embeddings = None
|
| 255 |
-
|
| 256 |
-
conditioning_scales = [controlnet_conditioning_scale]
|
| 257 |
|
| 258 |
-
if
|
| 259 |
print("Extracting face embeddings...")
|
| 260 |
img_array = np.array(resized_image)
|
| 261 |
faces = self.face_app.get(img_array) if self.face_app is not None else []
|
| 262 |
|
| 263 |
if len(faces) > 0:
|
|
|
|
| 264 |
print(f"Detected {len(faces)} face(s), using for identity preservation")
|
| 265 |
# Get the largest face
|
| 266 |
face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
|
| 267 |
face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
|
| 268 |
|
| 269 |
-
# Prepare face image for InstantID ControlNet
|
| 270 |
-
face_control_image = resized_image.resize((target_width, target_height), Image.LANCZOS)
|
| 271 |
-
control_images.append(face_control_image)
|
| 272 |
-
conditioning_scales.append(image_scale)
|
| 273 |
-
|
| 274 |
# Enhance prompt for face preservation
|
| 275 |
prompt = f"portrait, detailed face, facial features, {prompt}"
|
| 276 |
|
|
@@ -292,20 +297,36 @@ class RetroArtConverter:
|
|
| 292 |
"generator": torch.Generator(device=self.device).manual_seed(42)
|
| 293 |
}
|
| 294 |
|
| 295 |
-
# Add control images and scales
|
| 296 |
-
if
|
| 297 |
-
# Multiple ControlNets
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
pipe_kwargs["image"] = control_images
|
| 299 |
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
else:
|
| 301 |
# Single ControlNet (depth only)
|
|
|
|
| 302 |
pipe_kwargs["image"] = depth_image
|
| 303 |
pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
|
| 304 |
|
| 305 |
-
# Add face embeddings if available (for InstantID IP-Adapter)
|
| 306 |
-
if face_embeddings is not None:
|
| 307 |
-
pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
|
| 308 |
-
|
| 309 |
# Generate image
|
| 310 |
print("Generating retro art...")
|
| 311 |
result = self.pipe(**pipe_kwargs)
|
|
|
|
| 100 |
)
|
| 101 |
|
| 102 |
# Determine which controlnets to use
|
|
|
|
| 103 |
if self.instantid_enabled and self.controlnet_instantid is not None:
|
| 104 |
+
controlnets = [self.controlnet_depth, self.controlnet_instantid]
|
| 105 |
+
print(f"Initializing with multiple ControlNets: Depth + InstantID")
|
| 106 |
+
else:
|
| 107 |
+
controlnets = self.controlnet_depth
|
| 108 |
+
print(f"Initializing with single ControlNet: Depth only")
|
| 109 |
|
| 110 |
# Load SDXL checkpoint from HuggingFace Hub
|
| 111 |
print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
|
|
|
|
| 117 |
)
|
| 118 |
self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
|
| 119 |
model_path,
|
| 120 |
+
controlnet=controlnets,
|
| 121 |
vae=self.vae,
|
| 122 |
torch_dtype=self.dtype,
|
| 123 |
use_safetensors=True
|
|
|
|
| 128 |
print("Using default SDXL")
|
| 129 |
self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
|
| 130 |
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 131 |
+
controlnet=controlnets,
|
| 132 |
vae=self.vae,
|
| 133 |
torch_dtype=self.dtype,
|
| 134 |
use_safetensors=True
|
|
|
|
| 163 |
except Exception as e:
|
| 164 |
print(f"⚠️ xformers not available: {e}")
|
| 165 |
|
| 166 |
+
# Track whether we're using multiple ControlNets
|
| 167 |
+
self.using_multiple_controlnets = isinstance(controlnets, list)
|
| 168 |
+
print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
|
| 169 |
+
|
| 170 |
print("Model initialization complete!")
|
| 171 |
|
| 172 |
def get_depth_map(self, image):
|
|
|
|
| 257 |
depth_image = self.get_depth_map(resized_image)
|
| 258 |
depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
|
| 259 |
|
| 260 |
+
# Determine if we're using multiple ControlNets based on initialization
|
| 261 |
+
using_multiple_controlnets = self.using_multiple_controlnets
|
| 262 |
+
|
| 263 |
# Extract face embeddings if InstantID is enabled
|
| 264 |
face_embeddings = None
|
| 265 |
+
has_detected_faces = False
|
|
|
|
| 266 |
|
| 267 |
+
if using_multiple_controlnets:
|
| 268 |
print("Extracting face embeddings...")
|
| 269 |
img_array = np.array(resized_image)
|
| 270 |
faces = self.face_app.get(img_array) if self.face_app is not None else []
|
| 271 |
|
| 272 |
if len(faces) > 0:
|
| 273 |
+
has_detected_faces = True
|
| 274 |
print(f"Detected {len(faces)} face(s), using for identity preservation")
|
| 275 |
# Get the largest face
|
| 276 |
face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
|
| 277 |
face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
# Enhance prompt for face preservation
|
| 280 |
prompt = f"portrait, detailed face, facial features, {prompt}"
|
| 281 |
|
|
|
|
| 297 |
"generator": torch.Generator(device=self.device).manual_seed(42)
|
| 298 |
}
|
| 299 |
|
| 300 |
+
# Add control images and scales based on ControlNet configuration
|
| 301 |
+
if using_multiple_controlnets and has_detected_faces:
|
| 302 |
+
# Multiple ControlNets: depth + InstantID
|
| 303 |
+
print("Using multiple ControlNets (Depth + InstantID)")
|
| 304 |
+
control_images = [depth_image, resized_image]
|
| 305 |
+
conditioning_scales = [controlnet_conditioning_scale, image_scale]
|
| 306 |
+
|
| 307 |
pipe_kwargs["image"] = control_images
|
| 308 |
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
| 309 |
+
|
| 310 |
+
# Add face embeddings for InstantID IP-Adapter
|
| 311 |
+
if face_embeddings is not None:
|
| 312 |
+
pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
|
| 313 |
+
|
| 314 |
+
elif using_multiple_controlnets and not has_detected_faces:
|
| 315 |
+
# Multiple ControlNets initialized but no faces detected
|
| 316 |
+
# Pass images for both controlnets but with zero weight for InstantID
|
| 317 |
+
print("Multiple ControlNets available but no faces detected, using depth only")
|
| 318 |
+
control_images = [depth_image, depth_image] # Use depth for both
|
| 319 |
+
conditioning_scales = [controlnet_conditioning_scale, 0.0] # Zero weight for InstantID
|
| 320 |
+
|
| 321 |
+
pipe_kwargs["image"] = control_images
|
| 322 |
+
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
| 323 |
+
|
| 324 |
else:
|
| 325 |
# Single ControlNet (depth only)
|
| 326 |
+
print("Using single ControlNet (Depth only)")
|
| 327 |
pipe_kwargs["image"] = depth_image
|
| 328 |
pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
# Generate image
|
| 331 |
print("Generating retro art...")
|
| 332 |
result = self.pipe(**pipe_kwargs)
|