primerz committed on
Commit
42ed573
·
verified ·
1 Parent(s): bddeb26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -19
app.py CHANGED
@@ -100,9 +100,12 @@ class RetroArtConverter:
100
  )
101
 
102
  # Determine which controlnets to use
103
- controlnets = [self.controlnet_depth]
104
  if self.instantid_enabled and self.controlnet_instantid is not None:
105
- controlnets.append(self.controlnet_instantid)
 
 
 
 
106
 
107
  # Load SDXL checkpoint from HuggingFace Hub
108
  print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
@@ -114,7 +117,7 @@ class RetroArtConverter:
114
  )
115
  self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
116
  model_path,
117
- controlnet=controlnets if len(controlnets) > 1 else controlnets[0],
118
  vae=self.vae,
119
  torch_dtype=self.dtype,
120
  use_safetensors=True
@@ -125,7 +128,7 @@ class RetroArtConverter:
125
  print("Using default SDXL")
126
  self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
127
  "stabilityai/stable-diffusion-xl-base-1.0",
128
- controlnet=controlnets if len(controlnets) > 1 else controlnets[0],
129
  vae=self.vae,
130
  torch_dtype=self.dtype,
131
  use_safetensors=True
@@ -160,6 +163,10 @@ class RetroArtConverter:
160
  except Exception as e:
161
  print(f"⚠️ xformers not available: {e}")
162
 
 
 
 
 
163
  print("Model initialization complete!")
164
 
165
  def get_depth_map(self, image):
@@ -250,27 +257,25 @@ class RetroArtConverter:
250
  depth_image = self.get_depth_map(resized_image)
251
  depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
252
 
 
 
 
253
  # Extract face embeddings if InstantID is enabled
254
  face_embeddings = None
255
- control_images = [depth_image]
256
- conditioning_scales = [controlnet_conditioning_scale]
257
 
258
- if self.instantid_enabled and self.controlnet_instantid is not None:
259
  print("Extracting face embeddings...")
260
  img_array = np.array(resized_image)
261
  faces = self.face_app.get(img_array) if self.face_app is not None else []
262
 
263
  if len(faces) > 0:
 
264
  print(f"Detected {len(faces)} face(s), using for identity preservation")
265
  # Get the largest face
266
  face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
267
  face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
268
 
269
- # Prepare face image for InstantID ControlNet
270
- face_control_image = resized_image.resize((target_width, target_height), Image.LANCZOS)
271
- control_images.append(face_control_image)
272
- conditioning_scales.append(image_scale)
273
-
274
  # Enhance prompt for face preservation
275
  prompt = f"portrait, detailed face, facial features, {prompt}"
276
 
@@ -292,20 +297,36 @@ class RetroArtConverter:
292
  "generator": torch.Generator(device=self.device).manual_seed(42)
293
  }
294
 
295
- # Add control images and scales
296
- if len(control_images) > 1:
297
- # Multiple ControlNets
 
 
 
 
298
  pipe_kwargs["image"] = control_images
299
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  else:
301
  # Single ControlNet (depth only)
 
302
  pipe_kwargs["image"] = depth_image
303
  pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
304
 
305
- # Add face embeddings if available (for InstantID IP-Adapter)
306
- if face_embeddings is not None:
307
- pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
308
-
309
  # Generate image
310
  print("Generating retro art...")
311
  result = self.pipe(**pipe_kwargs)
 
100
  )
101
 
102
  # Determine which controlnets to use
 
103
  if self.instantid_enabled and self.controlnet_instantid is not None:
104
+ controlnets = [self.controlnet_depth, self.controlnet_instantid]
105
+ print(f"Initializing with multiple ControlNets: Depth + InstantID")
106
+ else:
107
+ controlnets = self.controlnet_depth
108
+ print(f"Initializing with single ControlNet: Depth only")
109
 
110
  # Load SDXL checkpoint from HuggingFace Hub
111
  print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
 
117
  )
118
  self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
119
  model_path,
120
+ controlnet=controlnets,
121
  vae=self.vae,
122
  torch_dtype=self.dtype,
123
  use_safetensors=True
 
128
  print("Using default SDXL")
129
  self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
130
  "stabilityai/stable-diffusion-xl-base-1.0",
131
+ controlnet=controlnets,
132
  vae=self.vae,
133
  torch_dtype=self.dtype,
134
  use_safetensors=True
 
163
  except Exception as e:
164
  print(f"⚠️ xformers not available: {e}")
165
 
166
+ # Track whether we're using multiple ControlNets
167
+ self.using_multiple_controlnets = isinstance(controlnets, list)
168
+ print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
169
+
170
  print("Model initialization complete!")
171
 
172
  def get_depth_map(self, image):
 
257
  depth_image = self.get_depth_map(resized_image)
258
  depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
259
 
260
+ # Determine if we're using multiple ControlNets based on initialization
261
+ using_multiple_controlnets = self.using_multiple_controlnets
262
+
263
  # Extract face embeddings if InstantID is enabled
264
  face_embeddings = None
265
+ has_detected_faces = False
 
266
 
267
+ if using_multiple_controlnets:
268
  print("Extracting face embeddings...")
269
  img_array = np.array(resized_image)
270
  faces = self.face_app.get(img_array) if self.face_app is not None else []
271
 
272
  if len(faces) > 0:
273
+ has_detected_faces = True
274
  print(f"Detected {len(faces)} face(s), using for identity preservation")
275
  # Get the largest face
276
  face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
277
  face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
278
 
 
 
 
 
 
279
  # Enhance prompt for face preservation
280
  prompt = f"portrait, detailed face, facial features, {prompt}"
281
 
 
297
  "generator": torch.Generator(device=self.device).manual_seed(42)
298
  }
299
 
300
+ # Add control images and scales based on ControlNet configuration
301
+ if using_multiple_controlnets and has_detected_faces:
302
+ # Multiple ControlNets: depth + InstantID
303
+ print("Using multiple ControlNets (Depth + InstantID)")
304
+ control_images = [depth_image, resized_image]
305
+ conditioning_scales = [controlnet_conditioning_scale, image_scale]
306
+
307
  pipe_kwargs["image"] = control_images
308
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
309
+
310
+ # Add face embeddings for InstantID IP-Adapter
311
+ if face_embeddings is not None:
312
+ pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
313
+
314
+ elif using_multiple_controlnets and not has_detected_faces:
315
+ # Multiple ControlNets initialized but no faces detected
316
+ # Pass images for both controlnets but with zero weight for InstantID
317
+ print("Multiple ControlNets available but no faces detected, using depth only")
318
+ control_images = [depth_image, depth_image] # Use depth for both
319
+ conditioning_scales = [controlnet_conditioning_scale, 0.0] # Zero weight for InstantID
320
+
321
+ pipe_kwargs["image"] = control_images
322
+ pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
323
+
324
  else:
325
  # Single ControlNet (depth only)
326
+ print("Using single ControlNet (Depth only)")
327
  pipe_kwargs["image"] = depth_image
328
  pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
329
 
 
 
 
 
330
  # Generate image
331
  print("Generating retro art...")
332
  result = self.pipe(**pipe_kwargs)