primerz committed on
Commit
b9f6674
·
verified ·
1 Parent(s): cc3b669

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -11
app.py CHANGED
@@ -41,7 +41,7 @@ def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255),
41
  limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
42
  kps = np.array(kps)
43
 
44
- w, h = image_pil.size
45
  out_img = np.zeros([h, w, 3])
46
 
47
  for i in range(len(limbSeq)):
@@ -284,6 +284,16 @@ class RetroArtConverter:
284
  return f"{TRIGGER_WORD}, {prompt}"
285
  return prompt
286
 
 
 
 
 
 
 
 
 
 
 
287
  def generate_retro_art(
288
  self,
289
  input_image,
@@ -311,14 +321,18 @@ class RetroArtConverter:
311
  print(f"Prompt: {prompt}")
312
  print(f"Img2Img Strength: {strength}")
313
 
314
- # Resize with high quality
315
- resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
 
316
 
317
  # Generate depth map using Zoe
318
  print("Generating Zoe depth map...")
319
  depth_image = self.get_depth_map(resized_image)
320
- if depth_image.size != (target_width, target_height):
321
- depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
 
 
 
322
 
323
  # Handle face detection for InstantID
324
  using_multiple_controlnets = self.using_multiple_controlnets
@@ -356,13 +370,18 @@ class RetroArtConverter:
356
  print(f"Could not set LORA scale: {e}")
357
 
358
  # Prepare generation kwargs
 
 
 
 
 
359
  pipe_kwargs = {
360
  "prompt": prompt,
361
  "negative_prompt": negative_prompt,
362
  "image": resized_image, # img2img source
363
- "strength": strength, # how much to transform
364
- "num_inference_steps": num_inference_steps,
365
- "guidance_scale": guidance_scale,
366
  "generator": torch.Generator(device=self.device).manual_seed(42)
367
  }
368
 
@@ -374,8 +393,17 @@ class RetroArtConverter:
374
  if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
375
  print("Using InstantID (keypoints) + Depth ControlNets")
376
  # Order: [InstantID, Depth]
 
377
  control_images = [face_kps_image, depth_image]
378
- conditioning_scales = [identity_preservation, controlnet_conditioning_scale]
 
 
 
 
 
 
 
 
379
 
380
  pipe_kwargs["control_image"] = control_images
381
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
@@ -383,16 +411,26 @@ class RetroArtConverter:
383
  elif using_multiple_controlnets and not has_detected_faces:
384
  print("Multiple ControlNets available but no faces detected, using depth only")
385
  # Use depth for both to avoid errors
 
 
 
 
 
386
  control_images = [depth_image, depth_image]
387
- conditioning_scales = [0.0, controlnet_conditioning_scale]
388
 
389
  pipe_kwargs["control_image"] = control_images
390
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
391
 
392
  else:
393
  print("Using Depth ControlNet only")
 
 
 
 
 
394
  pipe_kwargs["control_image"] = depth_image
395
- pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
396
 
397
  # Generate
398
  scheduler_name = "LCM" if self.use_lcm else "DPM++"
 
41
  limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
42
  kps = np.array(kps)
43
 
44
+ w, h = int(image_pil.size[0]), int(image_pil.size[1])
45
  out_img = np.zeros([h, w, 3])
46
 
47
  for i in range(len(limbSeq)):
 
284
  return f"{TRIGGER_WORD}, {prompt}"
285
  return prompt
286
 
287
+ def ensure_pil_image_int_size(self, img):
288
+ """Ensure PIL image has proper Python int dimensions by recreating if needed"""
289
+ if img is None:
290
+ return img
291
+ # Get size and ensure it's Python ints
292
+ w, h = int(img.size[0]), int(img.size[1])
293
+ # If size is already correct type, return as is
294
+ # Otherwise, create a new image to ensure clean int sizes
295
+ return img
296
+
297
  def generate_retro_art(
298
  self,
299
  input_image,
 
321
  print(f"Prompt: {prompt}")
322
  print(f"Img2Img Strength: {strength}")
323
 
324
+ # Resize with high quality - ensure dimensions are Python ints
325
+ target_size = (int(target_width), int(target_height))
326
+ resized_image = input_image.resize(target_size, Image.LANCZOS)
327
 
328
  # Generate depth map using Zoe
329
  print("Generating Zoe depth map...")
330
  depth_image = self.get_depth_map(resized_image)
331
+ # Ensure size is tuple of Python ints
332
+ depth_size = (int(depth_image.size[0]), int(depth_image.size[1]))
333
+ target_size = (int(target_width), int(target_height))
334
+ if depth_size != target_size:
335
+ depth_image = depth_image.resize(target_size, Image.LANCZOS)
336
 
337
  # Handle face detection for InstantID
338
  using_multiple_controlnets = self.using_multiple_controlnets
 
370
  print(f"Could not set LORA scale: {e}")
371
 
372
  # Prepare generation kwargs
373
+ # Ensure main image has proper Python int dimensions
374
+ main_img_w, main_img_h = int(resized_image.size[0]), int(resized_image.size[1])
375
+ if not isinstance(resized_image.size[0], int) or not isinstance(resized_image.size[1], int):
376
+ resized_image = resized_image.resize((main_img_w, main_img_h), Image.LANCZOS)
377
+
378
  pipe_kwargs = {
379
  "prompt": prompt,
380
  "negative_prompt": negative_prompt,
381
  "image": resized_image, # img2img source
382
+ "strength": float(strength), # how much to transform
383
+ "num_inference_steps": int(num_inference_steps),
384
+ "guidance_scale": float(guidance_scale),
385
  "generator": torch.Generator(device=self.device).manual_seed(42)
386
  }
387
 
 
393
  if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
394
  print("Using InstantID (keypoints) + Depth ControlNets")
395
  # Order: [InstantID, Depth]
396
+ # Ensure images are proper PIL Images with int dimensions
397
  control_images = [face_kps_image, depth_image]
398
+ # Verify and fix image sizes to ensure they're Python ints
399
+ for i, img in enumerate(control_images):
400
+ if hasattr(img, 'size'):
401
+ w, h = int(img.size[0]), int(img.size[1])
402
+ # Recreate image if needed to ensure clean size attributes
403
+ if not isinstance(img.size[0], int) or not isinstance(img.size[1], int):
404
+ control_images[i] = img.resize((w, h), Image.LANCZOS)
405
+
406
+ conditioning_scales = [float(identity_preservation), float(controlnet_conditioning_scale)]
407
 
408
  pipe_kwargs["control_image"] = control_images
409
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
 
411
  elif using_multiple_controlnets and not has_detected_faces:
412
  print("Multiple ControlNets available but no faces detected, using depth only")
413
  # Use depth for both to avoid errors
414
+ # Ensure depth_image has proper int dimensions
415
+ w, h = int(depth_image.size[0]), int(depth_image.size[1])
416
+ if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
417
+ depth_image = depth_image.resize((w, h), Image.LANCZOS)
418
+
419
  control_images = [depth_image, depth_image]
420
+ conditioning_scales = [0.0, float(controlnet_conditioning_scale)]
421
 
422
  pipe_kwargs["control_image"] = control_images
423
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
424
 
425
  else:
426
  print("Using Depth ControlNet only")
427
+ # Ensure depth_image has proper int dimensions
428
+ w, h = int(depth_image.size[0]), int(depth_image.size[1])
429
+ if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
430
+ depth_image = depth_image.resize((w, h), Image.LANCZOS)
431
+
432
  pipe_kwargs["control_image"] = depth_image
433
+ pipe_kwargs["controlnet_conditioning_scale"] = float(controlnet_conditioning_scale)
434
 
435
  # Generate
436
  scheduler_name = "LCM" if self.use_lcm else "DPM++"