primerz committed on
Commit
914c99d
·
verified ·
1 Parent(s): b9f6674

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -55
app.py CHANGED
@@ -41,7 +41,7 @@ def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255),
41
  limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
42
  kps = np.array(kps)
43
 
44
- w, h = int(image_pil.size[0]), int(image_pil.size[1])
45
  out_img = np.zeros([h, w, 3])
46
 
47
  for i in range(len(limbSeq)):
@@ -270,13 +270,12 @@ class RetroArtConverter:
270
  best_diff = diff
271
  best_match = (width, height)
272
 
273
- # Ensure dimensions are multiples of 8
274
  width, height = best_match
275
- width = (width // 8) * 8
276
- height = (height // 8) * 8
277
 
278
- # Convert to Python int to avoid numpy int64 issues
279
- return int(width), int(height)
280
 
281
  def add_trigger_word(self, prompt):
282
  """Add trigger word to prompt if not present"""
@@ -284,16 +283,6 @@ class RetroArtConverter:
284
  return f"{TRIGGER_WORD}, {prompt}"
285
  return prompt
286
 
287
- def ensure_pil_image_int_size(self, img):
288
- """Ensure PIL image has proper Python int dimensions by recreating if needed"""
289
- if img is None:
290
- return img
291
- # Get size and ensure it's Python ints
292
- w, h = int(img.size[0]), int(img.size[1])
293
- # If size is already correct type, return as is
294
- # Otherwise, create a new image to ensure clean int sizes
295
- return img
296
-
297
  def generate_retro_art(
298
  self,
299
  input_image,
@@ -313,8 +302,6 @@ class RetroArtConverter:
313
 
314
  # Calculate optimal size
315
  original_width, original_height = input_image.size
316
- original_width = int(original_width)
317
- original_height = int(original_height)
318
  target_width, target_height = self.calculate_optimal_size(original_width, original_height)
319
 
320
  print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
@@ -322,17 +309,13 @@ class RetroArtConverter:
322
  print(f"Img2Img Strength: {strength}")
323
 
324
  # Resize with high quality - ensure dimensions are Python ints
325
- target_size = (int(target_width), int(target_height))
326
- resized_image = input_image.resize(target_size, Image.LANCZOS)
327
 
328
  # Generate depth map using Zoe
329
  print("Generating Zoe depth map...")
330
  depth_image = self.get_depth_map(resized_image)
331
- # Ensure size is tuple of Python ints
332
- depth_size = (int(depth_image.size[0]), int(depth_image.size[1]))
333
- target_size = (int(target_width), int(target_height))
334
- if depth_size != target_size:
335
- depth_image = depth_image.resize(target_size, Image.LANCZOS)
336
 
337
  # Handle face detection for InstantID
338
  using_multiple_controlnets = self.using_multiple_controlnets
@@ -370,18 +353,13 @@ class RetroArtConverter:
370
  print(f"Could not set LORA scale: {e}")
371
 
372
  # Prepare generation kwargs
373
- # Ensure main image has proper Python int dimensions
374
- main_img_w, main_img_h = int(resized_image.size[0]), int(resized_image.size[1])
375
- if not isinstance(resized_image.size[0], int) or not isinstance(resized_image.size[1], int):
376
- resized_image = resized_image.resize((main_img_w, main_img_h), Image.LANCZOS)
377
-
378
  pipe_kwargs = {
379
  "prompt": prompt,
380
  "negative_prompt": negative_prompt,
381
  "image": resized_image, # img2img source
382
- "strength": float(strength), # how much to transform
383
- "num_inference_steps": int(num_inference_steps),
384
- "guidance_scale": float(guidance_scale),
385
  "generator": torch.Generator(device=self.device).manual_seed(42)
386
  }
387
 
@@ -393,17 +371,8 @@ class RetroArtConverter:
393
  if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
394
  print("Using InstantID (keypoints) + Depth ControlNets")
395
  # Order: [InstantID, Depth]
396
- # Ensure images are proper PIL Images with int dimensions
397
  control_images = [face_kps_image, depth_image]
398
- # Verify and fix image sizes to ensure they're Python ints
399
- for i, img in enumerate(control_images):
400
- if hasattr(img, 'size'):
401
- w, h = int(img.size[0]), int(img.size[1])
402
- # Recreate image if needed to ensure clean size attributes
403
- if not isinstance(img.size[0], int) or not isinstance(img.size[1], int):
404
- control_images[i] = img.resize((w, h), Image.LANCZOS)
405
-
406
- conditioning_scales = [float(identity_preservation), float(controlnet_conditioning_scale)]
407
 
408
  pipe_kwargs["control_image"] = control_images
409
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
@@ -411,26 +380,16 @@ class RetroArtConverter:
411
  elif using_multiple_controlnets and not has_detected_faces:
412
  print("Multiple ControlNets available but no faces detected, using depth only")
413
  # Use depth for both to avoid errors
414
- # Ensure depth_image has proper int dimensions
415
- w, h = int(depth_image.size[0]), int(depth_image.size[1])
416
- if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
417
- depth_image = depth_image.resize((w, h), Image.LANCZOS)
418
-
419
  control_images = [depth_image, depth_image]
420
- conditioning_scales = [0.0, float(controlnet_conditioning_scale)]
421
 
422
  pipe_kwargs["control_image"] = control_images
423
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
424
 
425
  else:
426
  print("Using Depth ControlNet only")
427
- # Ensure depth_image has proper int dimensions
428
- w, h = int(depth_image.size[0]), int(depth_image.size[1])
429
- if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
430
- depth_image = depth_image.resize((w, h), Image.LANCZOS)
431
-
432
  pipe_kwargs["control_image"] = depth_image
433
- pipe_kwargs["controlnet_conditioning_scale"] = float(controlnet_conditioning_scale)
434
 
435
  # Generate
436
  scheduler_name = "LCM" if self.use_lcm else "DPM++"
 
41
  limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
42
  kps = np.array(kps)
43
 
44
+ w, h = image_pil.size
45
  out_img = np.zeros([h, w, 3])
46
 
47
  for i in range(len(limbSeq)):
 
270
  best_diff = diff
271
  best_match = (width, height)
272
 
273
+ # Ensure dimensions are multiples of 8 and explicitly convert to Python int
274
  width, height = best_match
275
+ width = int((width // 8) * 8)
276
+ height = int((height // 8) * 8)
277
 
278
+ return width, height
 
279
 
280
  def add_trigger_word(self, prompt):
281
  """Add trigger word to prompt if not present"""
 
283
  return f"{TRIGGER_WORD}, {prompt}"
284
  return prompt
285
 
 
 
 
 
 
 
 
 
 
 
286
  def generate_retro_art(
287
  self,
288
  input_image,
 
302
 
303
  # Calculate optimal size
304
  original_width, original_height = input_image.size
 
 
305
  target_width, target_height = self.calculate_optimal_size(original_width, original_height)
306
 
307
  print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
 
309
  print(f"Img2Img Strength: {strength}")
310
 
311
  # Resize with high quality - ensure dimensions are Python ints
312
+ resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
 
313
 
314
  # Generate depth map using Zoe
315
  print("Generating Zoe depth map...")
316
  depth_image = self.get_depth_map(resized_image)
317
+ if depth_image.size != (target_width, target_height):
318
+ depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
 
 
 
319
 
320
  # Handle face detection for InstantID
321
  using_multiple_controlnets = self.using_multiple_controlnets
 
353
  print(f"Could not set LORA scale: {e}")
354
 
355
  # Prepare generation kwargs
 
 
 
 
 
356
  pipe_kwargs = {
357
  "prompt": prompt,
358
  "negative_prompt": negative_prompt,
359
  "image": resized_image, # img2img source
360
+ "strength": strength, # how much to transform
361
+ "num_inference_steps": num_inference_steps,
362
+ "guidance_scale": guidance_scale,
363
  "generator": torch.Generator(device=self.device).manual_seed(42)
364
  }
365
 
 
371
  if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
372
  print("Using InstantID (keypoints) + Depth ControlNets")
373
  # Order: [InstantID, Depth]
 
374
  control_images = [face_kps_image, depth_image]
375
+ conditioning_scales = [identity_preservation, controlnet_conditioning_scale]
 
 
 
 
 
 
 
 
376
 
377
  pipe_kwargs["control_image"] = control_images
378
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
 
380
  elif using_multiple_controlnets and not has_detected_faces:
381
  print("Multiple ControlNets available but no faces detected, using depth only")
382
  # Use depth for both to avoid errors
 
 
 
 
 
383
  control_images = [depth_image, depth_image]
384
+ conditioning_scales = [0.0, controlnet_conditioning_scale]
385
 
386
  pipe_kwargs["control_image"] = control_images
387
  pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
388
 
389
  else:
390
  print("Using Depth ControlNet only")
 
 
 
 
 
391
  pipe_kwargs["control_image"] = depth_image
392
+ pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
393
 
394
  # Generate
395
  scheduler_name = "LCM" if self.use_lcm else "DPM++"