Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -41,7 +41,7 @@ def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255),
|
|
| 41 |
limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
|
| 42 |
kps = np.array(kps)
|
| 43 |
|
| 44 |
-
w, h =
|
| 45 |
out_img = np.zeros([h, w, 3])
|
| 46 |
|
| 47 |
for i in range(len(limbSeq)):
|
|
@@ -270,13 +270,12 @@ class RetroArtConverter:
|
|
| 270 |
best_diff = diff
|
| 271 |
best_match = (width, height)
|
| 272 |
|
| 273 |
-
# Ensure dimensions are multiples of 8
|
| 274 |
width, height = best_match
|
| 275 |
-
width = (width // 8) * 8
|
| 276 |
-
height = (height // 8) * 8
|
| 277 |
|
| 278 |
-
|
| 279 |
-
return int(width), int(height)
|
| 280 |
|
| 281 |
def add_trigger_word(self, prompt):
|
| 282 |
"""Add trigger word to prompt if not present"""
|
|
@@ -284,16 +283,6 @@ class RetroArtConverter:
|
|
| 284 |
return f"{TRIGGER_WORD}, {prompt}"
|
| 285 |
return prompt
|
| 286 |
|
| 287 |
-
def ensure_pil_image_int_size(self, img):
|
| 288 |
-
"""Ensure PIL image has proper Python int dimensions by recreating if needed"""
|
| 289 |
-
if img is None:
|
| 290 |
-
return img
|
| 291 |
-
# Get size and ensure it's Python ints
|
| 292 |
-
w, h = int(img.size[0]), int(img.size[1])
|
| 293 |
-
# If size is already correct type, return as is
|
| 294 |
-
# Otherwise, create a new image to ensure clean int sizes
|
| 295 |
-
return img
|
| 296 |
-
|
| 297 |
def generate_retro_art(
|
| 298 |
self,
|
| 299 |
input_image,
|
|
@@ -313,8 +302,6 @@ class RetroArtConverter:
|
|
| 313 |
|
| 314 |
# Calculate optimal size
|
| 315 |
original_width, original_height = input_image.size
|
| 316 |
-
original_width = int(original_width)
|
| 317 |
-
original_height = int(original_height)
|
| 318 |
target_width, target_height = self.calculate_optimal_size(original_width, original_height)
|
| 319 |
|
| 320 |
print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
|
|
@@ -322,17 +309,13 @@ class RetroArtConverter:
|
|
| 322 |
print(f"Img2Img Strength: {strength}")
|
| 323 |
|
| 324 |
# Resize with high quality - ensure dimensions are Python ints
|
| 325 |
-
|
| 326 |
-
resized_image = input_image.resize(target_size, Image.LANCZOS)
|
| 327 |
|
| 328 |
# Generate depth map using Zoe
|
| 329 |
print("Generating Zoe depth map...")
|
| 330 |
depth_image = self.get_depth_map(resized_image)
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
target_size = (int(target_width), int(target_height))
|
| 334 |
-
if depth_size != target_size:
|
| 335 |
-
depth_image = depth_image.resize(target_size, Image.LANCZOS)
|
| 336 |
|
| 337 |
# Handle face detection for InstantID
|
| 338 |
using_multiple_controlnets = self.using_multiple_controlnets
|
|
@@ -370,18 +353,13 @@ class RetroArtConverter:
|
|
| 370 |
print(f"Could not set LORA scale: {e}")
|
| 371 |
|
| 372 |
# Prepare generation kwargs
|
| 373 |
-
# Ensure main image has proper Python int dimensions
|
| 374 |
-
main_img_w, main_img_h = int(resized_image.size[0]), int(resized_image.size[1])
|
| 375 |
-
if not isinstance(resized_image.size[0], int) or not isinstance(resized_image.size[1], int):
|
| 376 |
-
resized_image = resized_image.resize((main_img_w, main_img_h), Image.LANCZOS)
|
| 377 |
-
|
| 378 |
pipe_kwargs = {
|
| 379 |
"prompt": prompt,
|
| 380 |
"negative_prompt": negative_prompt,
|
| 381 |
"image": resized_image, # img2img source
|
| 382 |
-
"strength":
|
| 383 |
-
"num_inference_steps":
|
| 384 |
-
"guidance_scale":
|
| 385 |
"generator": torch.Generator(device=self.device).manual_seed(42)
|
| 386 |
}
|
| 387 |
|
|
@@ -393,17 +371,8 @@ class RetroArtConverter:
|
|
| 393 |
if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
|
| 394 |
print("Using InstantID (keypoints) + Depth ControlNets")
|
| 395 |
# Order: [InstantID, Depth]
|
| 396 |
-
# Ensure images are proper PIL Images with int dimensions
|
| 397 |
control_images = [face_kps_image, depth_image]
|
| 398 |
-
|
| 399 |
-
for i, img in enumerate(control_images):
|
| 400 |
-
if hasattr(img, 'size'):
|
| 401 |
-
w, h = int(img.size[0]), int(img.size[1])
|
| 402 |
-
# Recreate image if needed to ensure clean size attributes
|
| 403 |
-
if not isinstance(img.size[0], int) or not isinstance(img.size[1], int):
|
| 404 |
-
control_images[i] = img.resize((w, h), Image.LANCZOS)
|
| 405 |
-
|
| 406 |
-
conditioning_scales = [float(identity_preservation), float(controlnet_conditioning_scale)]
|
| 407 |
|
| 408 |
pipe_kwargs["control_image"] = control_images
|
| 409 |
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
|
@@ -411,26 +380,16 @@ class RetroArtConverter:
|
|
| 411 |
elif using_multiple_controlnets and not has_detected_faces:
|
| 412 |
print("Multiple ControlNets available but no faces detected, using depth only")
|
| 413 |
# No face detected: pass the depth map to both ControlNet inputs and give the first (InstantID) input a 0.0 scale
|
| 414 |
-
# Ensure depth_image has proper int dimensions
|
| 415 |
-
w, h = int(depth_image.size[0]), int(depth_image.size[1])
|
| 416 |
-
if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
|
| 417 |
-
depth_image = depth_image.resize((w, h), Image.LANCZOS)
|
| 418 |
-
|
| 419 |
control_images = [depth_image, depth_image]
|
| 420 |
-
conditioning_scales = [0.0,
|
| 421 |
|
| 422 |
pipe_kwargs["control_image"] = control_images
|
| 423 |
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
| 424 |
|
| 425 |
else:
|
| 426 |
print("Using Depth ControlNet only")
|
| 427 |
-
# Ensure depth_image has proper int dimensions
|
| 428 |
-
w, h = int(depth_image.size[0]), int(depth_image.size[1])
|
| 429 |
-
if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
|
| 430 |
-
depth_image = depth_image.resize((w, h), Image.LANCZOS)
|
| 431 |
-
|
| 432 |
pipe_kwargs["control_image"] = depth_image
|
| 433 |
-
pipe_kwargs["controlnet_conditioning_scale"] =
|
| 434 |
|
| 435 |
# Generate
|
| 436 |
scheduler_name = "LCM" if self.use_lcm else "DPM++"
|
|
|
|
| 41 |
limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
|
| 42 |
kps = np.array(kps)
|
| 43 |
|
| 44 |
+
w, h = image_pil.size
|
| 45 |
out_img = np.zeros([h, w, 3])
|
| 46 |
|
| 47 |
for i in range(len(limbSeq)):
|
|
|
|
| 270 |
best_diff = diff
|
| 271 |
best_match = (width, height)
|
| 272 |
|
| 273 |
+
# Round dimensions down to the nearest multiple of 8 and explicitly convert to Python int
|
| 274 |
width, height = best_match
|
| 275 |
+
width = int((width // 8) * 8)
|
| 276 |
+
height = int((height // 8) * 8)
|
| 277 |
|
| 278 |
+
return width, height
|
|
|
|
| 279 |
|
| 280 |
def add_trigger_word(self, prompt):
|
| 281 |
"""Add trigger word to prompt if not present"""
|
|
|
|
| 283 |
return f"{TRIGGER_WORD}, {prompt}"
|
| 284 |
return prompt
|
| 285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
def generate_retro_art(
|
| 287 |
self,
|
| 288 |
input_image,
|
|
|
|
| 302 |
|
| 303 |
# Calculate optimal size
|
| 304 |
original_width, original_height = input_image.size
|
|
|
|
|
|
|
| 305 |
target_width, target_height = self.calculate_optimal_size(original_width, original_height)
|
| 306 |
|
| 307 |
print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
|
|
|
|
| 309 |
print(f"Img2Img Strength: {strength}")
|
| 310 |
|
| 311 |
# Resize with high quality - ensure dimensions are Python ints
|
| 312 |
+
resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
|
|
| 313 |
|
| 314 |
# Generate depth map using Zoe
|
| 315 |
print("Generating Zoe depth map...")
|
| 316 |
depth_image = self.get_depth_map(resized_image)
|
| 317 |
+
if depth_image.size != (target_width, target_height):
|
| 318 |
+
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
# Handle face detection for InstantID
|
| 321 |
using_multiple_controlnets = self.using_multiple_controlnets
|
|
|
|
| 353 |
print(f"Could not set LORA scale: {e}")
|
| 354 |
|
| 355 |
# Prepare generation kwargs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
pipe_kwargs = {
|
| 357 |
"prompt": prompt,
|
| 358 |
"negative_prompt": negative_prompt,
|
| 359 |
"image": resized_image, # img2img source
|
| 360 |
+
"strength": strength, # how much to transform
|
| 361 |
+
"num_inference_steps": num_inference_steps,
|
| 362 |
+
"guidance_scale": guidance_scale,
|
| 363 |
"generator": torch.Generator(device=self.device).manual_seed(42)
|
| 364 |
}
|
| 365 |
|
|
|
|
| 371 |
if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
|
| 372 |
print("Using InstantID (keypoints) + Depth ControlNets")
|
| 373 |
# Order: [InstantID, Depth]
|
|
|
|
| 374 |
control_images = [face_kps_image, depth_image]
|
| 375 |
+
conditioning_scales = [identity_preservation, controlnet_conditioning_scale]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
pipe_kwargs["control_image"] = control_images
|
| 378 |
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
|
|
|
| 380 |
elif using_multiple_controlnets and not has_detected_faces:
|
| 381 |
print("Multiple ControlNets available but no faces detected, using depth only")
|
| 382 |
# No face detected: pass the depth map to both ControlNet inputs and give the first (InstantID) input a 0.0 scale
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
control_images = [depth_image, depth_image]
|
| 384 |
+
conditioning_scales = [0.0, controlnet_conditioning_scale]
|
| 385 |
|
| 386 |
pipe_kwargs["control_image"] = control_images
|
| 387 |
pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
|
| 388 |
|
| 389 |
else:
|
| 390 |
print("Using Depth ControlNet only")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
pipe_kwargs["control_image"] = depth_image
|
| 392 |
+
pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
|
| 393 |
|
| 394 |
# Generate
|
| 395 |
scheduler_name = "LCM" if self.use_lcm else "DPM++"
|