pixagram-dev

Runtime error

App Files Community

primerz committed on Oct 27

Commit

914c99d

verified ·

1 Parent(s): b9f6674

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -55

app.py CHANGED Viewed

@@ -41,7 +41,7 @@ def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255),
     limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
     kps = np.array(kps)
-    w, h = int(image_pil.size[0]), int(image_pil.size[1])
     out_img = np.zeros([h, w, 3])
     for i in range(len(limbSeq)):
@@ -270,13 +270,12 @@ class RetroArtConverter:
                 best_diff = diff
                 best_match = (width, height)
-        # Ensure dimensions are multiples of 8
         width, height = best_match
-        width = (width // 8) * 8
-        height = (height // 8) * 8
-        # Convert to Python int to avoid numpy int64 issues
-        return int(width), int(height)
     def add_trigger_word(self, prompt):
         """Add trigger word to prompt if not present"""
@@ -284,16 +283,6 @@ class RetroArtConverter:
             return f"{TRIGGER_WORD}, {prompt}"
         return prompt
-    def ensure_pil_image_int_size(self, img):
-        """Ensure PIL image has proper Python int dimensions by recreating if needed"""
-        if img is None:
-            return img
-        # Get size and ensure it's Python ints
-        w, h = int(img.size[0]), int(img.size[1])
-        # If size is already correct type, return as is
-        # Otherwise, create a new image to ensure clean int sizes
-        return img
     def generate_retro_art(
         self,
         input_image,
@@ -313,8 +302,6 @@ class RetroArtConverter:
         # Calculate optimal size
         original_width, original_height = input_image.size
-        original_width = int(original_width)
-        original_height = int(original_height)
         target_width, target_height = self.calculate_optimal_size(original_width, original_height)
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
@@ -322,17 +309,13 @@ class RetroArtConverter:
         print(f"Img2Img Strength: {strength}")
         # Resize with high quality - ensure dimensions are Python ints
-        target_size = (int(target_width), int(target_height))
-        resized_image = input_image.resize(target_size, Image.LANCZOS)
         # Generate depth map using Zoe
         print("Generating Zoe depth map...")
         depth_image = self.get_depth_map(resized_image)
-        # Ensure size is tuple of Python ints
-        depth_size = (int(depth_image.size[0]), int(depth_image.size[1]))
-        target_size = (int(target_width), int(target_height))
-        if depth_size != target_size:
-            depth_image = depth_image.resize(target_size, Image.LANCZOS)
         # Handle face detection for InstantID
         using_multiple_controlnets = self.using_multiple_controlnets
@@ -370,18 +353,13 @@ class RetroArtConverter:
                 print(f"Could not set LORA scale: {e}")
         # Prepare generation kwargs
-        # Ensure main image has proper Python int dimensions
-        main_img_w, main_img_h = int(resized_image.size[0]), int(resized_image.size[1])
-        if not isinstance(resized_image.size[0], int) or not isinstance(resized_image.size[1], int):
-            resized_image = resized_image.resize((main_img_w, main_img_h), Image.LANCZOS)
         pipe_kwargs = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
             "image": resized_image,  # img2img source
-            "strength": float(strength),  # how much to transform
-            "num_inference_steps": int(num_inference_steps),
-            "guidance_scale": float(guidance_scale),
             "generator": torch.Generator(device=self.device).manual_seed(42)
         }
@@ -393,17 +371,8 @@ class RetroArtConverter:
         if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
             print("Using InstantID (keypoints) + Depth ControlNets")
             # Order: [InstantID, Depth]
-            # Ensure images are proper PIL Images with int dimensions
             control_images = [face_kps_image, depth_image]
-            # Verify and fix image sizes to ensure they're Python ints
-            for i, img in enumerate(control_images):
-                if hasattr(img, 'size'):
-                    w, h = int(img.size[0]), int(img.size[1])
-                    # Recreate image if needed to ensure clean size attributes
-                    if not isinstance(img.size[0], int) or not isinstance(img.size[1], int):
-                        control_images[i] = img.resize((w, h), Image.LANCZOS)
-            conditioning_scales = [float(identity_preservation), float(controlnet_conditioning_scale)]
             pipe_kwargs["control_image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
@@ -411,26 +380,16 @@ class RetroArtConverter:
         elif using_multiple_controlnets and not has_detected_faces:
             print("Multiple ControlNets available but no faces detected, using depth only")
             # Use depth for both to avoid errors
-            # Ensure depth_image has proper int dimensions
-            w, h = int(depth_image.size[0]), int(depth_image.size[1])
-            if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
-                depth_image = depth_image.resize((w, h), Image.LANCZOS)
             control_images = [depth_image, depth_image]
-            conditioning_scales = [0.0, float(controlnet_conditioning_scale)]
             pipe_kwargs["control_image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
         else:
             print("Using Depth ControlNet only")
-            # Ensure depth_image has proper int dimensions
-            w, h = int(depth_image.size[0]), int(depth_image.size[1])
-            if not isinstance(depth_image.size[0], int) or not isinstance(depth_image.size[1], int):
-                depth_image = depth_image.resize((w, h), Image.LANCZOS)
             pipe_kwargs["control_image"] = depth_image
-            pipe_kwargs["controlnet_conditioning_scale"] = float(controlnet_conditioning_scale)
         # Generate
         scheduler_name = "LCM" if self.use_lcm else "DPM++"

     limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
     kps = np.array(kps)
+    w, h = image_pil.size
     out_img = np.zeros([h, w, 3])
     for i in range(len(limbSeq)):
                 best_diff = diff
                 best_match = (width, height)
+        # Ensure dimensions are multiples of 8 and explicitly convert to Python int
         width, height = best_match
+        width = int((width // 8) * 8)
+        height = int((height // 8) * 8)
+        return width, height
     def add_trigger_word(self, prompt):
         """Add trigger word to prompt if not present"""
             return f"{TRIGGER_WORD}, {prompt}"
         return prompt
     def generate_retro_art(
         self,
         input_image,
         # Calculate optimal size
         original_width, original_height = input_image.size
         target_width, target_height = self.calculate_optimal_size(original_width, original_height)
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
         print(f"Img2Img Strength: {strength}")
         # Resize with high quality - ensure dimensions are Python ints
+        resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
         # Generate depth map using Zoe
         print("Generating Zoe depth map...")
         depth_image = self.get_depth_map(resized_image)
+        if depth_image.size != (target_width, target_height):
+            depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
         # Handle face detection for InstantID
         using_multiple_controlnets = self.using_multiple_controlnets
                 print(f"Could not set LORA scale: {e}")
         # Prepare generation kwargs
         pipe_kwargs = {
             "prompt": prompt,
             "negative_prompt": negative_prompt,
             "image": resized_image,  # img2img source
+            "strength": strength,  # how much to transform
+            "num_inference_steps": num_inference_steps,
+            "guidance_scale": guidance_scale,
             "generator": torch.Generator(device=self.device).manual_seed(42)
         }
         if using_multiple_controlnets and has_detected_faces and face_kps_image is not None:
             print("Using InstantID (keypoints) + Depth ControlNets")
             # Order: [InstantID, Depth]
             control_images = [face_kps_image, depth_image]
+            conditioning_scales = [identity_preservation, controlnet_conditioning_scale]
             pipe_kwargs["control_image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
         elif using_multiple_controlnets and not has_detected_faces:
             print("Multiple ControlNets available but no faces detected, using depth only")
             # Use depth for both to avoid errors
             control_images = [depth_image, depth_image]
+            conditioning_scales = [0.0, controlnet_conditioning_scale]
             pipe_kwargs["control_image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
         else:
             print("Using Depth ControlNet only")
             pipe_kwargs["control_image"] = depth_image
+            pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
         # Generate
         scheduler_name = "LCM" if self.use_lcm else "DPM++"