pixagram-dev

Runtime error

App Files Community

primerz committed on Oct 27

Commit

42ed573

verified ·

1 Parent(s): bddeb26

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -19

app.py CHANGED Viewed

@@ -100,9 +100,12 @@ class RetroArtConverter:
         )
         # Determine which controlnets to use
-        controlnets = [self.controlnet_depth]
         if self.instantid_enabled and self.controlnet_instantid is not None:
-            controlnets.append(self.controlnet_instantid)
         # Load SDXL checkpoint from HuggingFace Hub
         print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
@@ -114,7 +117,7 @@ class RetroArtConverter:
             )
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
-                controlnet=controlnets if len(controlnets) > 1 else controlnets[0],
                 vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
@@ -125,7 +128,7 @@ class RetroArtConverter:
             print("Using default SDXL")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
-                controlnet=controlnets if len(controlnets) > 1 else controlnets[0],
                 vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
@@ -160,6 +163,10 @@ class RetroArtConverter:
             except Exception as e:
                 print(f"⚠️ xformers not available: {e}")
         print("Model initialization complete!")
     def get_depth_map(self, image):
@@ -250,27 +257,25 @@ class RetroArtConverter:
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
         # Extract face embeddings if InstantID is enabled
         face_embeddings = None
-        control_images = [depth_image]
-        conditioning_scales = [controlnet_conditioning_scale]
-        if self.instantid_enabled and self.controlnet_instantid is not None:
             print("Extracting face embeddings...")
             img_array = np.array(resized_image)
             faces = self.face_app.get(img_array) if self.face_app is not None else []
             if len(faces) > 0:
                 print(f"Detected {len(faces)} face(s), using for identity preservation")
                 # Get the largest face
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
                 face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
-                # Prepare face image for InstantID ControlNet
-                face_control_image = resized_image.resize((target_width, target_height), Image.LANCZOS)
-                control_images.append(face_control_image)
-                conditioning_scales.append(image_scale)
                 # Enhance prompt for face preservation
                 prompt = f"portrait, detailed face, facial features, {prompt}"
@@ -292,20 +297,36 @@ class RetroArtConverter:
             "generator": torch.Generator(device=self.device).manual_seed(42)
         }
-        # Add control images and scales
-        if len(control_images) > 1:
-            # Multiple ControlNets
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
         else:
             # Single ControlNet (depth only)
             pipe_kwargs["image"] = depth_image
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
-        # Add face embeddings if available (for InstantID IP-Adapter)
-        if face_embeddings is not None:
-            pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
         # Generate image
         print("Generating retro art...")
         result = self.pipe(**pipe_kwargs)

         )
         # Determine which controlnets to use
         if self.instantid_enabled and self.controlnet_instantid is not None:
+            controlnets = [self.controlnet_depth, self.controlnet_instantid]
+            print(f"Initializing with multiple ControlNets: Depth + InstantID")
+        else:
+            controlnets = self.controlnet_depth
+            print(f"Initializing with single ControlNet: Depth only")
         # Load SDXL checkpoint from HuggingFace Hub
         print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
             )
             self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                 model_path,
+                controlnet=controlnets,
                 vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             print("Using default SDXL")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
+                controlnet=controlnets,
                 vae=self.vae,
                 torch_dtype=self.dtype,
                 use_safetensors=True
             except Exception as e:
                 print(f"⚠️ xformers not available: {e}")
+        # Track whether we're using multiple ControlNets
+        self.using_multiple_controlnets = isinstance(controlnets, list)
+        print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
         print("Model initialization complete!")
     def get_depth_map(self, image):
         depth_image = self.get_depth_map(resized_image)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
+        # Determine if we're using multiple ControlNets based on initialization
+        using_multiple_controlnets = self.using_multiple_controlnets
         # Extract face embeddings if InstantID is enabled
         face_embeddings = None
+        has_detected_faces = False
+        if using_multiple_controlnets:
             print("Extracting face embeddings...")
             img_array = np.array(resized_image)
             faces = self.face_app.get(img_array) if self.face_app is not None else []
             if len(faces) > 0:
+                has_detected_faces = True
                 print(f"Detected {len(faces)} face(s), using for identity preservation")
                 # Get the largest face
                 face = sorted(faces, key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
                 face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(self.device, dtype=self.dtype)
                 # Enhance prompt for face preservation
                 prompt = f"portrait, detailed face, facial features, {prompt}"
             "generator": torch.Generator(device=self.device).manual_seed(42)
         }
+        # Add control images and scales based on ControlNet configuration
+        if using_multiple_controlnets and has_detected_faces:
+            # Multiple ControlNets: depth + InstantID
+            print("Using multiple ControlNets (Depth + InstantID)")
+            control_images = [depth_image, resized_image]
+            conditioning_scales = [controlnet_conditioning_scale, image_scale]
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
+            # Add face embeddings for InstantID IP-Adapter
+            if face_embeddings is not None:
+                pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
+        elif using_multiple_controlnets and not has_detected_faces:
+            # Multiple ControlNets initialized but no faces detected
+            # Pass images for both controlnets but with zero weight for InstantID
+            print("Multiple ControlNets available but no faces detected, using depth only")
+            control_images = [depth_image, depth_image]  # Use depth for both
+            conditioning_scales = [controlnet_conditioning_scale, 0.0]  # Zero weight for InstantID
+            pipe_kwargs["image"] = control_images
+            pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
         else:
             # Single ControlNet (depth only)
+            print("Using single ControlNet (Depth only)")
             pipe_kwargs["image"] = depth_image
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
         # Generate image
         print("Generating retro art...")
         result = self.pipe(**pipe_kwargs)