pixagram-dev

Runtime error

App Files Files Community

primerz committed on Nov 1

Commit

aa69808

verified ·

1 Parent(s): 22858c3

Update generator.py

Browse files

Files changed (1) hide show

generator.py +71 -109

generator.py CHANGED Viewed

@@ -1,5 +1,9 @@
 """
 Generation logic for Pixagram AI Pixel Art Generator
 """
 import gc
 import torch
@@ -19,10 +23,10 @@ from utils import (
     draw_kps, get_demographic_description, calculate_optimal_size, enhance_face_crop
 )
 from models import (
-    load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
     load_sdxl_pipeline, load_loras, setup_ip_adapter,
-    # --- START FIX: Import setup_cappella ---
-    setup_cappella,
     # --- END FIX ---
     setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
     load_openpose_detector, load_mediapipe_face_detector
@@ -71,11 +75,9 @@ class RetroArtConverter:
         self.instantid_enabled = instantid_success
         self.models_loaded['instantid'] = instantid_success
-        # Load image encoder
-        if self.instantid_enabled:
-            self.image_encoder = load_image_encoder()
-        else:
-            self.image_encoder = None
         # --- FIX START: Robust ControlNet Loading ---
         # Determine which controlnets to use
@@ -122,16 +124,18 @@ class RetroArtConverter:
         self.models_loaded['lora'] = lora_success
         # Setup IP-Adapter
-        if self.instantid_active and self.image_encoder is not None: # <-- Check instantid_active
-            self.image_proj_model, ip_adapter_success = setup_ip_adapter(self.pipe, self.image_encoder)
             self.models_loaded['ip_adapter'] = ip_adapter_success
         else:
-            print("[INFO] Face preservation: IP-Adapter disabled (InstantID model failed or encoder failed)")
             self.models_loaded['ip_adapter'] = False
             self.image_proj_model = None
-        # --- START FIX: Setup Cappella ---
-        self.cappella, self.use_cappella = setup_cappella(self.pipe)
         # --- END FIX ---
         # Setup LCM scheduler
@@ -182,24 +186,21 @@ class RetroArtConverter:
         print("=== UPGRADE VERIFICATION ===")
         try:
-            # --- FIX: Corrected import paths and class names ---
-            from resampler import Resampler
-            from attention_processor import IPAttnProcessor2_0
-            resampler_check = isinstance(self.image_proj_model, Resampler) if hasattr(self, 'image_proj_model') and self.image_proj_model is not None else False
-            custom_attn_check = any(isinstance(p, IPAttnProcessor2_0) for p in self.pipe.unet.attn_processors.values()) if hasattr(self, 'pipe') else False
-            # --- END FIX ---
-            print(f"Enhanced Perceiver Resampler: {'[OK] ACTIVE' if resampler_check else '[INFO] Not active'}")
-            print(f"Enhanced IP-Adapter Attention: {'[OK] ACTIVE' if custom_attn_check else '[INFO] Not active'}")
-            if resampler_check and custom_attn_check:
-                print("[SUCCESS] Face preservation upgrade fully active")
-                print("  Expected improvement: +10-15% face similarity")
-            elif resampler_check or custom_attn_check:
-                print("[PARTIAL] Some upgrades active")
-            else:
-                print("[INFO] Using standard components")
         except Exception as e:
             print(f"[INFO] Verification skipped: {e}")
         print("============================\n")
@@ -641,33 +642,16 @@ class RetroArtConverter:
                             guidance_scale = adaptive_params['guidance_scale']
                             lora_scale = adaptive_params['lora_scale']
-                        # Extract face embeddings
-                        face_embeddings_base = face.normed_embedding
                         # Extract face crop
                         bbox = face.bbox.astype(int)
                         x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
                         face_bbox_original = [x1, y1, x2, y2]
-                        # Add padding
-                        face_width = x2 - x1
-                        face_height = y2 - y1
-                        padding_x = int(face_width * 0.3)
-                        padding_y = int(face_height * 0.3)
-                        x1 = max(0, x1 - padding_x)
-                        y1 = max(0, y1 - padding_y)
-                        x2 = min(resized_image.width, x2 + padding_x)
-                        y2 = min(resized_image.height, y2 + padding_y)
-                        # Crop face region
-                        face_crop = resized_image.crop((x1, y1, x2, y2))
-                        # MULTI-SCALE PROCESSING
-                        face_embeddings = self.extract_multi_scale_face(face_crop, face)
-                        # Enhance face crop
-                        face_crop_enhanced = enhance_face_crop(face_crop)
                         # Draw keypoints
                         face_kps = face.kps
                         face_kps_image = draw_kps(resized_image, face_kps)
@@ -677,7 +661,7 @@ class RetroArtConverter:
                         facial_attrs = get_facial_attributes(face)
                         # Update prompt with detected attributes
-                        prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD[lora_choice])
                         # Legacy output for compatibility
                         age = facial_attrs['age']
@@ -686,7 +670,7 @@ class RetroArtConverter:
                         gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
                         print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
-                        print(f"Face crop size: {face_crop.size}, enhanced: {face_crop_enhanced.size if face_crop_enhanced else 'N/A'}")
                     else:
                         print("✗ InsightFace found no faces")
@@ -745,15 +729,20 @@ class RetroArtConverter:
             if adapter_name != "none" and self.loaded_loras.get(adapter_name, False):
                 try:
                     self.pipe.set_adapters([adapter_name], adapter_weights=[lora_scale])
-                    print(f"LORA: Set adapter '{adapter_name}' with scale: {lora_scale}")
                 except Exception as e:
-                    print(f"Could not set LORA adapter '{adapter_name}': {e}")
                     self.pipe.set_adapters([]) # Disable LORAs if setting failed
             else:
                 if adapter_name == "none":
                     print("LORAs disabled by user choice.")
                 else:
                     print(f"LORA '{adapter_name}' not loaded or available, disabling LORAs.")
                 self.pipe.set_adapters([]) # Disable all LORAs
@@ -777,28 +766,33 @@ class RetroArtConverter:
         pipe_kwargs["generator"] = generator
-        # --- START FIX: Use our new Cappella module ---
-        if self.use_cappella and self.cappella is not None:
             try:
-                print("Encoding prompts with Cappella...")
-                # Call Cappella once. It handles truncation and padding.
-                conditioning = self.cappella(prompt, negative_prompt)
-                # Unpack the results
-                pipe_kwargs["prompt_embeds"] = conditioning.embeds
-                pipe_kwargs["pooled_prompt_embeds"] = conditioning.pooled_embeds
-                pipe_kwargs["negative_prompt_embeds"] = conditioning.negative_embeds
-                pipe_kwargs["negative_pooled_prompt_embeds"] = conditioning.negative_pooled_embeds
-                print(f"[OK] Cappella encoded - Prompt: {pipe_kwargs['prompt_embeds'].shape}, Negative: {pipe_kwargs['negative_prompt_embeds'].shape}")
             except Exception as e:
-                print(f"Cappella encoding failed, using standard prompts: {e}")
                 traceback.print_exc()
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
         else:
-            print("[WARNING] Cappella not found, using standard prompt encoding.")
             pipe_kwargs["prompt"] = prompt
             pipe_kwargs["negative_prompt"] = negative_prompt
         # --- END FIX ---
@@ -831,53 +825,21 @@ class RetroArtConverter:
                 conditioning_scales.append(identity_control_scale)
                 scale_debug_str.append(f"Identity: {identity_control_scale:.2f}")
-                # Add face embeddings for IP-Adapter if available
-                if face_embeddings is not None and self.models_loaded.get('ip_adapter', False) and face_crop_enhanced is not None:
-                    print(f"Processing InstantID face embeddings with Resampler...")
-                    with torch.no_grad():
-                        face_emb_tensor = torch.from_numpy(face_embeddings).to(device=self.device, dtype=self.dtype)
-                        face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
-                        face_proj_embeds = self.image_proj_model(face_emb_tensor)
-                        boosted_scale = identity_preservation * IDENTITY_BOOST_MULTIPLIER
-                        face_proj_embeds = face_proj_embeds * boosted_scale
-                        print(f"  - Face embedding: {face_proj_embeds.shape}, Scale: {boosted_scale:.2f}")
-                        # --- START FIX: Your padding solution ---
-                        # This fixes the "109 vs 77" error
-                        if 'prompt_embeds' in pipe_kwargs:
-                            original_embeds = pipe_kwargs['prompt_embeds']
-                            # Concatenate face embeddings to POSITIVE prompt
-                            combined_embeds = torch.cat([original_embeds, face_proj_embeds], dim=1)
-                            pipe_kwargs['prompt_embeds'] = combined_embeds
-                            # CRITICAL: Pad negative_prompt_embeds by the same amount
-                            if 'negative_prompt_embeds' in pipe_kwargs:
-                                negative_embeds = pipe_kwargs['negative_prompt_embeds']
-                                # Create zero padding [1, 16, 2048]
-                                neg_padding = torch.zeros(
-                                    (
-                                        negative_embeds.shape[0],      # 1
-                                        face_proj_embeds.shape[1], # 16
-                                        negative_embeds.shape[2],      # 2048
-                                    ),
-                                    device=negative_embeds.device,
-                                    dtype=negative_embeds.dtype
-                                )
-                                # Concatenate zero padding to NEGATIVE prompt
-                                pipe_kwargs['negative_prompt_embeds'] = torch.cat([negative_embeds, neg_padding], dim=1)
-                                print(f"  [OK] Negative prompt padded to match: {pipe_kwargs['negative_prompt_embeds'].shape}")
-                            print(f"  [OK] Face embeddings concatenated successfully! Prompt: {combined_embeds.shape}")
-                        else:
-                            print(f"  [WARNING] Can't concatenate - no prompt_embeds (use Cappella)")
-                        # --- END FIX 2 ---
                 elif has_detected_faces:
                     print("  Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
             else:
                 # No face detected - blank map needed to maintain ControlNet list order

 """
 Generation logic for Pixagram AI Pixel Art Generator
+--- UPGRADED VERSION ---
+- Uses StableDiffusionXLInstantIDImg2ImgPipeline for native InstantID support.
+- Replaces broken 'cappella' encoder with 'Compel' for robust prompt chunking.
+- Fixes LoRA style conflicts by using the correct pipeline architecture.
 """
 import gc
 import torch
     draw_kps, get_demographic_description, calculate_optimal_size, enhance_face_crop
 )
 from models import (
+    load_face_analysis, load_depth_detector, load_controlnets,
     load_sdxl_pipeline, load_loras, setup_ip_adapter,
+    # --- START FIX: Import setup_compel ---
+    setup_compel,
     # --- END FIX ---
     setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip,
     load_openpose_detector, load_mediapipe_face_detector
         self.instantid_enabled = instantid_success
         self.models_loaded['instantid'] = instantid_success
+        # --- FIX: Image encoder is loaded by pipeline ---
+        self.image_encoder = None
+        # --- END FIX ---
         # --- FIX START: Robust ControlNet Loading ---
         # Determine which controlnets to use
         self.models_loaded['lora'] = lora_success
         # Setup IP-Adapter
+        if self.instantid_active:
+            # The new setup_ip_adapter loads it *into* the pipe.
+            _ , ip_adapter_success = setup_ip_adapter(self.pipe)
             self.models_loaded['ip_adapter'] = ip_adapter_success
+            self.image_proj_model = None # No longer managed here
         else:
+            print("[INFO] Face preservation: IP-Adapter disabled (InstantID model failed)")
             self.models_loaded['ip_adapter'] = False
             self.image_proj_model = None
+        # --- START FIX: Setup Compel ---
+        self.compel, self.use_compel = setup_compel(self.pipe)
         # --- END FIX ---
         # Setup LCM scheduler
         print("=== UPGRADE VERIFICATION ===")
         try:
+            # --- FIX: Check if the correct pipeline is loaded ---
+            correct_pipeline = "StableDiffusionXLInstantIDImg2ImgPipeline"
+            pipeline_class_name = self.pipe.__class__.__name__
+            pipeline_check = correct_pipeline in pipeline_class_name
+            print(f"Pipeline Type: {pipeline_class_name}")
+            if pipeline_check:
+                print("[SUCCESS] Correct InstantID pipeline is active.")
+            else:
+                print(f"[WARNING] Incorrect pipeline active. Expected {correct_pipeline}")
+            compel_check = hasattr(self, 'compel') and self.compel is not None
+            print(f"Prompt Encoder: {'[OK] Compel' if compel_check else '[WARNING] Compel not loaded'}")
+            # --- END FIX ---
         except Exception as e:
             print(f"[INFO] Verification skipped: {e}")
         print("============================\n")
                             guidance_scale = adaptive_params['guidance_scale']
                             lora_scale = adaptive_params['lora_scale']
+                        # --- FIX: Use raw embedding as required by InstantID pipeline ---
+                        face_embeddings = face.normed_embedding
+                        face_crop_enhanced = None # Not needed by this pipeline
+                        # --- END FIX ---
                         # Extract face crop
                         bbox = face.bbox.astype(int)
                         x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]
                         face_bbox_original = [x1, y1, x2, y2]
                         # Draw keypoints
                         face_kps = face.kps
                         face_kps_image = draw_kps(resized_image, face_kps)
                         facial_attrs = get_facial_attributes(face)
                         # Update prompt with detected attributes
+                        prompt = build_enhanced_prompt(prompt, facial_attrs, TRIGGER_WORD.get(lora_choice, ""))
                         # Legacy output for compatibility
                         age = facial_attrs['age']
                         gender_str = 'M' if gender_code == 1 else ('F' if gender_code == 0 else 'N/A')
                         print(f"Face info: bbox={face.bbox}, age={age if age else 'N/A'}, gender={gender_str}")
+                        print(f"Face crop size: N/A, enhanced: N/A")
                     else:
                         print("✗ InsightFace found no faces")
             if adapter_name != "none" and self.loaded_loras.get(adapter_name, False):
                 try:
                     self.pipe.set_adapters([adapter_name], adapter_weights=[lora_scale])
+                    # --- FIX: Fuse LoRA weights for correct interaction with IP-Adapter ---
+                    self.pipe.fuse_lora(lora_scale=lora_scale, adapter_names=[adapter_name])
+                    print(f"LORA: Fused adapter '{adapter_name}' with scale: {lora_scale}")
                 except Exception as e:
+                    print(f"Could not set/fuse LORA adapter '{adapter_name}': {e}")
+                    self.pipe.unfuse_lora()
                     self.pipe.set_adapters([]) # Disable LORAs if setting failed
             else:
                 if adapter_name == "none":
                     print("LORAs disabled by user choice.")
                 else:
                     print(f"LORA '{adapter_name}' not loaded or available, disabling LORAs.")
+                # --- FIX: Unfuse any previously fused LoRAs ---
+                self.pipe.unfuse_lora()
                 self.pipe.set_adapters([]) # Disable all LORAs
         pipe_kwargs["generator"] = generator
+        # --- START FIX: Use Compel ---
+        if self.use_compel and self.compel is not None:
             try:
+                print("Encoding prompts with Compel...")
+                # Encode positive prompt
+                conditioning, pooled = self.compel(prompt)
+                pipe_kwargs["prompt_embeds"] = conditioning
+                pipe_kwargs["pooled_prompt_embeds"] = pooled
+                # Encode negative prompt
+                if not negative_prompt or not negative_prompt.strip():
+                    negative_prompt = "" # Compel must encode something
+                negative_conditioning, negative_pooled = self.compel(negative_prompt)
+                pipe_kwargs["negative_prompt_embeds"] = negative_conditioning
+                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled
+                print(f"[OK] Compel encoded - Prompt: {conditioning.shape}")
             except Exception as e:
+                print(f"Compel encoding failed, using standard prompts: {e}")
                 traceback.print_exc()
                 pipe_kwargs["prompt"] = prompt
                 pipe_kwargs["negative_prompt"] = negative_prompt
         else:
+            print("[WARNING] Compel not found, using standard prompt encoding.")
             pipe_kwargs["prompt"] = prompt
             pipe_kwargs["negative_prompt"] = negative_prompt
         # --- END FIX ---
                 conditioning_scales.append(identity_control_scale)
                 scale_debug_str.append(f"Identity: {identity_control_scale:.2f}")
+                # --- START FIX: Pass raw face embedding to pipeline ---
+                if face_embeddings is not None and self.models_loaded.get('ip_adapter', False):
+                    print(f"Adding InstantID face embeddings (raw)...")
+                    # The pipeline expects the raw [1, 512] embedding
+                    face_emb_tensor = torch.from_numpy(face_embeddings).to(device=self.device, dtype=self.dtype)
+                    pipe_kwargs["image_embeds"] = face_emb_tensor
+                    # Set the IP-Adapter scale (face preservation)
+                    self.pipe.set_ip_adapter_scale(identity_preservation)
+                    print(f"  - IP-Adapter scale set to: {identity_preservation:.2f}")
                 elif has_detected_faces:
                     print("  Face detected but IP-Adapter/embeddings unavailable, using keypoints only")
+                # --- END FIX ---
             else:
                 # No face detected - blank map needed to maintain ControlNet list order