import spaces  # MUST be first, before any CUDA-related imports
import gradio as gr
import torch
from diffusers import (
    ControlNetModel,
    AutoencoderKL,
    DPMSolverMultistepScheduler,
    LCMScheduler
)
from diffusers.models.attention_processor import AttnProcessor2_0
from insightface.app import FaceAnalysis
from PIL import Image
import numpy as np
import cv2
from huggingface_hub import hf_hub_download
import os

# Import the custom img2img pipeline with InstantID
from pipeline_stable_diffusion_xl_instantid_img2img import StableDiffusionXLInstantIDImg2ImgPipeline, draw_kps
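# (this module must sit next to this script; an img2img InstantID pipeline of
# the same name is available among the diffusers community pipelines)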

# Import ZoeDetector for better depth maps
from controlnet_aux import ZoeDetector

# Configuration
MODEL_REPO = "primerz/pixagram"
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

# LORA trigger word
TRIGGER_WORD = "p1x3l4rt, pixel art"

print(f"Using device: {device}")
print(f"Loading models from: {MODEL_REPO}")
print(f"LORA Trigger Word: {TRIGGER_WORD}")

class RetroArtConverter:
    def __init__(self, use_lcm=False):
        self.device = device
        self.dtype = dtype
        self.use_lcm = use_lcm
        self.models_loaded = {
            'custom_checkpoint': False,
            'lora': False,
            'instantid': False
        }
        
        # Initialize face analysis for InstantID
        print("Loading face analysis model (antelopev2)...")
        try:
            self.face_app = FaceAnalysis(
                name='antelopev2',
                root='./models/insightface',
                providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
            )
            self.face_app.prepare(ctx_id=0, det_size=(640, 640))
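            # det_size is the detector's input resolution; larger sizes find
            # smaller faces at the cost of speed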
            print("โœ“ Face analysis model loaded successfully")
            self.face_detection_enabled = True
        except Exception as e:
            print(f"โš ๏ธ Face detection not available: {e}")
            self.face_app = None
            self.face_detection_enabled = False
        
        # Load ControlNet for InstantID
        print("Loading InstantID ControlNet...")
        try:
            self.controlnet_instantid = ControlNetModel.from_pretrained(
                "InstantX/InstantID",
                subfolder="ControlNetModel",
                torch_dtype=self.dtype
            ).to(self.device)
            print("โœ“ InstantID ControlNet loaded successfully")
            self.instantid_enabled = True
            self.models_loaded['instantid'] = True
        except Exception as e:
            print(f"โš ๏ธ InstantID ControlNet not available: {e}")
            self.controlnet_instantid = None
            self.instantid_enabled = False
        
        # Load ControlNet for Zoe depth
        print("Loading Zoe Depth ControlNet...")
        self.controlnet_depth = ControlNetModel.from_pretrained(
            "diffusers/controlnet-zoe-depth-sdxl-1.0",
            torch_dtype=self.dtype
        ).to(self.device)
        
        # Load Zoe depth detector (better than DPT)
        print("Loading Zoe depth detector...")
        try:
            self.zoe_detector = ZoeDetector.from_pretrained("lllyasviel/Annotators")
            self.zoe_detector.to(self.device)
            print("โœ“ Zoe detector loaded successfully")
        except Exception as e:
            print(f"โš ๏ธ Could not load Zoe detector: {e}")
            self.zoe_detector = None
        
        # Determine which controlnets to use
        if self.instantid_enabled and self.controlnet_instantid is not None:
            controlnets = [self.controlnet_instantid, self.controlnet_depth]
            print(f"Initializing with multiple ControlNets: InstantID + Zoe Depth")
        else:
            controlnets = self.controlnet_depth
            print(f"Initializing with single ControlNet: Zoe Depth only")
        
        # Load VAE
        print("Loading VAE...")
        self.vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix",
            torch_dtype=self.dtype
        ).to(self.device)
        
        # Load SDXL checkpoint from HuggingFace Hub
        print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
        try:
            model_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="horizon.safetensors",
                repo_type="model"
            )
            # Use the custom img2img pipeline for better results
            self.pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
                model_path,
                controlnet=controlnets,
                vae=self.vae,
                torch_dtype=self.dtype,
                use_safetensors=True
            ).to(self.device)
            print("โœ“ Custom checkpoint loaded successfully")
            self.models_loaded['custom_checkpoint'] = True
        except Exception as e:
            print(f"โš ๏ธ Could not load custom checkpoint: {e}")
            print("Using default SDXL base model")
            self.pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
                "stabilityai/stable-diffusion-xl-base-1.0",
                controlnet=controlnets,
                vae=self.vae,
                torch_dtype=self.dtype,
                use_safetensors=True
            ).to(self.device)
            self.models_loaded['custom_checkpoint'] = False
        
        # Load InstantID IP-Adapter
        if self.instantid_enabled:
            print("Loading InstantID IP-Adapter...")
            try:
                ip_adapter_path = hf_hub_download(
                    repo_id="InstantX/InstantID",
                    filename="ip-adapter.bin"
                )
                self.pipe.load_ip_adapter_instantid(ip_adapter_path)
                self.pipe.set_ip_adapter_scale(0.8)
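                # 0.8 trades identity fidelity against stylization headroom;
                # 1.0 pushes likeness hardest, lower values leave more room
                # for the LORA style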
                print("โœ“ InstantID IP-Adapter loaded successfully")
            except Exception as e:
                print(f"โš ๏ธ Could not load IP-Adapter: {e}")
        
        # Load LORA from HuggingFace Hub
        print("Loading LORA (retroart) from HuggingFace Hub...")
        try:
            lora_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="retroart.safetensors",
                repo_type="model"
            )
            # Name the adapter so set_adapters(["retroart"]) below can find it
            self.pipe.load_lora_weights(lora_path, adapter_name="retroart")
            print("✓ LORA loaded successfully")
            print(f"  Trigger word: '{TRIGGER_WORD}'")
            self.models_loaded['lora'] = True
        except Exception as e:
            print(f"โš ๏ธ Could not load LORA: {e}")
            self.models_loaded['lora'] = False
        
        # Choose scheduler based on mode
        if use_lcm:
            print("Setting up LCM scheduler for fast generation...")
            self.pipe.scheduler = LCMScheduler.from_config(
                self.pipe.scheduler.config
            )
        else:
            print("Setting up DPMSolverMultistep scheduler with Karras sigmas for quality...")
            self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
                self.pipe.scheduler.config,
                use_karras_sigmas=True
            )
        
        # Enable attention optimizations
        self.pipe.unet.set_attn_processor(AttnProcessor2_0())
        
        # Try to enable xformers
        if self.device == "cuda":
            try:
                self.pipe.enable_xformers_memory_efficient_attention()
                print("โœ“ xformers enabled")
            except Exception as e:
                print(f"โš ๏ธ xformers not available: {e}")
        
        # Track controlnet configuration
        self.using_multiple_controlnets = isinstance(controlnets, list)
        print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
        
        print("\n=== MODEL STATUS ===")
        for model, loaded in self.models_loaded.items():
            status = "โœ“ LOADED" if loaded else "โœ— FALLBACK"
            print(f"{model}: {status}")
        print("===================\n")
        
        print("โœ“ Model initialization complete!")
        if use_lcm:
            print("\n=== LCM CONFIGURATION ===")
            print("Scheduler: LCM")
            print("Recommended Steps: 8-12")
            print("Recommended CFG: 1.0-1.5")
            print("Recommended Strength: 0.6-0.8")
        else:
            print("\n=== QUALITY CONFIGURATION ===")
            print("Scheduler: DPMSolverMultistep + Karras")
            print("Recommended Steps: 25-40")
            print("Recommended CFG: 5.0-7.5")
            print("Recommended Strength: 0.4-0.7")
        print(f"LORA Trigger: '{TRIGGER_WORD}'")
        print("=========================\n")
    
    def get_depth_map(self, image):
        """Generate depth map from input image using Zoe"""
        if self.zoe_detector is not None:
            # Use Zoe detector for better depth maps
            depth_image = self.zoe_detector(image)
            return depth_image
        else:
            # Fallback: use the grayscale image as a crude depth proxy
            # (luminance is not depth, but it keeps the pipeline usable)
            img_array = np.array(image.convert('L'))
            depth_colored = cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
            return Image.fromarray(depth_colored)
    
    def calculate_optimal_size(self, original_width, original_height):
        """Calculate optimal size from recommended resolutions"""
        aspect_ratio = original_width / original_height
        
        # Recommended resolutions for SDXL
        recommended_sizes = [
            (896, 1152),  # Portrait
            (1152, 896),  # Landscape
            (832, 1216),  # Tall portrait
            (1216, 832),  # Wide landscape
            (1024, 1024)  # Square
        ]
        
        # Find closest matching aspect ratio
        best_match = None
        best_diff = float('inf')
        
        for width, height in recommended_sizes:
            rec_aspect = width / height
            diff = abs(rec_aspect - aspect_ratio)
            if diff < best_diff:
                best_diff = diff
                best_match = (width, height)
        
        # Ensure dimensions are multiples of 8
        width, height = best_match
        width = (width // 8) * 8
        height = (height // 8) * 8
        
        return width, height
    
    def add_trigger_word(self, prompt):
        """Add trigger word to prompt if not present"""
        if TRIGGER_WORD.lower() not in prompt.lower():
            return f"{TRIGGER_WORD}, {prompt}"
        return prompt
    
    def generate_retro_art(
        self,
        input_image,
        prompt="retro game character, vibrant colors, detailed",
        negative_prompt="blurry, low quality, ugly, distorted",
        num_inference_steps=25,
        guidance_scale=5.0,
        strength=0.6,  # img2img strength
        controlnet_conditioning_scale=0.8,
        lora_scale=1.0,
        face_strength=0.85,  # InstantID face strength
        depth_control_scale=0.8  # Zoe depth strength
    ):
        """Generate retro art using img2img pipeline with face keypoints"""
        
        # Add trigger word to prompt
        prompt = self.add_trigger_word(prompt)
        
        # Calculate optimal size
        original_width, original_height = input_image.size
        target_width, target_height = self.calculate_optimal_size(original_width, original_height)
        
        print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
        print(f"Prompt: {prompt}")
        
        # Resize with high quality
        resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
        
        # Generate depth map using Zoe
        print("Generating Zoe depth map...")
        depth_image = self.get_depth_map(resized_image)
        if depth_image.size != (target_width, target_height):
            depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
        
        # Handle face detection for InstantID
        using_multiple_controlnets = self.using_multiple_controlnets
        face_kps = None
        face_embeddings = None
        has_detected_faces = False
        
        if using_multiple_controlnets and self.face_app is not None:
            print("Detecting faces and extracting keypoints...")
            img_array = np.array(resized_image)
            faces = self.face_app.get(img_array)
            
            if len(faces) > 0:
                has_detected_faces = True
                print(f"Detected {len(faces)} face(s)")
                
                # Get the largest face
                face = sorted(faces, 
                            key=lambda x: (x.bbox[2] - x.bbox[0]) * (x.bbox[3] - x.bbox[1]))[-1]
                
                # Extract face embeddings
                face_embeddings = torch.from_numpy(face.normed_embedding).unsqueeze(0).to(
                    self.device, dtype=self.dtype
                )
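                # (normed_embedding is insightface's L2-normalized identity
                # vector; the IP-Adapter uses it as the identity condition)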
                
                # Draw the five facial keypoints for the InstantID ControlNet;
                # these guide face position and pose, while identity traits
                # (age, gender, expression) come from the embedding above
                face_kps = draw_kps(resized_image, face.kps)
                print("Face keypoints drawn (pose guidance; identity via embedding)")
            else:
                print("No faces detected in image")
        
        # Set LORA scale
        if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
            try:
                self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
                print(f"LORA scale: {lora_scale}")
            except Exception as e:
                print(f"Could not set LORA scale: {e}")
        
        # Prepare generation kwargs
        pipe_kwargs = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "image": resized_image,  # Original image for img2img
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
            "strength": strength,  # img2img denoising strength
            "generator": torch.Generator(device=self.device).manual_seed(42)
        }
        
        # Configure ControlNet inputs; controlnet_conditioning_scale is applied
        # as a global multiplier on top of the per-controlnet strengths
        if using_multiple_controlnets and has_detected_faces and face_kps is not None:
            print("Using InstantID + Zoe Depth ControlNets with face keypoints")
            control_images = [face_kps, depth_image]
            conditioning_scales = [
                face_strength * controlnet_conditioning_scale,
                depth_control_scale * controlnet_conditioning_scale
            ]
            
            pipe_kwargs["control_image"] = control_images
            pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
            
            # Add face embeddings through IP-Adapter
            if face_embeddings is not None and hasattr(self.pipe, 'set_ip_adapter_scale'):
                pipe_kwargs["ip_adapter_image_embeds"] = [face_embeddings]
        
        elif using_multiple_controlnets:
            print("Multiple ControlNets available but no faces detected - using depth only")
            # Use depth for both to maintain structure
            control_images = [depth_image, depth_image]
            conditioning_scales = [0.0, depth_control_scale * controlnet_conditioning_scale]  # zero out the InstantID branch
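            # The MultiControlNet wrapper expects one control image per
            # controlnet, so the depth map is passed twice and the InstantID
            # branch is neutralized with a 0.0 scale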
            
            pipe_kwargs["control_image"] = control_images
            pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
        
        else:
            print("Using Zoe Depth ControlNet only")
            pipe_kwargs["control_image"] = depth_image
            pipe_kwargs["controlnet_conditioning_scale"] = depth_control_scale
        
        # Generate
        mode = "LCM" if self.use_lcm else "Quality"
        print(f"Generating with {mode} mode: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
        result = self.pipe(**pipe_kwargs)
        
        return result.images[0]
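
# Minimal programmatic use (sketch; bypasses the Gradio UI):
#   conv = RetroArtConverter(use_lcm=True)
#   art = conv.generate_retro_art(Image.open("photo.jpg"), prompt="knight portrait")
#   art.save("retro.png")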

# Initialize converter
print("Initializing RetroArt Converter...")
print("Choose mode: LCM (fast) or Quality (better)")
converter_lcm = RetroArtConverter(use_lcm=True)
converter_quality = RetroArtConverter(use_lcm=False)
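# Note: keeping two converters means two full SDXL pipelines in memory
# (roughly double the VRAM); a single pipeline with a per-request scheduler
# swap would be lighter if memory is tight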

@spaces.GPU
def process_image(
    image,
    prompt,
    negative_prompt,
    steps,
    guidance_scale,
    strength,
    controlnet_scale,
    lora_scale,
    face_strength,
    depth_control_scale,
    use_lcm_mode
):
    if image is None:
        return None
    
    try:
        # Choose the right converter based on mode
        converter = converter_lcm if use_lcm_mode else converter_quality
        
        result = converter.generate_retro_art(
            input_image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            strength=strength,
            controlnet_conditioning_scale=controlnet_scale,
            lora_scale=lora_scale,
            face_strength=face_strength,
            depth_control_scale=depth_control_scale
        )
        return result
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
        raise gr.Error(f"Generation failed: {str(e)}")

# Gradio UI
with gr.Blocks(title="RetroArt Converter - Improved", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎮 RetroArt Converter (Improved with True Img2Img)
    
    Convert images into retro pixel art style with **automatic face detection** and **identity preservation**!
    
    **✨ Key Improvements:**
    - 🎯 **True img2img pipeline** for better structure preservation
    - 👤 **InstantID keypoints + embedding**: preserves age, gender, and expression
    - 🗺️ **Zoe Depth**: superior depth estimation
    - ⚡ **Dual mode**: fast LCM or quality DPM++
    - 🎨 Custom pixel art LORA with trigger: `p1x3l4rt, pixel art`
    """)
    
    # Model status
    status_text = "**๐Ÿ“ฆ Loaded Models (LCM Mode):**\n"
    status_text += f"- Custom Checkpoint: {'โœ“ Loaded' if converter_lcm.models_loaded['custom_checkpoint'] else 'โœ— Using SDXL base'}\n"
    status_text += f"- LORA (RetroArt): {'โœ“ Loaded' if converter_lcm.models_loaded['lora'] else 'โœ— Disabled'}\n"
    status_text += f"- InstantID: {'โœ“ Loaded' if converter_lcm.models_loaded['instantid'] else 'โœ— Disabled'}\n"
    gr.Markdown(status_text)
    
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="pil")
            
            prompt = gr.Textbox(
                label="Prompt (trigger word auto-added)",
                value="retro game character, vibrant colors, highly detailed",
                lines=3,
                info=f"'{TRIGGER_WORD}' will be automatically added"
            )
            
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, ugly, distorted, deformed, bad anatomy",
                lines=2
            )
            
            use_lcm_mode = gr.Checkbox(
                label="Use LCM Mode (Fast)",
                value=True,
                info="Uncheck for Quality mode (slower but better)"
            )
            
            with gr.Accordion("โš™๏ธ Generation Settings", open=True):
                steps = gr.Slider(
                    minimum=4,
                    maximum=50,
                    value=12,
                    step=1,
                    label="Inference Steps (12 for LCM, 25-40 for Quality)"
                )
                
                guidance_scale = gr.Slider(
                    minimum=0.5,
                    maximum=15.0,
                    value=1.0,
                    step=0.1,
                    label="Guidance Scale (1.0-1.5 for LCM, 5-7.5 for Quality)"
                )
                
                strength = gr.Slider(
                    minimum=0.3,
                    maximum=1.0,
                    value=0.7,
                    step=0.05,
                    label="Img2Img Strength (how much to change)"
                )
            
            with gr.Accordion("๐ŸŽจ Style Settings", open=True):
                lora_scale = gr.Slider(
                    minimum=0.5,
                    maximum=1.5,
                    value=1.0,
                    step=0.05,
                    label="RetroArt LORA Scale"
                )
                
                controlnet_scale = gr.Slider(
                    minimum=0.3,
                    maximum=1.2,
                    value=0.8,
                    step=0.05,
                    label="Overall ControlNet Scale"
                )
            
            with gr.Accordion("๐Ÿ‘ค Face & Depth Settings", open=False):
                face_strength = gr.Slider(
                    minimum=0,
                    maximum=2.0,
                    value=0.85,
                    step=0.05,
                    label="Face Preservation (InstantID)",
                    info="Higher = better face likeness"
                )
                
                depth_control_scale = gr.Slider(
                    minimum=0,
                    maximum=1.0,
                    value=0.8,
                    step=0.05,
                    label="Zoe Depth Control Scale",
                    info="Higher = more structure preservation"
                )
            
            generate_btn = gr.Button("๐ŸŽจ Generate Retro Art", variant="primary", size="lg")
        
        with gr.Column():
            output_image = gr.Image(label="Retro Art Output")
            
            gr.Markdown("""
            ### 💡 Tips for Best Results:
            
            **Mode Selection:**
            - ✅ **LCM Mode**: 12 steps, CFG 1.0-1.5, Strength 0.6-0.8 (⚡ fast!)
            - ✅ **Quality Mode**: 25-40 steps, CFG 5-7.5, Strength 0.4-0.7 (🎨 better!)
            
            **Face Preservation:**
            - Faces are detected automatically and their keypoints drawn for InstantID
            - The identity embedding preserves age, gender, and expression
            - Adjust the "Face Preservation" slider to control likeness strength
            
            **For Best Quality:**
            - Use high-resolution input images (at least 512px on the short side)
            - For portraits: use Quality mode with a high face strength
            - For scenes: lower the img2img strength for more creative freedom
            - Balance the depth control between structure preservation and creativity
            
            **Style Control:**
            - The LORA trigger word is added automatically for pixel art style
            - Increase the LORA scale (1.2-1.5) for a stronger retro effect
            - Try: "SNES style", "16-bit RPG", "Game Boy Advance style"
            """)
    
    # Update defaults when switching modes
    def update_mode_defaults(use_lcm):
        if use_lcm:
            return (
                gr.update(value=12),  # steps
                gr.update(value=1.0),  # guidance_scale
                gr.update(value=0.7)  # strength
            )
        else:
            return (
                gr.update(value=30),  # steps
                gr.update(value=6.0),  # guidance_scale
                gr.update(value=0.6)  # strength
            )
    
    use_lcm_mode.change(
        fn=update_mode_defaults,
        inputs=[use_lcm_mode],
        outputs=[steps, guidance_scale, strength]
    )
    
    generate_btn.click(
        fn=process_image,
        inputs=[
            input_image, prompt, negative_prompt, steps, guidance_scale, strength,
            controlnet_scale, lora_scale, face_strength, depth_control_scale, use_lcm_mode
        ],
        outputs=[output_image]
    )

if __name__ == "__main__":
    demo.queue(max_size=20)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True
    )