primerz committed · verified
Commit 912e6dd · Parent(s): 42ed573

Update app.py

Files changed (1):
  1. app.py +158 -77
app.py CHANGED
@@ -6,11 +6,12 @@ from diffusers import (
     StableDiffusionXLControlNetPipeline,
     ControlNetModel,
     AutoencoderKL,
-    DPMSolverMultistepScheduler
+    DPMSolverMultistepScheduler,
+    EulerAncestralDiscreteScheduler
 )
 from diffusers.models.attention_processor import AttnProcessor2_0
 from insightface.app import FaceAnalysis
-from PIL import Image
+from PIL import Image, ImageEnhance, ImageFilter
 import numpy as np
 import cv2
 from transformers import pipeline as transformers_pipeline
@@ -29,6 +30,12 @@ class RetroArtConverter:
     def __init__(self):
         self.device = device
         self.dtype = dtype
+        self.models_loaded = {
+            'custom_checkpoint': False,
+            'custom_vae': False,
+            'lora': False,
+            'instantid': False
+        }
 
         # Initialize face analysis for InstantID
         print("Loading face analysis model...")
@@ -43,7 +50,7 @@ class RetroArtConverter:
             self.face_detection_enabled = True
         except Exception as e:
             print(f"⚠️ Face detection not available: {e}")
-            print("Continuing without face detection (will still work fine)")
+            print("Continuing without face detection")
             self.face_app = None
             self.face_detection_enabled = False
 
@@ -64,9 +71,10 @@ class RetroArtConverter:
             ).to(self.device)
             print("✓ InstantID ControlNet loaded successfully")
             self.instantid_enabled = True
+            self.models_loaded['instantid'] = True
         except Exception as e:
             print(f"⚠️ InstantID ControlNet not available: {e}")
-            print("Running without InstantID (identity may not be preserved)")
+            print("Running without InstantID")
             self.controlnet_instantid = None
             self.instantid_enabled = False
 
@@ -83,13 +91,15 @@ class RetroArtConverter:
                 torch_dtype=self.dtype
             ).to(self.device)
             print("✓ Custom VAE loaded successfully")
+            self.models_loaded['custom_vae'] = True
         except Exception as e:
-            print(f"Warning: Could not load custom VAE: {e}")
-            print("Using default SDXL VAE")
+            print(f"⚠️ Could not load custom VAE: {e}")
+            print("Using high-quality SDXL VAE instead")
             self.vae = AutoencoderKL.from_pretrained(
                 "madebyollin/sdxl-vae-fp16-fix",
                 torch_dtype=self.dtype
             ).to(self.device)
+            self.models_loaded['custom_vae'] = False
 
         # Load depth estimator for preprocessing
         print("Loading depth estimator...")
@@ -123,9 +133,10 @@ class RetroArtConverter:
                 use_safetensors=True
             ).to(self.device)
             print("✓ Custom checkpoint loaded successfully")
+            self.models_loaded['custom_checkpoint'] = True
         except Exception as e:
-            print(f"Warning: Could not load custom checkpoint: {e}")
-            print("Using default SDXL")
+            print(f"⚠️ Could not load custom checkpoint: {e}")
+            print("Using default SDXL base model")
             self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                 "stabilityai/stable-diffusion-xl-base-1.0",
                 controlnet=controlnets,
@@ -133,8 +144,9 @@ class RetroArtConverter:
                 torch_dtype=self.dtype,
                 use_safetensors=True
             ).to(self.device)
+            self.models_loaded['custom_checkpoint'] = False
 
-        # Load LORA from HuggingFace Hub (requires PEFT)
+        # Load LORA from HuggingFace Hub
         print("Loading LORA (retroart) from HuggingFace Hub...")
         try:
             lora_path = hf_hub_download(
@@ -144,18 +156,24 @@ class RetroArtConverter:
             )
             self.pipe.load_lora_weights(lora_path)
             print("✓ LORA loaded successfully")
+            self.models_loaded['lora'] = True
         except Exception as e:
-            print(f"Warning: Could not load LORA: {e}")
+            print(f"⚠️ Could not load LORA: {e}")
             print("Running without LORA")
+            self.models_loaded['lora'] = False
 
-        # Optimize pipeline
-        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+        # Use EulerAncestral scheduler for better quality
+        self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
             self.pipe.scheduler.config
         )
 
-        self.pipe.enable_vae_slicing()
+        # Disable VAE slicing for better quality (use only if you have VRAM issues)
+        # self.pipe.enable_vae_slicing()
+
+        # Use PyTorch 2.0 scaled dot-product attention
         self.pipe.unet.set_attn_processor(AttnProcessor2_0())
 
+        # Try to enable xformers if available
         if self.device == "cuda":
             try:
                 self.pipe.enable_xformers_memory_efficient_attention()
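
The scheduler swap in this hunk follows the usual diffusers pattern: the new scheduler is rebuilt from the current scheduler's config, so the noise-schedule settings carry over and only the sampling algorithm changes. A minimal sketch outside this app's class:

```python
from diffusers import EulerAncestralDiscreteScheduler, StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0"
)
# Rebuild the sampler from the existing config; the checkpoint is untouched.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
```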
@@ -167,16 +185,46 @@ class RetroArtConverter:
         self.using_multiple_controlnets = isinstance(controlnets, list)
         print(f"Pipeline initialized with {'multiple' if self.using_multiple_controlnets else 'single'} ControlNet(s)")
 
+        print("\n=== MODEL STATUS ===")
+        for model, loaded in self.models_loaded.items():
+            status = "✓ LOADED" if loaded else "✗ FALLBACK"
+            print(f"{model}: {status}")
+        print("===================\n")
+
         print("Model initialization complete!")
 
-    def get_depth_map(self, image):
-        """Generate depth map from input image"""
+    def enhance_image_quality(self, image):
+        """Enhance input image quality before processing"""
+        # Sharpen slightly
+        enhancer = ImageEnhance.Sharpness(image)
+        image = enhancer.enhance(1.2)
+
+        # Enhance contrast slightly
+        enhancer = ImageEnhance.Contrast(image)
+        image = enhancer.enhance(1.1)
+
+        return image
+
+    def get_depth_map(self, image, enhance=True):
+        """Generate depth map from input image with quality improvements"""
+        # Enhance image before depth estimation if needed
+        if enhance:
+            image = self.enhance_image_quality(image)
+
         depth = self.depth_estimator(image)
         depth_image = depth['depth']
 
         depth_array = np.array(depth_image)
-        depth_normalized = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min()) * 255
+
+        # Better normalization with histogram stretching
+        depth_min, depth_max = np.percentile(depth_array, [2, 98])
+        depth_normalized = np.clip((depth_array - depth_min) / (depth_max - depth_min + 1e-8), 0, 1) * 255
         depth_normalized = depth_normalized.astype(np.uint8)
+
+        # Apply slight gaussian blur to reduce noise
+        depth_normalized = cv2.GaussianBlur(depth_normalized, (3, 3), 0)
+
+        # Convert to 3-channel image
         depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
 
         return Image.fromarray(depth_colored)
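
Why the percentile stretch: plain min/max normalization lets a few extreme depth values compress everything else into a couple of gray levels, while clipping to the 2nd/98th percentile spreads the bulk of the map across the full 0-255 range. A self-contained toy comparison:

```python
import numpy as np

# 98 depths clustered in [0.4, 0.6] plus two outliers.
depth = np.concatenate([np.linspace(0.4, 0.6, 98), [0.0, 100.0]])

# Min/max: the 100.0 outlier squeezes the cluster into about one gray level.
minmax = (depth - depth.min()) / (depth.max() - depth.min()) * 255

# Percentile stretch as in get_depth_map: outliers are clipped first,
# so the cluster spans nearly the full 0-255 range.
lo, hi = np.percentile(depth, [2, 98])
stretched = np.clip((depth - lo) / (hi - lo + 1e-8), 0, 1) * 255

print(np.ptp(minmax[:98]), np.ptp(stretched[:98]))  # tiny vs. wide spread
```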
@@ -200,19 +248,6 @@ class RetroArtConverter:
             print(f"Face embedding extraction error: {e}")
             return None
 
-    def prepare_face_image(self, image, face_bbox):
-        """Prepare face image for InstantID ControlNet"""
-        x1, y1, x2, y2 = map(int, face_bbox)
-        # Add some padding
-        padding = 20
-        x1 = max(0, x1 - padding)
-        y1 = max(0, y1 - padding)
-        x2 = min(image.width, x2 + padding)
-        y2 = min(image.height, y2 + padding)
-
-        face_image = image.crop((x1, y1, x2, y2))
-        return face_image
-
     def calculate_target_size(self, original_width, original_height, max_dimension=1024):
         """Calculate target size maintaining aspect ratio"""
         aspect_ratio = original_width / original_height
@@ -235,14 +270,15 @@ class RetroArtConverter:
         input_image,
         prompt="retro pixel art game, 16-bit style, vibrant colors",
         negative_prompt="blurry, low quality, modern, photorealistic, 3d render",
-        num_inference_steps=30,
+        num_inference_steps=40,  # Increased for better quality
         guidance_scale=7.5,
-        controlnet_conditioning_scale=0.8,
+        controlnet_conditioning_scale=0.6,  # Reduced for less depth influence
         lora_scale=0.85,
-        identity_preservation=0.8,  # NEW PARAMETER
-        image_scale=0.2  # NEW PARAMETER for InstantID strength
+        identity_preservation=0.8,
+        image_scale=0.2,
+        enhance_quality=True  # New parameter
     ):
-        """Main generation function"""
+        """Main generation function with quality improvements"""
 
         # Resize image maintaining aspect ratio
         original_width, original_height = input_image.size
@@ -250,14 +286,19 @@ class RetroArtConverter:
 
         print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
 
+        # Use LANCZOS for high-quality resizing
         resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)
 
-        # Generate depth map
+        # Optionally enhance image quality
+        if enhance_quality:
+            resized_image = self.enhance_image_quality(resized_image)
+
+        # Generate depth map with quality enhancements
         print("Generating depth map...")
-        depth_image = self.get_depth_map(resized_image)
+        depth_image = self.get_depth_map(resized_image, enhance=enhance_quality)
         depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)
 
-        # Determine if we're using multiple ControlNets based on initialization
+        # Determine if we're using multiple ControlNets
         using_multiple_controlnets = self.using_multiple_controlnets
 
         # Extract face embeddings if InstantID is enabled
@@ -280,16 +321,20 @@ class RetroArtConverter:
             prompt = f"portrait, detailed face, facial features, {prompt}"
 
         # Set LORA scale
-        if hasattr(self.pipe, 'set_adapters'):
+        if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
             try:
                 self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
-            except:
-                print("Could not set LORA adapters, continuing without")
+                print(f"LORA scale set to: {lora_scale}")
+            except Exception as e:
+                print(f"Could not set LORA adapters: {e}")
+
+        # Enhanced negative prompt for better quality
+        enhanced_negative_prompt = f"{negative_prompt}, worst quality, low quality, normal quality, lowres, watermark, signature, text, jpeg artifacts, noise, grainy"
 
         # Prepare pipeline kwargs
         pipe_kwargs = {
             "prompt": prompt,
-            "negative_prompt": negative_prompt,
+            "negative_prompt": enhanced_negative_prompt,
             "num_inference_steps": num_inference_steps,
             "guidance_scale": guidance_scale,
             "width": target_width,
@@ -299,7 +344,6 @@ class RetroArtConverter:
 
         # Add control images and scales based on ControlNet configuration
         if using_multiple_controlnets and has_detected_faces:
-            # Multiple ControlNets: depth + InstantID
             print("Using multiple ControlNets (Depth + InstantID)")
             control_images = [depth_image, resized_image]
             conditioning_scales = [controlnet_conditioning_scale, image_scale]
@@ -312,23 +356,21 @@ class RetroArtConverter:
             pipe_kwargs["cross_attention_kwargs"] = {"ip_adapter_image_embeds": [face_embeddings]}
 
         elif using_multiple_controlnets and not has_detected_faces:
-            # Multiple ControlNets initialized but no faces detected
-            # Pass images for both controlnets but with zero weight for InstantID
             print("Multiple ControlNets available but no faces detected, using depth only")
-            control_images = [depth_image, depth_image]  # Use depth for both
-            conditioning_scales = [controlnet_conditioning_scale, 0.0]  # Zero weight for InstantID
+            control_images = [depth_image, depth_image]
+            conditioning_scales = [controlnet_conditioning_scale, 0.0]
 
             pipe_kwargs["image"] = control_images
             pipe_kwargs["controlnet_conditioning_scale"] = conditioning_scales
 
         else:
-            # Single ControlNet (depth only)
             print("Using single ControlNet (Depth only)")
             pipe_kwargs["image"] = depth_image
             pipe_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
 
         # Generate image
         print("Generating retro art...")
+        print(f"Steps: {num_inference_steps}, Guidance: {guidance_scale}")
         result = self.pipe(**pipe_kwargs)
 
         return result.images[0]
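
The no-faces branch preserves an invariant of multi-ControlNet pipelines: a pipeline constructed with two ControlNets expects two control images on every call, so the unused InstantID slot is fed a dummy image and silenced with a 0.0 conditioning scale. A small sketch of that calling convention, assuming `pipe` is the two-ControlNet pipeline built in `__init__`:

```python
def run_depth_only(pipe, prompt, depth_image, steps=40, depth_scale=0.6):
    """Depth-only generation on a depth+InstantID pipeline: one image per
    ControlNet is still required, but a 0.0 weight makes the second a no-op."""
    result = pipe(
        prompt=prompt,
        image=[depth_image, depth_image],                  # one per ControlNet
        controlnet_conditioning_scale=[depth_scale, 0.0],  # InstantID disabled
        num_inference_steps=steps,
    )
    return result.images[0]
```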
@@ -347,8 +389,9 @@ def process_image(
     guidance_scale,
     controlnet_scale,
     lora_scale,
-    identity_preservation,  # NEW
-    image_scale  # NEW
+    identity_preservation,
+    image_scale,
+    enhance_quality
 ):
     if image is None:
         return None
@@ -362,8 +405,9 @@ def process_image(
             guidance_scale=guidance_scale,
             controlnet_conditioning_scale=controlnet_scale,
             lora_scale=lora_scale,
-            identity_preservation=identity_preservation,  # NEW
-            image_scale=image_scale  # NEW
+            identity_preservation=identity_preservation,
+            image_scale=image_scale,
+            enhance_quality=enhance_quality
         )
         return result
     except Exception as e:
@@ -373,59 +417,75 @@ def process_image(
     raise gr.Error(f"Generation failed: {str(e)}")
 
 # Create Gradio interface
-with gr.Blocks(title="RetroArt Converter") as demo:
+with gr.Blocks(title="RetroArt Converter", theme=gr.themes.Soft()) as demo:
     gr.Markdown("""
-    # 🎮 RetroArt Converter
+    # 🎮 RetroArt Converter - Quality Enhanced
 
-    Convert any image into retro game art style!
+    Convert any image into retro game art style with improved quality!
 
     **Features:**
-    - Custom SDXL checkpoint (Horizon)
+    - High-quality depth estimation and preprocessing
+    - Enhanced prompts for better results
+    - Custom SDXL checkpoint (Horizon)
     - Pixelate VAE for authentic retro look
     - RetroArt LORA for style enhancement
-    - Face preservation with InstantID (if available)
-    - Depth-aware generation with ControlNet
+    - Face preservation with InstantID
     """)
 
+    # Model status display
+    if converter.models_loaded:
+        status_text = "**Loaded Models:**\n"
+        status_text += f"- Custom Checkpoint: {'✓' if converter.models_loaded['custom_checkpoint'] else '✗ (using SDXL base)'}\n"
+        status_text += f"- Custom VAE: {'✓' if converter.models_loaded['custom_vae'] else '✗ (using default VAE)'}\n"
+        status_text += f"- LORA: {'✓' if converter.models_loaded['lora'] else '✗ (disabled)'}\n"
+        status_text += f"- InstantID: {'✓' if converter.models_loaded['instantid'] else '✗ (disabled)'}\n"
+        gr.Markdown(status_text)
+
     with gr.Row():
         with gr.Column():
             input_image = gr.Image(label="Input Image", type="pil")
 
             prompt = gr.Textbox(
                 label="Prompt",
-                value="retro pixel art game, 16-bit style, vibrant colors, detailed",
+                value="masterpiece, best quality, retro pixel art game, 16-bit style, vibrant colors, highly detailed",
                 lines=3
             )
 
             negative_prompt = gr.Textbox(
                 label="Negative Prompt",
-                value="blurry, low quality, modern, photorealistic, 3d render, ugly, distorted",
+                value="blurry, low quality, modern, photorealistic, 3d render, ugly, distorted, deformed",
                 lines=2
             )
 
-            with gr.Accordion("Advanced Settings", open=False):
+            enhance_quality = gr.Checkbox(
+                label="Enable Quality Enhancement",
+                value=True,
+                info="Sharpen and enhance input image before processing"
+            )
+
+            with gr.Accordion("Quality Settings", open=True):
                 steps = gr.Slider(
                     minimum=20,
-                    maximum=50,
-                    value=30,
-                    step=1,
-                    label="Inference Steps"
+                    maximum=70,
+                    value=40,
+                    step=5,
+                    label="Inference Steps (more = better quality but slower)"
                 )
 
                 guidance_scale = gr.Slider(
-                    minimum=1,
+                    minimum=3,
                     maximum=15,
                     value=7.5,
                     step=0.5,
-                    label="Guidance Scale"
+                    label="Guidance Scale (how closely to follow prompt)"
                 )
 
                 controlnet_scale = gr.Slider(
                     minimum=0,
-                    maximum=2,
-                    value=0.8,
-                    step=0.1,
-                    label="ControlNet Depth Scale"
+                    maximum=1.5,
+                    value=0.6,
+                    step=0.05,
+                    label="ControlNet Depth Scale (lower = more creative)"
                )
 
                lora_scale = gr.Slider(
@@ -435,14 +495,14 @@ with gr.Blocks(title="RetroArt Converter") as demo:
                     step=0.05,
                     label="RetroArt LORA Scale"
                 )
-
-                # NEW PARAMETERS
+
+            with gr.Accordion("Identity Settings (for portraits)", open=False):
                 identity_preservation = gr.Slider(
                     minimum=0,
                     maximum=1.5,
                     value=0.8,
                     step=0.1,
-                    label="Identity Preservation (InstantID strength)"
+                    label="Identity Preservation"
                 )
 
                 image_scale = gr.Slider(
@@ -453,16 +513,34 @@ with gr.Blocks(title="RetroArt Converter") as demo:
                     label="InstantID Image Scale"
                 )
 
-            generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary")
+            generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary", size="lg")
 
         with gr.Column():
             output_image = gr.Image(label="Retro Art Output")
+
+            gr.Markdown("""
+            ### Tips for Best Quality:
+            1. **Use high-resolution input images** (at least 512x512)
+            2. **Increase inference steps** to 50-60 for maximum quality
+            3. **Lower ControlNet scale** (0.5-0.6) for more stylization
+            4. **Adjust guidance scale:** 7-9 for balanced results
+            5. **Enable quality enhancement** for sharper inputs
+            6. Try different prompts with quality keywords: "masterpiece, best quality, highly detailed"
+            """)
 
     gr.Examples(
         examples=[
-            ["example_portrait.jpg", "retro pixel art portrait, 16-bit game character", "blurry, modern", 30, 7.5, 0.8, 0.85, 0.8, 0.2],
+            [
+                "example_portrait.jpg",
+                "masterpiece, best quality, retro pixel art portrait, 16-bit game character, vibrant colors",
+                "blurry, modern, low quality",
+                40, 7.5, 0.6, 0.85, 0.8, 0.2, True
+            ],
+        ],
+        inputs=[
+            input_image, prompt, negative_prompt, steps, guidance_scale,
+            controlnet_scale, lora_scale, identity_preservation, image_scale, enhance_quality
         ],
-        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale, identity_preservation, image_scale],
         outputs=[output_image],
         fn=process_image,
         cache_examples=False
@@ -470,7 +548,10 @@ with gr.Blocks(title="RetroArt Converter") as demo:
 
     generate_btn.click(
         fn=process_image,
-        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale, identity_preservation, image_scale],
+        inputs=[
+            input_image, prompt, negative_prompt, steps, guidance_scale,
+            controlnet_scale, lora_scale, identity_preservation, image_scale, enhance_quality
+        ],
         outputs=[output_image]
     )
 
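
One wiring note on the interface changes: the new `enhance_quality` checkbox has to appear in both the `gr.Examples` inputs and the `generate_btn.click` inputs, because Gradio maps each inputs list positionally onto the handler's arguments. A self-contained sketch of that rule:

```python
import gradio as gr

def process(image, steps, enhance_quality):
    # Stand-in handler: argument order must match the `inputs` list below.
    return image

with gr.Blocks() as demo:
    image = gr.Image(type="pil")
    steps = gr.Slider(20, 70, value=40, step=5, label="Steps")
    enhance_quality = gr.Checkbox(value=True, label="Enable Quality Enhancement")
    output = gr.Image()
    btn = gr.Button("Run")
    # Any new control must be added to every inputs list that feeds the
    # handler, in the same position as the matching argument.
    btn.click(fn=process, inputs=[image, steps, enhance_quality], outputs=[output])

demo.launch()
```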