import spaces  # MUST be first, before any CUDA-related imports
import gradio as gr
import torch
from diffusers import (
    StableDiffusionXLPipeline,
    StableDiffusionXLControlNetPipeline,
    ControlNetModel,
    AutoencoderKL,
    DPMSolverMultistepScheduler
)
from diffusers.models.attention_processor import AttnProcessor2_0
from insightface.app import FaceAnalysis
from PIL import Image
import numpy as np
import cv2
from transformers import pipeline as transformers_pipeline
from huggingface_hub import hf_hub_download
import os
# Configuration
MODEL_REPO = "primerz/pixagram"
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

print(f"Using device: {device}")
print(f"Loading models from: {MODEL_REPO}")
class RetroArtConverter:
    def __init__(self):
        self.device = device
        self.dtype = dtype

        # Initialize insightface (antelopev2) for optional face detection,
        # used below to bias prompts toward portraits
        print("Loading face analysis model...")
        try:
            self.face_app = FaceAnalysis(
                name='antelopev2',
                root='./models/insightface',
                providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
            )
            self.face_app.prepare(ctx_id=0, det_size=(640, 640))
            print("✓ Face analysis model loaded successfully")
            self.face_detection_enabled = True
        except Exception as e:
            print(f"⚠️ Face detection not available: {e}")
            print("Continuing without face detection (the app still works)")
            self.face_app = None
            self.face_detection_enabled = False
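        # Note (environment assumption): insightface does not always
        # auto-download antelopev2; if loading fails, the model files may need
        # to be placed manually under ./models/insightface/models/antelopev2.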
        # Load ControlNet for depth conditioning
        print("Loading ControlNet depth model...")
        self.controlnet_depth = ControlNetModel.from_pretrained(
            "diffusers/controlnet-zoe-depth-sdxl-1.0",
            torch_dtype=self.dtype
        ).to(self.device)
        # Load custom VAE from the Hugging Face Hub
        print("Loading custom VAE (pixelate) from HuggingFace Hub...")
        try:
            vae_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="pixelate.safetensors",
                repo_type="model"
            )
            self.vae = AutoencoderKL.from_single_file(
                vae_path,
                torch_dtype=self.dtype
            ).to(self.device)
            print("✓ Custom VAE loaded successfully")
        except Exception as e:
            print(f"Warning: Could not load custom VAE: {e}")
            print("Using default SDXL VAE")
            self.vae = AutoencoderKL.from_pretrained(
                "madebyollin/sdxl-vae-fp16-fix",
                torch_dtype=self.dtype
            ).to(self.device)
        # Load depth estimator for ControlNet preprocessing
        print("Loading depth estimator...")
        self.depth_estimator = transformers_pipeline(
            'depth-estimation',
            model="Intel/dpt-hybrid-midas",
            device=0 if self.device == "cuda" else -1
        )
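        # Note: this ControlNet variant was trained on Zoe depth maps; MiDaS
        # (dpt-hybrid-midas) output is usually close enough, but a ZoeDepth
        # estimator may match the conditioning distribution more closely.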
        # Load the SDXL checkpoint from the Hugging Face Hub
        print("Loading SDXL checkpoint (horizon) from HuggingFace Hub...")
        try:
            model_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="horizon.safetensors",
                repo_type="model"
            )
            self.pipe = StableDiffusionXLControlNetPipeline.from_single_file(
                model_path,
                controlnet=self.controlnet_depth,
                vae=self.vae,
                torch_dtype=self.dtype,
                use_safetensors=True
            ).to(self.device)
            print("✓ Custom checkpoint loaded successfully")
        except Exception as e:
            print(f"Warning: Could not load custom checkpoint: {e}")
            print("Using default SDXL")
            self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
                "stabilityai/stable-diffusion-xl-base-1.0",
                controlnet=self.controlnet_depth,
                vae=self.vae,
                torch_dtype=self.dtype,
                use_safetensors=True
            ).to(self.device)
        # Load the LoRA from the Hugging Face Hub (requires PEFT)
        print("Loading LoRA (retroart) from HuggingFace Hub...")
        try:
            lora_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="retroart.safetensors",
                repo_type="model"
            )
            # Name the adapter so set_adapters() can reference it later
            self.pipe.load_lora_weights(lora_path, adapter_name="retroart")
            self.lora_loaded = True
            print("✓ LoRA loaded successfully")
        except Exception as e:
            print(f"Warning: Could not load LoRA: {e}")
            print("Running without LoRA")
            self.lora_loaded = False
        # Optimize the pipeline: faster multistep scheduler
        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            self.pipe.scheduler.config
        )

        # For ZeroGPU, don't use model_cpu_offload
        # self.pipe.enable_model_cpu_offload()
        self.pipe.enable_vae_slicing()

        # Use PyTorch 2.0 scaled-dot-product attention for memory efficiency
        self.pipe.unet.set_attn_processor(AttnProcessor2_0())

        # Try to enable xformers if available (GPU only; supersedes the SDPA processor above)
        if self.device == "cuda":
            try:
                self.pipe.enable_xformers_memory_efficient_attention()
                print("✓ xformers enabled")
            except Exception as e:
                print(f"⚠️ xformers not available: {e}")

        print("Model initialization complete!")
    def get_depth_map(self, image):
        """Generate a depth map from the input image"""
        depth = self.depth_estimator(image)
        depth_image = depth['depth']

        # Convert to a numpy array and normalize to 0-255
        depth_array = np.array(depth_image)
        depth_normalized = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min()) * 255
        depth_normalized = depth_normalized.astype(np.uint8)

        # Convert to a 3-channel image for ControlNet
        depth_colored = cv2.cvtColor(depth_normalized, cv2.COLOR_GRAY2RGB)
        return Image.fromarray(depth_colored)
    def detect_faces(self, image):
        """Detect faces in the image using antelopev2"""
        if not self.face_detection_enabled or self.face_app is None:
            return []
        try:
            # insightface expects BGR input (OpenCV convention); PIL images are RGB
            img_array = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
            faces = self.face_app.get(img_array)
            return faces
        except Exception as e:
            print(f"Face detection error: {e}")
            return []
    def calculate_target_size(self, original_width, original_height, max_dimension=1024):
        """Calculate the target size, maintaining aspect ratio"""
        aspect_ratio = original_width / original_height
        if original_width > original_height:
            new_width = min(original_width, max_dimension)
            new_height = int(new_width / aspect_ratio)
        else:
            new_height = min(original_height, max_dimension)
            new_width = int(new_height * aspect_ratio)

        # Round down to the nearest multiple of 8 (required by the VAE)
        new_width = (new_width // 8) * 8
        new_height = (new_height // 8) * 8
        return new_width, new_height
    def generate_retro_art(
        self,
        input_image,
        prompt="retro pixel art game, 16-bit style, vibrant colors",
        negative_prompt="blurry, low quality, modern, photorealistic, 3d render",
        num_inference_steps=30,
        guidance_scale=7.5,
        controlnet_conditioning_scale=0.8,
        lora_scale=0.85
    ):
        """Main generation function"""
        # Resize the image, maintaining aspect ratio
        original_width, original_height = input_image.size
        target_width, target_height = self.calculate_target_size(original_width, original_height)
        print(f"Resizing from {original_width}x{original_height} to {target_width}x{target_height}")
        resized_image = input_image.resize((target_width, target_height), Image.LANCZOS)

        # Detect faces and bias the prompt toward portraits if any are found
        faces = self.detect_faces(resized_image)
        if len(faces) > 0:
            print(f"Detected {len(faces)} face(s)")
            prompt = f"portrait, detailed face, {prompt}"

        # Generate the depth map for ControlNet conditioning
        print("Generating depth map...")
        depth_image = self.get_depth_map(resized_image)
        depth_image = depth_image.resize((target_width, target_height), Image.LANCZOS)

        # Set the LoRA scale (only if the adapter actually loaded)
        if self.lora_loaded:
            self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])

        # Generate the image (fixed seed for reproducible output)
        print("Generating retro art...")
        result = self.pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=depth_image,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            width=target_width,
            height=target_height,
            generator=torch.Generator(device=self.device).manual_seed(42)
        )
        return result.images[0]
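# Hypothetical local test of the converter (outside the Gradio UI):
#   img = Image.open("photo.jpg").convert("RGB")
#   art = RetroArtConverter().generate_retro_art(img, prompt="retro pixel art")
#   art.save("retro.png")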
# Initialize the converter
print("Initializing RetroArt Converter...")
converter = RetroArtConverter()
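# ZeroGPU note: models load once at startup; the GPU is attached only while a
# @spaces.GPU-decorated function is executing.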
# Gradio handler with ZeroGPU support
@spaces.GPU
def process_image(
    image,
    prompt,
    negative_prompt,
    steps,
    guidance_scale,
    controlnet_scale,
    lora_scale
):
    if image is None:
        return None
    try:
        result = converter.generate_retro_art(
            input_image=image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_inference_steps=int(steps),
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_scale,
            lora_scale=lora_scale
        )
        return result
    except Exception as e:
        print(f"Error: {e}")
        raise gr.Error(f"Generation failed: {str(e)}")
# Create the Gradio interface
with gr.Blocks(title="RetroArt Converter") as demo:
    gr.Markdown("""
    # 🎮 RetroArt Converter

    Convert any image into retro game art style!

    **Features:**
    - Custom SDXL checkpoint (Horizon)
    - Pixelate VAE for an authentic retro look
    - RetroArt LoRA for style enhancement
    - Face detection (antelopev2) that biases prompts toward portraits
    - Depth-aware generation with ControlNet
    """)
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Input Image", type="pil")
            prompt = gr.Textbox(
                label="Prompt",
                value="retro pixel art game, 16-bit style, vibrant colors, detailed",
                lines=3
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value="blurry, low quality, modern, photorealistic, 3d render, ugly, distorted",
                lines=2
            )
            with gr.Accordion("Advanced Settings", open=False):
                steps = gr.Slider(
                    minimum=20,
                    maximum=50,
                    value=30,
                    step=1,
                    label="Inference Steps"
                )
                guidance_scale = gr.Slider(
                    minimum=1,
                    maximum=15,
                    value=7.5,
                    step=0.5,
                    label="Guidance Scale"
                )
                controlnet_scale = gr.Slider(
                    minimum=0,
                    maximum=2,
                    value=0.8,
                    step=0.1,
                    label="ControlNet Depth Scale"
                )
                lora_scale = gr.Slider(
                    minimum=0,
                    maximum=2,
                    value=0.85,
                    step=0.05,
                    label="RetroArt LoRA Scale"
                )
            generate_btn = gr.Button("🎨 Generate Retro Art", variant="primary")
        with gr.Column():
            output_image = gr.Image(label="Retro Art Output")
    gr.Examples(
        examples=[
            ["example_portrait.jpg", "retro pixel art portrait, 16-bit game character", "blurry, modern", 30, 7.5, 0.8, 0.85],
        ],
        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale],
        outputs=[output_image],
        fn=process_image,
        cache_examples=False
    )

    generate_btn.click(
        fn=process_image,
        inputs=[input_image, prompt, negative_prompt, steps, guidance_scale, controlnet_scale, lora_scale],
        outputs=[output_image]
    )
# Launch with the API enabled
if __name__ == "__main__":
    demo.queue(max_size=20)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_api=True  # Enable API
    )