import torch
from diffusers import FluxPipeline

# Optimized for Apple Silicon (MPS available): memory-efficient configuration
# Load the pipeline in bfloat16 for better MPS performance
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)

# Move to the MPS device for GPU acceleration on Apple Silicon
pipe.to("mps")

# Apple Silicon memory optimizations
pipe.enable_attention_slicing()  # compute attention in slices to reduce peak memory
pipe.vae.enable_slicing()        # decode the VAE in slices for the same reason

# Optional: offload model components to the CPU if memory is tight
# (skip pipe.to("mps") above if you enable this)
# pipe.enable_model_cpu_offload()

# FLUX uses a transformer backbone (not a UNet); try compiling it for speed
try:
    pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
except Exception:
    print("torch.compile not supported, proceeding without compilation")

prompt = "A cat holding a sign that says hello world"

# Generate the image with settings tuned for FLUX.1-schnell
with torch.inference_mode():
    out = pipe(
        prompt=prompt,
        guidance_scale=0.0,        # FLUX.1-schnell works best with guidance_scale=0
        height=768,
        width=1360,
        num_inference_steps=4,     # FLUX.1-schnell is distilled for few-step (4) sampling
        max_sequence_length=256,   # shorter text sequence to save memory
        generator=torch.Generator(device="mps").manual_seed(42),  # reproducible results
    ).images[0]

# Save the generated image
out.save("image.png")
print("Image generated and saved as 'image.png'")
print("Optimizations applied: MPS device, bfloat16 precision, attention slicing, VAE slicing")