import torch
from diffusers import FluxPipeline

# Optimized for Apple Silicon (MPS available): memory-efficient configuration
# Load the pipeline in bfloat16 for better MPS performance
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
)

# Move to the MPS device for GPU acceleration on Apple Silicon
pipe.to("mps")

# Apple Silicon memory optimizations
pipe.enable_attention_slicing()  # compute attention in slices to reduce peak memory
pipe.vae.enable_slicing()        # decode the VAE in slices for the same reason

# Optional: offload model components to the CPU if memory is tight
# (skip pipe.to("mps") above if you enable this)
# pipe.enable_model_cpu_offload()

# FLUX uses a transformer backbone (not a UNet); try compiling it for speed
try:
    pipe.transformer = torch.compile(pipe.transformer, mode="reduce-overhead", fullgraph=True)
except Exception:
    print("torch.compile not supported, proceeding without compilation")

prompt = "A cat holding a sign that says hello world"

# Generate the image with settings tuned for FLUX.1-schnell
with torch.inference_mode():
    out = pipe(
        prompt=prompt,
        guidance_scale=0.0,        # FLUX.1-schnell works best with guidance_scale=0
        height=768,
        width=1360,
        num_inference_steps=4,     # FLUX.1-schnell is distilled for few-step (4) sampling
        max_sequence_length=256,   # shorter text sequence to save memory
        generator=torch.Generator(device="mps").manual_seed(42),  # reproducible results
    ).images[0]

# Save the generated image
out.save("image.png")
print("Image generated and saved as 'image.png'")
print("Optimizations applied: MPS device, bfloat16 precision, attention slicing, VAE slicing")