Inference

from diffusers import AutoencoderKL, AuraFlowPipeline, AuraFlowTransformer2DModel
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
from flashpack import FlashPackMixin
from flashpack.integrations.diffusers import FlashPackDiffusionPipeline
from flashpack.integrations.diffusers.model import FlashPackDiffusersModelMixin
from flashpack.integrations.transformers import FlashPackTransformersModelMixin
from transformers import UMT5EncoderModel, T5Tokenizer
import torch

class TransformerModel(AuraFlowTransformer2DModel, FlashPackDiffusersModelMixin):
    """AuraFlow 2D transformer combined with the FlashPack diffusers model mixin.

    Adds no members of its own. The example script in this file loads it via
    ``TransformerModel.from_pretrained_flashpack(...)``.
    """

class TextEncoder(UMT5EncoderModel, FlashPackTransformersModelMixin):
    """UMT5 encoder combined with the FlashPack transformers model mixin.

    The example script in this file loads it via
    ``TextEncoder.from_pretrained_flashpack(...)``.
    """

    # NOTE(review): presumably the tensor names FlashPack should leave out of
    # the packed file -- confirm against the flashpack documentation.
    flashpack_ignore_names = [
        'encoder.embed_tokens.weight',
    ]

class AuraFlashpackPipeline(AuraFlowPipeline, FlashPackDiffusionPipeline):
    """AuraFlow pipeline combined with the FlashPack pipeline mixin.

    The constructor accepts five components and forwards them unchanged to the
    parent initializer. Its annotations name the FlashPack-enabled
    ``TextEncoder`` and ``TransformerModel`` subclasses defined in this file.
    """

    def __init__(
        self,
        tokenizer: T5Tokenizer,
        text_encoder: TextEncoder,
        vae: AutoencoderKL,
        transformer: TransformerModel,
        scheduler: FlowMatchEulerDiscreteScheduler,
    ):
        # Components are passed positionally, in the same order as received.
        super().__init__(tokenizer, text_encoder, vae, transformer, scheduler)

if __name__ == '__main__':
    # bfloat16 example: load the text encoder and transformer explicitly in
    # bfloat16, pass them to the pipeline, generate one image and save it.
    model_path = '/path/to/pony-v7-flashpack'
    text_encoder = TextEncoder.from_pretrained_flashpack(model_path, subfolder='text_encoder', torch_dtype=torch.bfloat16)
    transformer = TransformerModel.from_pretrained_flashpack(model_path, subfolder='transformer', torch_dtype=torch.bfloat16)
    pipeline = AuraFlashpackPipeline.from_pretrained_flashpack(
        model_path,
        text_encoder=text_encoder,
        transformer=transformer,
        torch_dtype=torch.bfloat16
    )
    pipeline.enable_model_cpu_offload()
    with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
        image = pipeline(
            'style_cluster_258, A peaceful woodland scene with a towering, gnarled oak tree in the center, its sprawling limbs reaching skyward. The tree is encircled by dense ferns and wildflowers, with a shallow creek in the foreground mirroring the lush surroundings. A young woman in a flowing dress stands on a mossy boulder to the right, admiring the ancient tree. The afternoon light filters through the leaves, creating golden patterns across the forest floor.',
            num_inference_steps=40,
            # Fixed seed so repeated runs start from the same generator state.
            generator=torch.Generator().manual_seed(0)
        ).images[0]
    # Previously the generated image was discarded; write it to disk the same
    # way the float16 example in this file does.
    image.save('preview.png')

The float16 variant below gives higher precision but slower inference, and it requires at least 16 GB of VRAM.

from diffusers import AutoencoderKL, AuraFlowPipeline, AuraFlowTransformer2DModel
from diffusers.schedulers import FlowMatchEulerDiscreteScheduler
from flashpack import FlashPackMixin
from flashpack.integrations.diffusers import FlashPackDiffusionPipeline
from flashpack.integrations.diffusers.model import FlashPackDiffusersModelMixin
from flashpack.integrations.transformers import FlashPackTransformersModelMixin
from transformers import UMT5EncoderModel, T5Tokenizer
import torch

class TransformerModel(AuraFlowTransformer2DModel, FlashPackDiffusersModelMixin):
    """AuraFlow 2D transformer combined with the FlashPack diffusers model mixin.

    Declares nothing beyond its two bases. The example script in this file
    loads it via ``TransformerModel.from_pretrained_flashpack(...)``.
    """

class TextEncoder(UMT5EncoderModel, FlashPackTransformersModelMixin):
    """UMT5 encoder combined with the FlashPack transformers model mixin.

    The example script in this file loads it via
    ``TextEncoder.from_pretrained_flashpack(...)``.
    """

    # NOTE(review): presumably the tensor names FlashPack should leave out of
    # the packed file -- confirm against the flashpack documentation.
    flashpack_ignore_names = [
        'encoder.embed_tokens.weight',
    ]

class AuraFlashpackPipeline(AuraFlowPipeline, FlashPackDiffusionPipeline):
    """AuraFlow pipeline combined with the FlashPack pipeline mixin.

    The constructor accepts five components and forwards them unchanged to the
    parent initializer. Its annotations name the FlashPack-enabled
    ``TextEncoder`` and ``TransformerModel`` subclasses defined in this file.
    """

    def __init__(
        self,
        tokenizer: T5Tokenizer,
        text_encoder: TextEncoder,
        vae: AutoencoderKL,
        transformer: TransformerModel,
        scheduler: FlowMatchEulerDiscreteScheduler,
    ):
        # Components are passed positionally, in the same order as received.
        super().__init__(tokenizer, text_encoder, vae, transformer, scheduler)

if __name__ == '__main__':
    # float16 example: the text encoder and transformer are loaded in float16;
    # the pipeline itself is loaded without an explicit torch_dtype.
    model_path = '/path/to/pony-v7-flashpack'

    text_encoder = TextEncoder.from_pretrained_flashpack(
        model_path,
        subfolder='text_encoder',
        torch_dtype=torch.float16,
    )
    transformer = TransformerModel.from_pretrained_flashpack(
        model_path,
        subfolder='transformer',
        torch_dtype=torch.float16,
    )
    pipeline = AuraFlashpackPipeline.from_pretrained_flashpack(
        model_path, text_encoder=text_encoder, transformer=transformer
    )
    pipeline.enable_model_cpu_offload()

    prompt = 'style_cluster_258, A peaceful woodland scene with a towering, gnarled oak tree in the center, its sprawling limbs reaching skyward. The tree is encircled by dense ferns and wildflowers, with a shallow creek in the foreground mirroring the lush surroundings. A young woman in a flowing dress stands on a mossy boulder to the right, admiring the ancient tree. The afternoon light filters through the leaves, creating golden patterns across the forest floor.'

    # NOTE(review): the autocast dtype here is float32 while the sub-models are
    # float16 -- confirm this is intentional against the torch.autocast docs.
    with torch.autocast(device_type='cuda', dtype=torch.float32):
        # Fixed seed so repeated runs start from the same generator state.
        seeded_generator = torch.Generator().manual_seed(0)
        result = pipeline(
            prompt,
            num_inference_steps=40,
            generator=seeded_generator,
        )
        image = result.images[0]

    image.save('preview.png')

References

License

You can use this model and its outputs commercially unless you provide an inference service or application, have a company with over 1M in revenue, or use it in professional video production. These limitations do not apply if you use first-party commercial APIs.

Downloads last month
62
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support