FLUX.1-Kontext-Dev

Running on Zero

App Files Files Community

cbensimon HF Staff committed on Jul 16

Commit

b63cd34

1 Parent(s): 2e80751

capture_component_call

Browse files

Files changed (4) hide show

app.py +6 -1
optimization.py +20 -29
pipeline_utils.py +40 -0
zerogpu.py +2 -2

app.py CHANGED Viewed

@@ -18,7 +18,12 @@ from optimization import optimize_pipeline_
 MAX_SEED = np.iinfo(np.int32).max
 pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")
-optimize_pipeline_(pipe)
 @spaces.GPU
 def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=28, progress=gr.Progress(track_tqdm=True)):

 MAX_SEED = np.iinfo(np.int32).max
 pipe = FluxKontextPipeline.from_pretrained("black-forest-labs/FLUX.1-Kontext-dev", torch_dtype=torch.bfloat16).to("cuda")
+optimize_pipeline_(pipe,
+    image=Image.new('RGB', (512, 512)),
+    prompt='prompt',
+    guidance_scale=2.5,
+)
 @spaces.GPU
 def infer(input_image, prompt, seed=42, randomize_seed=False, guidance_scale=2.5, steps=28, progress=gr.Progress(track_tqdm=True)):

optimization.py CHANGED Viewed

@@ -1,49 +1,40 @@
 """
 """
 import spaces
 import torch
-from diffusers.pipelines.flux.pipeline_flux import FluxPipeline
 from zerogpu import aoti_compile
-def _example_tensor(*shape):
-    return torch.randn(*shape, device='cuda', dtype=torch.bfloat16)
-def optimize_pipeline_(pipeline: FluxPipeline):
-    is_timestep_distilled = not pipeline.transformer.config.guidance_embeds
-    seq_length = 256 if is_timestep_distilled else 512
-    transformer_kwargs = {
-        'hidden_states': _example_tensor(1, 4096, 64),
-        'timestep': torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
-        'guidance': None if is_timestep_distilled else torch.tensor([1.], device='cuda', dtype=torch.bfloat16),
-        'pooled_projections': _example_tensor(1, 768),
-        'encoder_hidden_states': _example_tensor(1, seq_length, 4096),
-        'txt_ids': _example_tensor(seq_length, 3),
-        'img_ids': _example_tensor(4096, 3),
-        'joint_attention_kwargs': {},
-        'return_dict': False,
-    }
-    inductor_configs = {
-        'conv_1x1_as_mm': True,
-        'epilogue_fusion': False,
-        'coordinate_descent_tuning': True,
-        'coordinate_descent_check_all_directions': True,
-        'max_autotune': True,
-        'triton.cudagraphs': True,
-    }
     @spaces.GPU(duration=1500)
     def compile_transformer():
         pipeline.transformer.fuse_qkv_projections()
-        exported = torch.export.export(pipeline.transformer, args=(), kwargs=transformer_kwargs)
-        return aoti_compile(exported, inductor_configs)
     transformer_config = pipeline.transformer.config
     pipeline.transformer = compile_transformer()
-    pipeline.transformer.config = transformer_config

 """
 """
+from typing import Any
+from typing import Callable
+from typing import ParamSpec
 import spaces
 import torch
+from pipeline_utils import capture_component_call
 from zerogpu import aoti_compile
+P = ParamSpec('P')
+INDUCTOR_CONFIGS = {
+    'conv_1x1_as_mm': True,
+    'epilogue_fusion': False,
+    'coordinate_descent_tuning': True,
+    'coordinate_descent_check_all_directions': True,
+    'max_autotune': True,
+    'triton.cudagraphs': True,
+}
+def optimize_pipeline_(pipeline: Callable[P, Any], *args: P.args, **kwargs: P.kwargs):
     @spaces.GPU(duration=1500)
     def compile_transformer():
+        with capture_component_call(pipeline, 'transformer') as call:
+            pipeline(*args, **kwargs)
         pipeline.transformer.fuse_qkv_projections()
+        exported = torch.export.export(pipeline.transformer, args=call.args, kwargs=call.kwargs)
+        return aoti_compile(exported, INDUCTOR_CONFIGS)
     transformer_config = pipeline.transformer.config
     pipeline.transformer = compile_transformer()
+    pipeline.transformer.config = transformer_config # pyright: ignore[reportAttributeAccessIssue]

pipeline_utils.py ADDED Viewed

	@@ -0,0 +1,40 @@

+"""
+"""
+import contextlib
+from unittest.mock import patch
+from typing import Any
+class CapturedCallException(Exception):
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+        self.args = args
+        self.kwargs = kwargs
+class CapturedCall:
+    def __init__(self):
+        self.args: tuple[Any, ...] = ()
+        self.kwargs: dict[str, Any] = {}
+@contextlib.contextmanager
+def capture_component_call(
+    pipeline: Any,
+    component_name: str,
+    component_method='forward',
+):
+    component = getattr(pipeline, component_name)
+    captured_call = CapturedCall()
+    def capture_call(*args, **kwargs):
+        raise CapturedCallException(*args, **kwargs)
+    with patch.object(component, component_method, new=capture_call):
+        try:
+            yield captured_call
+        except CapturedCallException as e:
+            captured_call.args = e.args
+            captured_call.kwargs = e.kwargs

zerogpu.py CHANGED Viewed

@@ -51,12 +51,12 @@ def aoti_compile(
     inductor_configs: dict[str, Any] | None = None,
 ):
     inductor_configs = (inductor_configs or {}) | INDUCTOR_CONFIGS_OVERRIDES
-    gm = exported_program.module()
     assert exported_program.example_inputs is not None
     args, kwargs = exported_program.example_inputs
     artifacts = torch._inductor.aot_compile(gm, args, kwargs, options=inductor_configs)
     archive_file = BytesIO()
-    files = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
     return ZeroGPUCompiledModel(archive_file, weights)

     inductor_configs: dict[str, Any] | None = None,
 ):
     inductor_configs = (inductor_configs or {}) | INDUCTOR_CONFIGS_OVERRIDES
+    gm = cast(torch.fx.GraphModule, exported_program.module())
     assert exported_program.example_inputs is not None
     args, kwargs = exported_program.example_inputs
     artifacts = torch._inductor.aot_compile(gm, args, kwargs, options=inductor_configs)
     archive_file = BytesIO()
+    files: list[str | Weights] = [file for file in artifacts if isinstance(file, str)]
     package_aoti(archive_file, files)
     weights, = (artifact for artifact in artifacts if isinstance(artifact, Weights))
     return ZeroGPUCompiledModel(archive_file, weights)