flux-lightning

Runtime error

App Files Files Community

Jordan Legg committed on Aug 8, 2024

Commit

d2c614b

1 Parent(s): 12f1d71

simplified pipeline

Browse files

Files changed (1) hide show

app.py +36 -69

app.py CHANGED Viewed

@@ -1,108 +1,75 @@
 import spaces
 import gradio as gr
-import numpy as np
-import random
 import torch
-import torch.nn as nn
 from PIL import Image
-from torchvision import transforms
 from diffusers import DiffusionPipeline
 # Constants
-dtype = torch.bfloat16
-device = "cuda" if torch.cuda.is_available() else "cpu"
-MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
-LATENT_CHANNELS = 16
-TRANSFORMER_IN_CHANNELS = 64
-SCALING_FACTOR = 0.3611
 # Load FLUX model
-pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=dtype).to(device)
 pipe.enable_model_cpu_offload()
-pipe.vae.enable_slicing()
-pipe.vae.enable_tiling()
-# Add a projection layer to match transformer input
-projection = nn.Linear(LATENT_CHANNELS, TRANSFORMER_IN_CHANNELS).to(device).to(dtype)
-def preprocess_image(image, image_size):
-    preprocess = transforms.Compose([
-        transforms.Resize((image_size, image_size), interpolation=transforms.InterpolationMode.LANCZOS),
-        transforms.ToTensor(),
-        transforms.Normalize([0.5], [0.5])
-    ])
-    image = preprocess(image).unsqueeze(0).to(device, dtype=dtype)
-    return image
-def process_latents(latents, height, width):
-    print(f"Input latent shape: {latents.shape}")
-    # Ensure latents are the correct shape
-    if latents.shape[2:] != (height // 8, width // 8):
-        latents = torch.nn.functional.interpolate(latents, size=(height // 8, width // 8), mode='bilinear')
-    print(f"Latent shape after potential interpolation: {latents.shape}")
-    # Reshape latents to [batch_size, seq_len, channels]
-    latents = latents.permute(0, 2, 3, 1).reshape(1, -1, LATENT_CHANNELS)
-    print(f"Reshaped latent shape: {latents.shape}")
-    # Project latents from 16 to 64 dimensions
-    latents = projection(latents)
-    print(f"Projected latent shape: {latents.shape}")
-    return latents
 @spaces.GPU()
-def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
     try:
         if init_image is None:
             # text2img case
             image = pipe(
                 prompt=prompt,
                 height=height,
                 width=width,
                 num_inference_steps=num_inference_steps,
                 generator=generator,
-                guidance_scale=0.0
             ).images[0]
         else:
             # img2img case
-            init_image = init_image.convert("RGB")
-            init_image = preprocess_image(init_image, 1024)  # Using 1024 as FLUX VAE sample size
-            # Encode the image using FLUX VAE
-            latents = pipe.vae.encode(init_image).latent_dist.sample() * SCALING_FACTOR
-            print(f"Initial latent shape from VAE: {latents.shape}")
-            # Process latents to match transformer input
-            latents = process_latents(latents, height, width)
-            print(f"x_embedder weight shape: {pipe.transformer.x_embedder.weight.shape}")
-            print(f"First transformer block input shape: {pipe.transformer.transformer_blocks[0].attn.to_q.weight.shape}")
             image = pipe(
                 prompt=prompt,
-                height=height,
-                width=width,
                 num_inference_steps=num_inference_steps,
                 generator=generator,
-                guidance_scale=0.0,
-                latents=latents
             ).images[0]
         return image, seed
     except Exception as e:
-        print(f"Error during inference: {e}")
         import traceback
         traceback.print_exc()
-        return Image.new("RGB", (width, height), (255, 0, 0)), seed  # Red fallback image
-# Gradio interface setup
 with gr.Blocks() as demo:
     with gr.Row():
         prompt = gr.Textbox(label="Prompt")
@@ -116,15 +83,15 @@ with gr.Blocks() as demo:
         seed_output = gr.Number(label="Seed")
     with gr.Accordion("Advanced Settings", open=False):
-        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
-        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
         width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
         height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
         num_inference_steps = gr.Slider(label="Number of inference steps", minimum=1, maximum=50, step=1, value=4)
     generate.click(
         infer,
-        inputs=[prompt, init_image, seed, randomize_seed, width, height, num_inference_steps],
         outputs=[result, seed_output]
     )

 import spaces
 import gradio as gr
 import torch
 from PIL import Image
 from diffusers import DiffusionPipeline
 # Constants
+MAX_SEED = 2**32 - 1
 MAX_IMAGE_SIZE = 2048
 # Load FLUX model
+pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float16)
+pipe.to("cuda")
 pipe.enable_model_cpu_offload()
+pipe.enable_vae_slicing()
+def print_model_shapes(pipe):
+    print("Model component shapes:")
+    print(f"VAE Encoder: {pipe.vae.encoder}")
+    print(f"VAE Decoder: {pipe.vae.decoder}")
+    print(f"x_embedder shape: {pipe.transformer.x_embedder.weight.shape}")
+    print(f"First transformer block shape: {pipe.transformer.transformer_blocks[0].attn.to_q.weight.shape}")
+print_model_shapes(pipe)
 @spaces.GPU()
+def infer(prompt, init_image=None, seed=None, width=1024, height=1024, num_inference_steps=4, guidance_scale=0.0):
+    generator = torch.Generator(device="cuda").manual_seed(seed) if seed is not None else None
     try:
         if init_image is None:
             # text2img case
+            print("Running text-to-image generation")
             image = pipe(
                 prompt=prompt,
                 height=height,
                 width=width,
                 num_inference_steps=num_inference_steps,
                 generator=generator,
+                guidance_scale=guidance_scale
             ).images[0]
         else:
             # img2img case
+            print("Running image-to-image generation")
+            init_image = init_image.convert("RGB").resize((width, height))
             image = pipe(
                 prompt=prompt,
+                image=init_image,
                 num_inference_steps=num_inference_steps,
                 generator=generator,
+                guidance_scale=guidance_scale
             ).images[0]
         return image, seed
+    except RuntimeError as e:
+        if "mat1 and mat2 shapes cannot be multiplied" in str(e):
+            print("Matrix multiplication error detected. Tensor shapes:")
+            print(e)
+            # Here you could add code to print shapes of specific tensors if needed
+        else:
+            print(f"RuntimeError during inference: {e}")
+        import traceback
+        traceback.print_exc()
+        return Image.new("RGB", (width, height), (255, 0, 0)), seed
     except Exception as e:
+        print(f"Unexpected error during inference: {e}")
         import traceback
         traceback.print_exc()
+        return Image.new("RGB", (width, height), (255, 0, 0)), seed
+# Gradio interface
 with gr.Blocks() as demo:
     with gr.Row():
         prompt = gr.Textbox(label="Prompt")
         seed_output = gr.Number(label="Seed")
     with gr.Accordion("Advanced Settings", open=False):
+        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=None)
         width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
         height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
         num_inference_steps = gr.Slider(label="Number of inference steps", minimum=1, maximum=50, step=1, value=4)
+        guidance_scale = gr.Slider(label="Guidance scale", minimum=0, maximum=20, step=0.1, value=0.0)
     generate.click(
         infer,
+        inputs=[prompt, init_image, seed, width, height, num_inference_steps, guidance_scale],
         outputs=[result, seed_output]
     )