flux-lightning

Runtime error

App Files Files Community

Jordan Legg committed on Aug 8, 2024

Commit

86f0308

1 Parent(s): 448d742

remove projection layer and let x embedder handle it

Browse files

Files changed (1) hide show

app.py +12 -14

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import gradio as gr
 import numpy as np
 import random
 import torch
-import torch.nn as nn
 from PIL import Image
 from torchvision import transforms
 from diffusers import DiffusionPipeline
@@ -20,9 +19,6 @@ pipe.enable_model_cpu_offload()
 pipe.vae.enable_slicing()
 pipe.vae.enable_tiling()
-# Add a projection layer to match x_embedder input
-projection = nn.Linear(32 * 128 * 128, 64).to(device).to(dtype)
 def preprocess_image(image, image_size):
     preprocess = transforms.Compose([
         transforms.Resize((image_size, image_size), interpolation=transforms.InterpolationMode.LANCZOS),
@@ -33,19 +29,18 @@ def preprocess_image(image, image_size):
     return image
 def process_latents(latents, height, width):
-    # Ensure latents are the correct shape (should be [1, 32, 128, 128])
-    latents = torch.nn.functional.interpolate(latents, size=(height // 8, width // 8), mode='bilinear')
-    print(f"Latent shape after interpolation: {latents.shape}")
-    # Flatten the latents
-    latents_flat = latents.reshape(1, -1)
-    print(f"Flattened latent shape: {latents_flat.shape}")
-    # Project to 64 dimensions
-    latents_projected = projection(latents_flat)
-    print(f"Projected latent shape: {latents_projected.shape}")
-    return latents_projected
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
@@ -76,6 +71,9 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
             # Process latents to match x_embedder input
             latents = process_latents(latents, height, width)
             image = pipe(
                 prompt=prompt,
                 height=height,

 import numpy as np
 import random
 import torch
 from PIL import Image
 from torchvision import transforms
 from diffusers import DiffusionPipeline
 pipe.vae.enable_slicing()
 pipe.vae.enable_tiling()
 def preprocess_image(image, image_size):
     preprocess = transforms.Compose([
         transforms.Resize((image_size, image_size), interpolation=transforms.InterpolationMode.LANCZOS),
     return image
 def process_latents(latents, height, width):
+    print(f"Input latent shape: {latents.shape}")
+    # Ensure latents are the correct shape
+    if latents.shape[2:] != (height // 8, width // 8):
+        latents = torch.nn.functional.interpolate(latents, size=(height // 8, width // 8), mode='bilinear')
+    print(f"Latent shape after potential interpolation: {latents.shape}")
+    # Reshape latents to [batch_size, seq_len, channels]
+    latents = latents.permute(0, 2, 3, 1).reshape(1, -1, latents.shape[1])
+    print(f"Reshaped latent shape: {latents.shape}")
+    return latents
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
             # Process latents to match x_embedder input
             latents = process_latents(latents, height, width)
+            print(f"x_embedder weight shape: {pipe.transformer.x_embedder.weight.shape}")
+            print(f"First transformer block input shape: {pipe.transformer.transformer_blocks[0].attn.to_q.weight.shape}")
             image = pipe(
                 prompt=prompt,
                 height=height,