flux-lightning

Runtime error

App Files Files Community

Jordan Legg committed on Aug 8, 2024

Commit

448d742

1 Parent(s): 6b927be

mapped weights and tried transform projection

Browse files

Files changed (1) hide show

app.py +21 -28

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import gradio as gr
 import numpy as np
 import random
 import torch
 from PIL import Image
 from torchvision import transforms
 from diffusers import DiffusionPipeline
@@ -19,6 +20,9 @@ pipe.enable_model_cpu_offload()
 pipe.vae.enable_slicing()
 pipe.vae.enable_tiling()
 def preprocess_image(image, image_size):
     preprocess = transforms.Compose([
         transforms.Resize((image_size, image_size), interpolation=transforms.InterpolationMode.LANCZOS),
@@ -28,14 +32,20 @@ def preprocess_image(image, image_size):
     image = preprocess(image).unsqueeze(0).to(device, dtype=dtype)
     return image
-def check_shapes(latents):
-    print(f"Latent shape: {latents.shape}")
-    if len(latents.shape) == 4:
-        print(f"Expected transformer input shape: {(1, latents.shape[1] * latents.shape[2] * latents.shape[3])}")
-    elif len(latents.shape) == 2:
-        print(f"Reshaped latent shape: {latents.shape}")
-    else:
-        print(f"Unexpected latent shape: {latents.shape}")
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
@@ -61,27 +71,10 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
             # Encode the image using FLUX VAE
             latents = pipe.vae.encode(init_image).latent_dist.sample() * 0.18215
-            # Ensure latents are the correct shape
-            latents = torch.nn.functional.interpolate(latents, size=(height // 8, width // 8), mode='bilinear')
-            # Check shapes before reshaping
-            check_shapes(latents)
-            # Reshape latents to match the expected input shape of the transformer
-            latents = latents.reshape(1, -1)
-            # Check shapes after reshaping
-            check_shapes(latents)
-            # Print the type and shape of each argument
-            print(f"prompt type: {type(prompt)}, value: {prompt}")
-            print(f"height type: {type(height)}, value: {height}")
-            print(f"width type: {type(width)}, value: {width}")
-            print(f"num_inference_steps type: {type(num_inference_steps)}, value: {num_inference_steps}")
-            print(f"generator type: {type(generator)}")
-            print(f"guidance_scale type: {type(0.0)}, value: 0.0")
-            print(f"latents type: {type(latents)}, shape: {latents.shape}")
             image = pipe(
                 prompt=prompt,

 import numpy as np
 import random
 import torch
+import torch.nn as nn
 from PIL import Image
 from torchvision import transforms
 from diffusers import DiffusionPipeline
 pipe.vae.enable_slicing()
 pipe.vae.enable_tiling()
+# Add a projection layer to match x_embedder input
+projection = nn.Linear(32 * 128 * 128, 64).to(device).to(dtype)
 def preprocess_image(image, image_size):
     preprocess = transforms.Compose([
         transforms.Resize((image_size, image_size), interpolation=transforms.InterpolationMode.LANCZOS),
     image = preprocess(image).unsqueeze(0).to(device, dtype=dtype)
     return image
def process_latents(latents, height, width):
    """Resize VAE latents and pack them into a sequence of 2x2-patch tokens.

    The previous implementation flattened the whole latent tensor and pushed
    it through a randomly initialised ``nn.Linear(32 * 128 * 128, 64)``. That
    discarded all spatial structure and raised a shape error whenever the
    latents were not exactly 32 x 128 x 128. This version uses a
    deterministic, loss-free packing instead: every 2x2 spatial patch becomes
    one token carrying ``channels * 4`` features, so 16-channel latents yield
    the 64 features per token that the projection was trying to reach.

    NOTE(review): the token layout mirrors diffusers'
    ``FluxPipeline._pack_latents``; confirm against the installed diffusers
    version before passing the result as ``latents=`` to the pipeline.

    Args:
        latents: 4-D tensor ``(batch, channels, h, w)`` from the VAE encoder.
        height: Target image height in pixels.
        width: Target image width in pixels.

    Returns:
        Tensor of shape ``(batch, (H // 2) * (W // 2), channels * 4)`` where
        ``H = 2 * (height // 16)`` and ``W = 2 * (width // 16)``.

    Raises:
        ValueError: If ``latents`` is not 4-D or the target size is too small
            to hold a single 2x2 latent patch.
    """
    if latents.dim() != 4:
        raise ValueError(
            f"Expected 4-D latents (batch, channels, h, w), got shape {tuple(latents.shape)}"
        )

    # The VAE downsamples by 8 and packing needs even latent dimensions, so
    # round the latent size down to a multiple of 2 (equals height // 8 for
    # the usual multiples of 16).
    latent_h = 2 * (height // 16)
    latent_w = 2 * (width // 16)
    if latent_h <= 0 or latent_w <= 0:
        raise ValueError(
            f"height and width must be at least 16 pixels, got {height}x{width}"
        )

    # Only resample when the encoder output does not already match the target.
    if tuple(latents.shape[-2:]) != (latent_h, latent_w):
        latents = torch.nn.functional.interpolate(
            latents, size=(latent_h, latent_w), mode='bilinear'
        )
    print(f"Latent shape after interpolation: {latents.shape}")

    batch, channels = latents.shape[0], latents.shape[1]
    # Split each spatial axis into (patch index, offset inside the 2x2 patch),
    # then move the patch indices in front so each token holds one patch of
    # every channel, ordered (channel, row offset, column offset).
    patches = latents.reshape(batch, channels, latent_h // 2, 2, latent_w // 2, 2)
    patches = patches.permute(0, 2, 4, 1, 3, 5)
    latents_packed = patches.reshape(
        batch, (latent_h // 2) * (latent_w // 2), channels * 4
    )
    print(f"Packed latent shape: {latents_packed.shape}")
    return latents_packed
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
             # Encode the image using FLUX VAE
             latents = pipe.vae.encode(init_image).latent_dist.sample() * 0.18215
+            print(f"Initial latent shape from VAE: {latents.shape}")
+            # Process latents to match x_embedder input
+            latents = process_latents(latents, height, width)
             image = pipe(
                 prompt=prompt,