flux-lightning

Runtime error

App Files Community

Jordan Legg committed on Aug 8, 2024

Commit

5b33905

1 Parent(s): 409e82d

console logging for txt2img

Browse files

Files changed (1) hide show

app.py +31 -18

app.py CHANGED Viewed

@@ -42,12 +42,32 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
     print(f"Using seed: {seed}")
     generator = torch.Generator().manual_seed(seed)
-    # Get the expected image size for the VAE
-    vae_image_size = pipe.vae.config.sample_size
-    print(f"Expected VAE image size: {vae_image_size}")
-    if init_image is not None:
         print("Initial image provided, processing img2img")
         init_image = init_image.convert("RGB")
         init_image = preprocess_image(init_image, vae_image_size)
         latents = encode_image(init_image, pipe.vae)
@@ -66,11 +86,13 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
             latents = conv(latents)
             print(f"Latents shape after channel conversion: {latents.shape}")
         # Reshape latents to match the transformer's input expectations
-        latents = latents.view(1, 64, height // 8, width // 8)
-        print(f"Latents shape after reshaping: {latents.shape}")
-        # Avoid flattening, ensure latents are in the expected shape for the transformer
         # Adding extra debug to understand what transformer expects
         try:
             print("Calling the transformer with latents")
@@ -91,16 +113,6 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
             guidance_scale=0.0,
             latents=latents
         ).images[0]
-    else:
-        print("No initial image provided, processing text2img")
-        image = pipe(
-            prompt=prompt,
-            height=height,
-            width=width,
-            num_inference_steps=num_inference_steps,
-            generator=generator,
-            guidance_scale=0.0
-        ).images[0]
     print("Inference complete")
     return image, seed
@@ -109,6 +121,7 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
 # Define example prompts
 examples = [
     "a tiny astronaut hatching from an egg on the moon",

     print(f"Using seed: {seed}")
     generator = torch.Generator().manual_seed(seed)
+    if init_image is None:
+        print("No initial image provided, processing text2img")
+        # Process text2img
+        try:
+            print("Calling the diffusion pipeline without latents")
+            result = pipe(
+                prompt=prompt,
+                height=height,
+                width=width,
+                num_inference_steps=num_inference_steps,
+                generator=generator,
+                guidance_scale=0.0
+            )
+            image = result.images[0]
+            latents = result.latents
+            # Log the latent shapes from text2img process
+            print(f"Latents shape from text2img: {latents.shape}")
+        except Exception as e:
+            print(f"Pipeline call failed with error: {e}")
+            raise
+    else:
         print("Initial image provided, processing img2img")
+        vae_image_size = pipe.vae.config.sample_size
+        print(f"Expected VAE image size: {vae_image_size}")
         init_image = init_image.convert("RGB")
         init_image = preprocess_image(init_image, vae_image_size)
         latents = encode_image(init_image, pipe.vae)
             latents = conv(latents)
             print(f"Latents shape after channel conversion: {latents.shape}")
+        # Debugging input shape before calling transformer
+        print(f"Latents shape before reshaping for transformer: {latents.shape}")
         # Reshape latents to match the transformer's input expectations
+        latents = latents.permute(0, 2, 3, 1).contiguous().view(-1, 64)  # Assuming the transformer expects (batch, sequence, feature)
+        print(f"Latents shape after reshaping for transformer: {latents.shape}")
         # Adding extra debug to understand what transformer expects
         try:
             print("Calling the transformer with latents")
             guidance_scale=0.0,
             latents=latents
         ).images[0]
     print("Inference complete")
     return image, seed
 # Define example prompts
 examples = [
     "a tiny astronaut hatching from an egg on the moon",