flux-lightning

Runtime error

App Files Files Community

Jordan Legg committed on Aug 8, 2024

Commit

22e5a11

1 Parent(s): 29a504c

log the tensor shape

Browse files

Files changed (1) hide show

app.py +20 -19

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import spaces
 import torch
 from PIL import Image
 from torchvision import transforms
-from diffusers import DiffusionPipeline
 # Define constants
 dtype = torch.bfloat16
@@ -34,6 +34,10 @@ def encode_image(image, vae):
     print(f"Latents shape after encoding: {latents.shape}")
     return latents
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
     print(f"Inference started with prompt: {prompt}")
@@ -44,7 +48,6 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
     if init_image is None:
         print("No initial image provided, processing text2img")
-        # Process text2img
         try:
             print("Calling the diffusion pipeline for text2img")
             result = pipe(
@@ -58,12 +61,16 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
             image = result.images[0]
             print(f"Generated image shape: {image.size}")
-            # Since the 'latents' attribute is not present, we need to inspect other attributes
-            print(f"Result attributes: {dir(result)}")
         except Exception as e:
             print(f"Pipeline call failed with error: {e}")
             raise
     else:
         print("Initial image provided, processing img2img")
         vae_image_size = pipe.vae.config.sample_size
@@ -72,28 +79,21 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
         init_image = preprocess_image(init_image, vae_image_size)
         latents = encode_image(init_image, pipe.vae)
-        # Interpolating latents
-        print(f"Interpolating latents to size: {(height // 8, width // 8)}")
         latents = torch.nn.functional.interpolate(latents, size=(height // 8, width // 8))
-        print(f"Latents shape after interpolation: {latents.shape}")
-        # Convert latent channels to 64 as expected by the transformer
         latent_channels = pipe.vae.config.latent_channels
         print(f"Expected latent channels: 64, current latent channels: {latent_channels}")
         if latent_channels != 64:
             print(f"Converting latent channels from {latent_channels} to 64")
             conv = torch.nn.Conv2d(latent_channels, 64, kernel_size=1).to(device, dtype=dtype)
             latents = conv(latents)
-            print(f"Latents shape after channel conversion: {latents.shape}")
-        # Debugging input shape before calling transformer
-        print(f"Latents shape before reshaping for transformer: {latents.shape}")
-        # Reshape latents to match the transformer's input expectations
-        latents = latents.permute(0, 2, 3, 1).contiguous().view(-1, 64)  # Assuming the transformer expects (batch, sequence, feature)
-        print(f"Latents shape after reshaping for transformer: {latents.shape}")
-        # Adding extra debug to understand what transformer expects
         try:
             print("Calling the transformer with latents")
             # Dummy call to transformer to understand the shape requirement
@@ -103,8 +103,8 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
             print(f"Transformer call failed with error: {e}")
             raise
-        print("Calling the diffusion pipeline with latents")
         try:
             image = pipe(
                 prompt=prompt,
                 height=height,
@@ -121,6 +121,7 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, he
     print("Inference complete")
     return image, seed
 # Define example prompts
 examples = [
     "a tiny astronaut hatching from an egg on the moon",

 import torch
 from PIL import Image
 from torchvision import transforms
+from diffusers import DiffusionPipeline, AutoencoderKL
 # Define constants
 dtype = torch.bfloat16
     print(f"Latents shape after encoding: {latents.shape}")
     return latents
+# A utility function to log a tensor's shape, dtype, and device under a given label
+def log_tensor_info(tensor, name):
+    print(f"{name} shape: {tensor.shape} dtype: {tensor.dtype} device: {tensor.device}")
 @spaces.GPU()
 def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=4, progress=gr.Progress(track_tqdm=True)):
     print(f"Inference started with prompt: {prompt}")
     if init_image is None:
         print("No initial image provided, processing text2img")
         try:
             print("Calling the diffusion pipeline for text2img")
             result = pipe(
             image = result.images[0]
             print(f"Generated image shape: {image.size}")
+            # Inspect the output and log relevant details
+            print("Logging detailed information for text2img:")
+            for name, param in pipe.named_parameters():
+                if 'weight' in name:
+                    log_tensor_info(param, name)
+            print("Logging complete.")
         except Exception as e:
             print(f"Pipeline call failed with error: {e}")
             raise
     else:
         print("Initial image provided, processing img2img")
         vae_image_size = pipe.vae.config.sample_size
         init_image = preprocess_image(init_image, vae_image_size)
         latents = encode_image(init_image, pipe.vae)
+        print("Interpolating latents to match model's input size...")
         latents = torch.nn.functional.interpolate(latents, size=(height // 8, width // 8))
+        log_tensor_info(latents, "Latents after interpolation")
         latent_channels = pipe.vae.config.latent_channels
         print(f"Expected latent channels: 64, current latent channels: {latent_channels}")
         if latent_channels != 64:
             print(f"Converting latent channels from {latent_channels} to 64")
             conv = torch.nn.Conv2d(latent_channels, 64, kernel_size=1).to(device, dtype=dtype)
             latents = conv(latents)
+            log_tensor_info(latents, "Latents after channel conversion")
+        latents = latents.permute(0, 2, 3, 1).contiguous().view(-1, 64)
+        log_tensor_info(latents, "Latents after reshaping for transformer")
         try:
             print("Calling the transformer with latents")
             # Dummy call to transformer to understand the shape requirement
             print(f"Transformer call failed with error: {e}")
             raise
         try:
+            print("Calling the diffusion pipeline with latents")
             image = pipe(
                 prompt=prompt,
                 height=height,
     print("Inference complete")
     return image, seed
 # Define example prompts
 examples = [
     "a tiny astronaut hatching from an egg on the moon",