Runtime error

Jordan Legg committed · Commit 817a141 · 1 Parent(s): bf5cb46
change image to latents
app.py CHANGED
@@ -12,7 +12,7 @@ MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 2048
 MIN_IMAGE_SIZE = 256
 DEFAULT_IMAGE_SIZE = 1024
-MAX_PROMPT_LENGTH =
+MAX_PROMPT_LENGTH = 500

 # Check for GPU availability
 device = "cuda" if torch.cuda.is_available() else "cpu"
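The new MAX_PROMPT_LENGTH constant feeds the clamp used later in infer(); a tiny illustrative snippet is below (the sample prompt is made up, the clamp itself comes from the later hunk):

MAX_PROMPT_LENGTH = 500

prompt = "a tiny red house on a hill"  # hypothetical example prompt
# Cap the conditioning sequence length at the prompt length, never above 500
max_sequence_length = min(MAX_PROMPT_LENGTH, len(prompt))
print(max_sequence_length)  # 26 for this prompt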
@@ -23,10 +23,7 @@ dtype = torch.float16 if device == "cuda" else torch.float32

 def load_model():
     try:
-
-        pipe.enable_model_cpu_offload()
-        pipe.enable_attention_slicing()
-        return pipe
+        return DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=dtype).to(device)
     except Exception as e:
         raise RuntimeError(f"Failed to load the model: {str(e)}")

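Read on its own, the simplified load_model() is a one-line pipeline load. Below is a minimal, self-contained sketch of that path, assuming the device/dtype globals shown in the surrounding context; the smoke test under __main__ is a hypothetical usage example, not part of the commit.

import torch
from diffusers import DiffusionPipeline

# Same device/dtype selection as in app.py
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

def load_model():
    try:
        # Single call replaces the previous construct / offload / attention-slicing sequence
        return DiffusionPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
        ).to(device)
    except Exception as e:
        raise RuntimeError(f"Failed to load the model: {str(e)}")

if __name__ == "__main__":
    pipe = load_model()  # hypothetical smoke test; downloads weights on first run
    print(type(pipe).__name__)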
@@ -72,21 +69,30 @@ def infer(prompt, init_image=None, seed=42, randomize_seed=False, width=DEFAULT_
     max_sequence_length = min(MAX_PROMPT_LENGTH, len(prompt))

     if init_image is not None:
+        # Process img2img
         init_image = init_image.convert("RGB")
         init_image = preprocess_image(init_image, (height, width))
-
-
+
+        # Encode the image using the VAE
+        with torch.no_grad():
+            init_latents = pipe.vae.encode(init_image).latent_dist.sample(generator=generator)
+            init_latents = 0.18215 * init_latents
+
+        # Ensure latents are correctly shaped
+        init_latents = torch.nn.functional.interpolate(init_latents, size=(height // 8, width // 8), mode='bilinear', align_corners=False)
+
         image = pipe(
             prompt=prompt,
-            image=latents,  # Changed from latents=latents to image=latents
             height=height,
             width=width,
             num_inference_steps=num_inference_steps,
             generator=generator,
             guidance_scale=0.0,
+            latents=init_latents,  # Use latents instead of image
             max_sequence_length=max_sequence_length
         ).images[0]
     else:
+        # Process text2img
         image = pipe(
             prompt=prompt,
             height=height,
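To make the new img2img branch easier to follow in isolation, here is a standalone sketch of the latent-preparation step. It assumes preprocess_image has already produced a normalized [1, 3, H, W] tensor on the pipeline's device (the diff implies but does not show this), and it reads the scaling factor from vae.config.scaling_factor instead of hard-coding 0.18215, which is the Stable Diffusion v1 value; treat it as an illustration of the steps the diff performs, not as the committed code, and verify that the installed pipeline accepts latents of this shape.

import torch

def encode_init_image_to_latents(pipe, init_image_tensor, generator, height, width):
    # Hypothetical helper mirroring the diff's VAE-encode step
    with torch.no_grad():
        # Encode the preprocessed image; latent_dist is a Gaussian we sample from
        latents = pipe.vae.encode(init_image_tensor).latent_dist.sample(generator=generator)

    # The commit hard-codes 0.18215; reading the factor from the VAE config is the
    # more general form (an assumption, not what the diff does)
    scaling = getattr(pipe.vae.config, "scaling_factor", 0.18215)
    latents = scaling * latents

    # Resize latents to the target spatial size (the VAE downsamples by 8x)
    latents = torch.nn.functional.interpolate(
        latents, size=(height // 8, width // 8), mode="bilinear", align_corners=False
    )
    return latents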