Update app.py

app.py CHANGED
@@ -15,9 +15,7 @@ from models.transformer_sd3 import SD3Transformer2DModel
 #from diffusers import StableDiffusion3Pipeline
 from transformers import CLIPTextModelWithProjection, T5EncoderModel
 from transformers import CLIPTokenizer, T5TokenizerFast
-#from diffusers import SD3Transformer2DModel, AutoencoderKL
 from diffusers import AutoencoderKL
-#from models.transformer_sd3 import SD3Transformer2DModel
 from pipeline_stable_diffusion_3_ipa import StableDiffusion3Pipeline

 from image_gen_aux import UpscaleWithModel
@@ -59,7 +57,7 @@ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 torch_dtype = torch.bfloat16

 transformer = SD3Transformer2DModel.from_pretrained(
-    model_path, subfolder="transformer"
+    model_path, subfolder="transformer" #, torch_dtype=torch.bfloat16
 )

 vaeX=AutoencoderKL.from_pretrained("ford442/stable-diffusion-3.5-large-fp32", safety_checker=None, use_safetensors=True, low_cpu_mem_usage=False, subfolder='vae', torch_dtype=torch.float32, token=True)
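Review note: leaving torch_dtype off the transformer load means from_pretrained keeps the checkpoint's stored precision, and the cast to bfloat16 only happens later via pipe.to(). A minimal sketch of the two load strategies (model_path is assumed to be defined earlier in app.py, as the hunk's context suggests):

```python
import torch
from models.transformer_sd3 import SD3Transformer2DModel

# As committed: load at the stored precision; the later
# pipe.to(device=device, dtype=torch.bfloat16) performs the downcast.
transformer = SD3Transformer2DModel.from_pretrained(
    model_path, subfolder="transformer"
)

# The commented-out alternative casts during loading instead, which
# avoids briefly holding a full-precision copy of the weights in host RAM.
# transformer = SD3Transformer2DModel.from_pretrained(
#     model_path, subfolder="transformer", torch_dtype=torch.bfloat16
# )
```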
@@ -74,21 +72,19 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
     #tokenizer=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer", token=True),
     #tokenizer_2=CLIPTokenizer.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", add_prefix_space=True, subfolder="tokenizer_2", token=True),
     tokenizer_3=T5TokenizerFast.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", use_fast=True, subfolder="tokenizer_3", token=True),
-    torch_dtype=torch.bfloat16,
+    #torch_dtype=torch.bfloat16,
     transformer=transformer,
     vae=None
     #use_safetensors=False,
 )

-
+pipe.to(device=device, dtype=torch.bfloat16)

-pipe.to(device)
+#pipe.to(device)
 pipe.vae=vaeX.to(device)
 text_encoder=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder', token=True).to(device=device, dtype=torch.bfloat16)
 text_encoder_2=CLIPTextModelWithProjection.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
 text_encoder_3=T5EncoderModel.from_pretrained("ford442/stable-diffusion-3.5-large-bf16", subfolder='text_encoder_3',token=True).to(device=device, dtype=torch.bfloat16)
-
-

 upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
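Review note: the new pipe.to(device=device, dtype=torch.bfloat16) casts every module mounted on the pipeline, and the float32 VAE is attached afterwards, so the transformer runs in bf16 while decoding stays at full precision. Anything decoding latents by hand must first match the VAE's dtype. A hedged sketch following diffusers' SD3 scaling/shift convention (the helper name is hypothetical, not part of this commit):

```python
import torch

def decode_fp32(pipe, latents):
    # Latents come out of the bf16 transformer; match the fp32 VAE first.
    latents = latents.to(dtype=pipe.vae.dtype)
    # SD3's VAE applies both a scaling factor and a shift factor.
    latents = latents / pipe.vae.config.scaling_factor + pipe.vae.config.shift_factor
    with torch.no_grad():
        return pipe.vae.decode(latents, return_dict=False)[0]
```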
@@ -120,11 +116,9 @@ def infer(
     image_encoder_path=None,
     progress=gr.Progress(track_tqdm=True),
 ):
-
     pipe.text_encoder=text_encoder
     pipe.text_encoder_2=text_encoder_2
     pipe.text_encoder_3=text_encoder_3
-
     pipe.init_ipadapter(
         ip_adapter_path=ipadapter_path,
         image_encoder_path=image_encoder_path,
@@ -140,25 +134,25 @@ def infer(
         sd_image_a = Image.open(latent_file.name).convert('RGB')
         print("-- using image file and loading ip-adapter --")
         #sd_image_a.resize((height,width), Image.LANCZOS)
-        sd_image_a.resize((height,width), Image.LANCZOS)
+        sd_image_a.resize((width,height), Image.LANCZOS)
         if latent_file_2 is not None: # Check if a latent file is provided
             sd_image_b = Image.open(latent_file_2.name).convert('RGB')
-            sd_image_b.resize((height,width), Image.LANCZOS)
+            sd_image_b.resize((width,height), Image.LANCZOS)
         else:
             sd_image_b = None
         if latent_file_3 is not None: # Check if a latent file is provided
             sd_image_c = Image.open(latent_file_3.name).convert('RGB')
-            sd_image_c.resize((height,width), Image.LANCZOS)
+            sd_image_c.resize((width,height), Image.LANCZOS)
         else:
             sd_image_c = None
         if latent_file_4 is not None: # Check if a latent file is provided
             sd_image_d = Image.open(latent_file_4.name).convert('RGB')
-            sd_image_d.resize((height,width), Image.LANCZOS)
+            sd_image_d.resize((width,height), Image.LANCZOS)
         else:
             sd_image_d = None
         if latent_file_5 is not None: # Check if a latent file is provided
             sd_image_e = Image.open(latent_file_5.name).convert('RGB')
-            sd_image_e.resize((height,width), Image.LANCZOS)
+            sd_image_e.resize((width,height), Image.LANCZOS)
         else:
             sd_image_e = None
         print('-- generating image --')
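Review note: the switch to (width, height) matches the tuple order PIL expects, but Image.resize is not in-place; it returns a new image, so as written these calls still discard their result. A sketch of the binding form the hunk likely intends:

```python
from PIL import Image

sd_image_a = Image.open(latent_file.name).convert('RGB')
# resize() returns a new Image; rebind it, or the resize is a no-op.
sd_image_a = sd_image_a.resize((width, height), Image.LANCZOS)
```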