Spaces:
Runtime error
Ahsen Khaliq
committed
Commit · f5dff55
1 Parent(s): fe6483f
Update app.py
app.py CHANGED
@@ -78,46 +78,50 @@ def tv_loss(input):
 def range_loss(input):
     return (input - input.clamp(-1, 1)).pow(2).mean([1, 2, 3])
 
-model_config
-model
-model.
 [old lines 81-115 deleted: module-level model_config / model setup; only the fragments above survive in this view]
 
-def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed):
     all_frames = []
     prompts = [text]
-    image_prompts
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.

@@ -217,6 +221,6 @@ def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed):
 title = "CLIP Guided Diffusion HQ"
 description = "Gradio demo for CLIP Guided Diffusion. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'> By Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). It uses OpenAI's 256x256 unconditional ImageNet diffusion model (https://github.com/openai/guided-diffusion) together with CLIP (https://github.com/openai/CLIP) to connect text prompts with images. | <a href='https://colab.research.google.com/drive/12a_Wrfi2_gwwAuN3VvMTwVMz9TfqctNj' target='_blank'>Colab</a></p>"
-iface = gr.Interface(inference, inputs=["text",gr.inputs.Image(type="file", label='initial image (optional)', optional=True),gr.inputs.Slider(minimum=0, maximum=45, step=1, default=0, label="skip_timesteps"), gr.inputs.Slider(minimum=0, maximum=3000, step=1, default=700, label="clip guidance scale (Controls how much the image should look like the prompt)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=150, label="tv_scale (Controls the smoothness of the final output)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=50, label="range_scale (Controls how far out of range RGB values are allowed to be)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=0, label="init_scale (This enhances the effect of the init image)"), gr.inputs.Number(default=0, label="Seed") ], outputs=["image","video"], title=title, description=description, article=article, examples=[["coral reef city by artistation artists", None, 0, 1000, 150, 50, 0, 0]],
     enable_queue=True)
 iface.launch()

 def range_loss(input):
     return (input - input.clamp(-1, 1)).pow(2).mean([1, 2, 3])
 
+def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt, timestep_respacing):
+    # Model settings
+    model_config = model_and_diffusion_defaults()
+    model_config.update({
+        'attention_resolutions': '32, 16, 8',
+        'class_cond': False,
+        'diffusion_steps': 1000,
+        'rescale_timesteps': True,
+        'timestep_respacing': str(timestep_respacing),  # Modify this value to decrease the number of
+        # timesteps.
+        'image_size': 256,
+        'learn_sigma': True,
+        'noise_schedule': 'linear',
+        'num_channels': 256,
+        'num_head_channels': 64,
+        'num_res_blocks': 2,
+        'resblock_updown': True,
+        'use_fp16': True,
+        'use_scale_shift_norm': True,
+    })
+    # Load models
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+    print('Using device:', device)
+    model, diffusion = create_model_and_diffusion(**model_config)
+    model.load_state_dict(torch.load('256x256_diffusion_uncond.pt', map_location='cpu'))
+    model.requires_grad_(False).eval().to(device)
+    for name, param in model.named_parameters():
+        if 'qkv' in name or 'norm' in name or 'proj' in name:
+            param.requires_grad_()
+    if model_config['use_fp16']:
+        model.convert_to_fp16()
+    clip_model = clip.load('ViT-B/16', jit=False)[0].eval().requires_grad_(False).to(device)
+    clip_size = clip_model.visual.input_resolution
+    normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
+                                     std=[0.26862954, 0.26130258, 0.27577711])
+    lpips_model = lpips.LPIPS(net='vgg').to(device)
 
+    #def inference(text, init_image, skip_timesteps, clip_guidance_scale, tv_scale, range_scale, init_scale, seed, image_prompt):
     all_frames = []
     prompts = [text]
+    if image_prompt:
+        image_prompts = [image_prompt.name]
+    else:
+        image_prompts = []
     batch_size = 1
     clip_guidance_scale = clip_guidance_scale # Controls how much the image should look like the prompt.
     tv_scale = tv_scale # Controls the smoothness of the final output.

 title = "CLIP Guided Diffusion HQ"
 description = "Gradio demo for CLIP Guided Diffusion. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'> By Katherine Crowson (https://github.com/crowsonkb, https://twitter.com/RiversHaveWings). It uses OpenAI's 256x256 unconditional ImageNet diffusion model (https://github.com/openai/guided-diffusion) together with CLIP (https://github.com/openai/CLIP) to connect text prompts with images. | <a href='https://colab.research.google.com/drive/12a_Wrfi2_gwwAuN3VvMTwVMz9TfqctNj' target='_blank'>Colab</a></p>"
+iface = gr.Interface(inference, inputs=["text",gr.inputs.Image(type="file", label='initial image (optional)', optional=True),gr.inputs.Slider(minimum=0, maximum=45, step=1, default=0, label="skip_timesteps"), gr.inputs.Slider(minimum=0, maximum=3000, step=1, default=700, label="clip guidance scale (Controls how much the image should look like the prompt)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=150, label="tv_scale (Controls the smoothness of the final output)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=50, label="range_scale (Controls how far out of range RGB values are allowed to be)"), gr.inputs.Slider(minimum=0, maximum=1000, step=1, default=0, label="init_scale (This enhances the effect of the init image)"), gr.inputs.Number(default=0, label="Seed"), gr.inputs.Image(type="file", label='image prompt (optional)', optional=True), gr.inputs.Slider(minimum=50, maximum=300, step=1, default=90, label="timestep respacing")], outputs=["image","video"], title=title, description=description, article=article, examples=[["coral reef city by artistation artists", None, 0, 1000, 150, 50, 0, 0, None, 90]],
     enable_queue=True)
 iface.launch()
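
The substance of the commit is that the model and diffusion setup now runs inside inference() and picks up two new Gradio inputs: an optional image prompt and a timestep_respacing slider. As context for that slider, here is a minimal sketch (not code from this commit; it only assumes the guided_diffusion helpers that app.py already imports) of what the value controls: the 1000-step training noise schedule is re-spaced so that sampling runs in roughly that many steps.

from guided_diffusion.script_util import (
    model_and_diffusion_defaults,
    create_model_and_diffusion,
)

config = model_and_diffusion_defaults()
config.update({
    'diffusion_steps': 1000,       # length of the training noise schedule
    'timestep_respacing': '90',    # what the new slider feeds in (as a string)
})
# The model weights are irrelevant here; we only inspect the sampler.
_, diffusion = create_model_and_diffusion(**config)
print(diffusion.num_timesteps)     # 90: fewer denoising steps at sampling time

With the slider default of 90, sampling uses roughly 1000/90, i.e. about 11 times fewer denoising steps than the full schedule, typically trading some fidelity for speed.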
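The article string summarizes the method: the unconditional 256x256 ImageNet diffusion model proposes images, and CLIP steers each denoising step toward the text prompt. The sketch below illustrates that steering in the spirit of Katherine Crowson's notebooks; it is not the code in app.py. It assumes model, diffusion, clip_model, normalize and device are set up as in inference() above, make_cond_fn is a hypothetical helper, and it simply resizes the image for CLIP instead of using the random cutouts and auxiliary losses (tv_loss, range_loss, LPIPS) the real app applies.

import torch
import torch.nn.functional as F
import clip

def make_cond_fn(text, clip_guidance_scale):
    # Target text embedding for the prompt.
    text_embed = clip_model.encode_text(clip.tokenize([text]).to(device)).float()
    clip_size = clip_model.visual.input_resolution

    def cond_fn(x, t, **model_kwargs):
        # guided_diffusion calls this at every reverse step and uses the
        # returned gradient to shift the step's mean.
        with torch.enable_grad():
            x = x.detach().requires_grad_()
            # Model's current estimate of the clean image (roughly in [-1, 1]).
            pred = diffusion.p_mean_variance(model, x, t, clip_denoised=False)['pred_xstart']
            clip_in = F.interpolate(pred.add(1).div(2), size=(clip_size, clip_size),
                                    mode='bilinear', align_corners=False)
            image_embed = clip_model.encode_image(normalize(clip_in)).float()
            sim = F.cosine_similarity(image_embed, text_embed).sum()
            # Ascend the CLIP similarity with respect to the noisy image x.
            return torch.autograd.grad(sim * clip_guidance_scale, x)[0]

    return cond_fn

# samples = diffusion.p_sample_loop(model, (1, 3, 256, 256),
#                                   clip_denoised=False, model_kwargs={},
#                                   cond_fn=make_cond_fn(text, clip_guidance_scale))

guided_diffusion adds the returned gradient, scaled by the step variance, to the mean of each reverse step, which is why larger clip_guidance_scale values push the image harder toward the prompt.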