Joseph Catrambone committed
Commit 3dbb2cf · 1 Parent(s): b5ecd5f

Prevent models from forcing tensors to CUDA. Increase the default max_faces from 1 to 5.

Files changed:
- app.py +1 -1
- cldm/ddim_hacked.py +4 -3
- ldm/models/diffusion/ddim.py +4 -3
- ldm/models/diffusion/dpm_solver/sampler.py +4 -3
- ldm/models/diffusion/plms.py +4 -3
- ldm/modules/encoders/modules.py +18 -5
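The common thread in the sampler and encoder changes below is lazy device selection: pick CUDA only when it is actually available, and let callers override it. A minimal self-contained sketch of that pattern, mirroring the default_device helper this commit adds to ldm/modules/encoders/modules.py (the Encoder class here is purely illustrative, not one of the repo's classes):

    import torch

    # Same idea as the helper added in ldm/modules/encoders/modules.py:
    # resolve the default device once, CUDA only if it exists.
    default_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


    class Encoder:
        def __init__(self, device=None):
            # Callers may still pass an explicit device; None means "use the default".
            if device is None:
                device = default_device
            self.device = device


    print(Encoder().device)                     # cuda on GPU machines, cpu otherwise
    print(Encoder(torch.device("cpu")).device)  # an explicit device still wins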
app.py  CHANGED
@@ -86,7 +86,7 @@ with block:
             run_button = gr.Button(label="Run")
             with gr.Accordion("Advanced options", open=False):
                 num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
-                max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=
+                max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=10, value=5, step=1)
                 min_confidence = gr.Slider(label="Min Confidence", minimum=0.01, maximum=1.0, value=0.5, step=0.01)
                 strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                 guess_mode = gr.Checkbox(label='Guess Mode', value=False)
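For the UI change, the new slider default only affects what the form starts at; whatever the user leaves the slider on is what reaches the click handler. A minimal Gradio sketch of that wiring (the process function here is a stand-in, not this Space's actual handler):

    import gradio as gr

    def process(num_samples, max_faces):
        # Stand-in for the Space's real handler: just echo the slider values.
        return f"samples={num_samples}, max_faces={max_faces}"

    with gr.Blocks() as block:
        num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
        max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=10, value=5, step=1)
        result = gr.Textbox(label="Result")
        run_button = gr.Button("Run")
        # value= only sets the initial UI state; the callback receives whatever
        # the user left the sliders on when Run is clicked.
        run_button.click(fn=process, inputs=[num_samples, max_faces], outputs=result)

    if __name__ == "__main__":
        block.launch()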
cldm/ddim_hacked.py  CHANGED
@@ -15,9 +15,10 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force attr to CUDA device by default. It may not exist.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
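After this change the sampler's buffers simply keep the device of the tensors handed to register_buffer, so whoever builds the model decides where things live. A toy stand-in (not the repo's DDIMSampler) showing the resulting contract:

    import torch

    class TinySampler:
        """Toy stand-in for the patched register_buffer behaviour."""

        def __init__(self, alphas_cumprod: torch.Tensor):
            self.register_buffer("alphas_cumprod", alphas_cumprod)

        def register_buffer(self, name, attr):
            # No implicit attr.to(torch.device("cuda")) any more: the tensor
            # keeps whatever device the caller put it on.
            setattr(self, name, attr)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    alphas = torch.linspace(0.9999, 0.01, 50, device=device)

    sampler = TinySampler(alphas)
    assert sampler.alphas_cumprod.device == alphas.device  # buffer inherits the caller's device

The three remaining sampler files get the same treatment as cldm/ddim_hacked.py.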
ldm/models/diffusion/ddim.py  CHANGED
@@ -15,9 +15,10 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force module to cuda by default.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/models/diffusion/dpm_solver/sampler.py  CHANGED
@@ -18,9 +18,10 @@ class DPMSolverSampler(object):
         self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod))
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # This is in the original sampler.py, but it is forcing the attr to 'cuda' instead of the default device.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     @torch.no_grad()
ldm/models/diffusion/plms.py  CHANGED
@@ -17,9 +17,10 @@ class PLMSSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force module to CUDA by default.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/modules/encoders/modules.py  CHANGED
@@ -8,6 +8,9 @@ import open_clip
 from ldm.util import default, count_params
 
 
+default_device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
+
+
 class AbstractEncoder(nn.Module):
     def __init__(self):
         super().__init__()
@@ -42,7 +45,9 @@ class ClassEmbedder(nn.Module):
         c = self.embedding(c)
         return c
 
-    def get_unconditional_conditioning(self, bs, device="cuda"):
+    def get_unconditional_conditioning(self, bs, device=None):
+        if device is None:
+            device = default_device
         uc_class = self.n_classes - 1  # 1000 classes --> 0 ... 999, one extra class for ucg (class 1000)
         uc = torch.ones((bs,), device=device) * uc_class
         uc = {self.key: uc}
@@ -57,8 +62,10 @@ def disabled_train(self, mode=True):
 
 class FrozenT5Embedder(AbstractEncoder):
     """Uses the T5 transformer encoder for text"""
-    def __init__(self, version="google/t5-v1_1-large", device="cuda", max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
+    def __init__(self, version="google/t5-v1_1-large", device=None, max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
         super().__init__()
+        if device is None:
+            device = default_device
         self.tokenizer = T5Tokenizer.from_pretrained(version)
         self.transformer = T5EncoderModel.from_pretrained(version)
         self.device = device
@@ -92,9 +99,11 @@ class FrozenCLIPEmbedder(AbstractEncoder):
         "pooled",
         "hidden"
     ]
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77,
+    def __init__(self, version="openai/clip-vit-large-patch14", device=None, max_length=77,
                  freeze=True, layer="last", layer_idx=None):  # clip-vit-base-patch32
         super().__init__()
+        if device is None:
+            device = default_device
         assert layer in self.LAYERS
         self.tokenizer = CLIPTokenizer.from_pretrained(version)
         self.transformer = CLIPTextModel.from_pretrained(version)
@@ -140,9 +149,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
         "last",
         "penultimate"
     ]
-    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cuda", max_length=77,
+    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device=None, max_length=77,
                  freeze=True, layer="last"):
         super().__init__()
+        if device is None:
+            device = default_device
         assert layer in self.LAYERS
         model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version)
         del model.visual
@@ -194,9 +205,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
 
 
 class FrozenCLIPT5Encoder(AbstractEncoder):
-    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device="cuda",
+    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device=None,
                  clip_max_length=77, t5_max_length=77):
         super().__init__()
+        if device is None:
+            device = default_device
         self.clip_encoder = FrozenCLIPEmbedder(clip_version, device, max_length=clip_max_length)
         self.t5_encoder = FrozenT5Embedder(t5_version, device, max_length=t5_max_length)
         print(f"{self.clip_encoder.__class__.__name__} has {count_params(self.clip_encoder)*1.e-6:.2f} M parameters, "
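On the encoder side, the upshot is that constructing an embedder without a device argument now works on CPU-only machines instead of assuming CUDA. A short usage sketch, assuming the stock module layout of this repository (running it downloads the CLIP weights from Hugging Face):

    import torch
    from ldm.modules.encoders.modules import FrozenCLIPEmbedder

    # No device argument: falls back to default_device, i.e. CUDA only when available.
    embedder = FrozenCLIPEmbedder()
    print(embedder.device)

    # An explicit device still overrides the default.
    cpu_embedder = FrozenCLIPEmbedder(device=torch.device("cpu"))
    print(cpu_embedder.device)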