Spaces:

RamAnanth1
/

ControlNet

Runtime error

App Files Files Community

RamAnanth1 committed on Mar 3, 2023

Commit

4be7ef1

1 Parent(s): 1e6d524

First attempt at porting to diffusers

Browse files

Files changed (1) hide show

app.py +112 -201

app.py CHANGED Viewed

@@ -4,182 +4,103 @@ import gradio as gr
 import numpy as np
 import torch
-from pytorch_lightning import seed_everything
-from util import resize_image, HWC3, apply_canny
-from ldm.models.diffusion.ddim import DDIMSampler
-from annotator.openpose import apply_openpose
-from cldm.model import create_model, load_state_dict
-from huggingface_hub import hf_hub_url, cached_download
-REPO_ID = "lllyasviel/ControlNet"
-canny_checkpoint = "models/control_sd15_canny.pth"
-scribble_checkpoint = "models/control_sd15_scribble.pth"
-pose_checkpoint = "models/control_sd15_openpose.pth"
-# REPO_ID = "webui/ControlNet-modules-safetensors"
-# canny_checkpoint = "control_canny-fp16.safetensors"
-# scribble_checkpoint = "control_scribble-fp16.safetensors"
-# pose_checkpoint = "control_openpose-fp16.safetensors"
-canny_model = create_model('./models/cldm_v15.yaml').cpu()
-canny_model.load_state_dict(load_state_dict(cached_download(
-    hf_hub_url(REPO_ID, canny_checkpoint)
-), location='cpu'))
-canny_model = canny_model.cuda()
-ddim_sampler = DDIMSampler(canny_model)
-pose_model = create_model('./models/cldm_v15.yaml').cpu()
-pose_model.load_state_dict(load_state_dict(cached_download(
-    hf_hub_url(REPO_ID, pose_checkpoint)
-), location='cpu'))
-pose_model = pose_model.cuda()
-ddim_sampler_pose = DDIMSampler(pose_model)
-scribble_model = create_model('./models/cldm_v15.yaml').cpu()
-scribble_model.load_state_dict(load_state_dict(cached_download(
-    hf_hub_url(REPO_ID, scribble_checkpoint)
-), location='cpu'))
-scribble_model = scribble_model.cuda()
-ddim_sampler_scribble = DDIMSampler(scribble_model)
-save_memory = False
-def process(input_image, prompt, input_control, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold):
-    # TODO: Add other control tasks
-    if input_control == "Scribble":
-        return process_scribble(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta)
-    elif input_control == "Pose":
-        return process_pose(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, image_resolution, ddim_steps, scale, seed, eta)
-    return process_canny(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold)
-def process_canny(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold):
-    with torch.no_grad():
-        img = resize_image(HWC3(input_image), image_resolution)
-        H, W, C = img.shape
-        detected_map = apply_canny(img, low_threshold, high_threshold)
-        detected_map = HWC3(detected_map)
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
-        control = torch.stack([control for _ in range(num_samples)], dim=0)
-        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
-        seed_everything(seed)
-        if save_memory:
-            canny_model.low_vram_shift(is_diffusing=False)
-        cond = {"c_concat": [control], "c_crossattn": [canny_model.get_learned_conditioning([prompt + ', ' + a_prompt] * num_samples)]}
-        un_cond = {"c_concat": [control], "c_crossattn": [canny_model.get_learned_conditioning([n_prompt] * num_samples)]}
-        shape = (4, H // 8, W // 8)
-        if save_memory:
-            canny_model.low_vram_shift(is_diffusing=False)
-        samples, intermediates = ddim_sampler.sample(ddim_steps, num_samples,
-                                                     shape, cond, verbose=False, eta=eta,
-                                                     unconditional_guidance_scale=scale,
-                                                     unconditional_conditioning=un_cond)
-        if save_memory:
-            canny_model.low_vram_shift(is_diffusing=False)
-        x_samples = canny_model.decode_first_stage(samples)
-        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
-        results = [x_samples[i] for i in range(num_samples)]
-    return [255 - detected_map] + results
-def process_scribble(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta):
-    with torch.no_grad():
-        img = resize_image(HWC3(input_image), image_resolution)
-        H, W, C = img.shape
-        detected_map = np.zeros_like(img, dtype=np.uint8)
-        detected_map[np.min(img, axis=2) < 127] = 255
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
-        control = torch.stack([control for _ in range(num_samples)], dim=0)
-        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
-        seed_everything(seed)
-        if save_memory:
-            scribble_model.low_vram_shift(is_diffusing=False)
-        cond = {"c_concat": [control], "c_crossattn": [scribble_model.get_learned_conditioning([prompt + ', ' + a_prompt] * num_samples)]}
-        un_cond = {"c_concat": [control], "c_crossattn": [scribble_model.get_learned_conditioning([n_prompt] * num_samples)]}
-        shape = (4, H // 8, W // 8)
-        if save_memory:
-            scribble_model.low_vram_shift(is_diffusing=False)
-        samples, intermediates = ddim_sampler_scribble.sample(ddim_steps, num_samples,
-                                                     shape, cond, verbose=False, eta=eta,
-                                                     unconditional_guidance_scale=scale,
-                                                     unconditional_conditioning=un_cond)
-        if save_memory:
-            scribble_model.low_vram_shift(is_diffusing=False)
-        x_samples = scribble_model.decode_first_stage(samples)
-        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
-        results = [x_samples[i] for i in range(num_samples)]
-    return [255 - detected_map] + results
-def process_pose(input_image, prompt, a_prompt, n_prompt, num_samples, image_resolution, detect_resolution, ddim_steps, scale, seed, eta):
-    with torch.no_grad():
-        input_image = HWC3(input_image)
-        detected_map, _ = apply_openpose(resize_image(input_image, detect_resolution))
-        detected_map = HWC3(detected_map)
-        img = resize_image(input_image, image_resolution)
-        H, W, C = img.shape
-        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_NEAREST)
-        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
-        control = torch.stack([control for _ in range(num_samples)], dim=0)
-        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
-        if seed == -1:
-            seed = random.randint(0, 65535)
-        seed_everything(seed)
-        if save_memory:
-            pose_model.low_vram_shift(is_diffusing=False)
-        cond = {"c_concat": [control], "c_crossattn": [pose_model.get_learned_conditioning([prompt + ', ' + a_prompt] * num_samples)]}
-        un_cond = {"c_concat": [control], "c_crossattn": [pose_model.get_learned_conditioning([n_prompt] * num_samples)]}
-        shape = (4, H // 8, W // 8)
-        if save_memory:
-            pose_model.low_vram_shift(is_diffusing=False)
-        samples, intermediates = ddim_sampler_pose.sample(ddim_steps, num_samples,
-                                                     shape, cond, verbose=False, eta=eta,
-                                                     unconditional_guidance_scale=scale,
-                                                     unconditional_conditioning=un_cond)
-        if save_memory:
-            pose_model.low_vram_shift(is_diffusing=False)
-        x_samples = pose_model.decode_first_stage(samples)
-        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)
-        results = [x_samples[i] for i in range(num_samples)]
-    return [detected_map] + results
-def create_canvas(w, h):
-    new_control_options = ["Interactive Scribble"]
-    return np.zeros(shape=(h, w, 3), dtype=np.uint8) + 255
 block = gr.Blocks().queue()
 control_task_list = [
@@ -222,52 +143,42 @@ with block:
                 [
             "bird.png",
             "bird",
-            "Canny Edge Map",
-            "best quality, extremely detailed",
-            'longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality',
-             1,
-            512,
-            20,
-            9.0,
-            123490213,
-            0.0,
-            100,
-            200
         ],
-                [
-            "turtle.png",
-            "turtle",
-            "Scribble",
-            "best quality, extremely detailed",
-            'longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality',
-             1,
-            512,
-            20,
-            9.0,
-            123490213,
-            0.0,
-            100,
-            200
-        ],
-                  [
-            "pose1.png",
-            "Chef in the Kitchen",
-            "Pose",
-            "best quality, extremely detailed",
-            'longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality',
-             1,
-            512,
-            20,
-            9.0,
-            123490213,
-            0.0,
-            100,
-            200
-        ]
     ]
     examples = gr.Examples(examples=examples_list,inputs = [input_image, prompt, input_control, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold], outputs = [result_gallery], cache_examples = True, fn = process)
     gr.Markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=RamAnanth1.ControlNet)")

 import numpy as np
 import torch
+from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
+from diffusers import UniPCMultistepScheduler
+from PIL import Image
+from controlnet_aux import OpenposeDetector
+# Constants: thresholds passed to cv2.Canny inside get_canny_filter.
+low_threshold = 100
+high_threshold = 200
+# Models: one ControlNet plus a Stable Diffusion v1.5 pipeline per control task, both loaded with torch.float16.
+controlnet_canny = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
+pipe_canny = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet_canny, safety_checker=None, torch_dtype=torch.float16
+)
+pipe_canny.scheduler = UniPCMultistepScheduler.from_config(pipe_canny.scheduler.config)  # replace the default scheduler, reusing its config
+# This call loads the individual model components onto the GPU on demand, so we don't
+# need to explicitly call pipe.to("cuda").
+pipe_canny.enable_model_cpu_offload()
+pipe_canny.enable_xformers_memory_efficient_attention()
+# Generator seeded with 0 once at import and passed to both pipelines on every call. NOTE(review): it is shared across requests, so outputs presumably differ from call to call - confirm whether per-request seeding is intended.
+generator = torch.manual_seed(0)
+pose_model = OpenposeDetector.from_pretrained("lllyasviel/ControlNet")  # invoked by get_pose
+controlnet_pose = ControlNetModel.from_pretrained(
+    "lllyasviel/sd-controlnet-openpose", torch_dtype=torch.float16
+)
+pipe_pose = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet_pose, safety_checker=None, torch_dtype=torch.float16
+)
+pipe_pose.scheduler = UniPCMultistepScheduler.from_config(pipe_pose.scheduler.config)  # same scheduler swap as for pipe_canny
+# This call loads the individual model components onto the GPU on demand, so we don't
+# need to explicitly call pipe.to("cuda").
+pipe_pose.enable_model_cpu_offload()
+# Enable xformers memory-efficient attention, as done for pipe_canny.
+pipe_pose.enable_xformers_memory_efficient_attention()
+from pytorch_lightning import seed_everything
+from util import resize_image, HWC3, apply_canny
+from ldm.models.diffusion.ddim import DDIMSampler
+from annotator.openpose import apply_openpose
+from cldm.model import create_model, load_state_dict
+def get_canny_filter(image):
+    if not isinstance(image, np.ndarray):
+        image = np.array(image)
+    image = cv2.Canny(image, low_threshold, high_threshold)
+    image = image[:
+, :, None]
+    image = np.concatenate([image, image, image], axis=2)
+    canny_image = Image.fromarray(image)
+    return canny_image
+def get_pose(image):
+    return pose_model(image)
+def process(input_image, prompt, input_control):
+    # TODO: Add other control tasks
+    if input_control == "Scribble":
+        return process_canny(input_image, prompt)
+    elif input_control == "Pose":
+        return process_pose(input_image, prompt)
+    return process_canny(input_image, prompt)
+def process_canny(input_image, prompt):
+    canny_image = get_canny_filter(input_image)
+    output = pipe_canny(
+        prompt,
+        canny_image,
+        generator=generator,
+        num_images_per_prompt=1,
+        num_inference_steps=20,
+    )
+    return [canny_image,output.images[0]]
+def process_pose(input_image, prompt):
+    pose_image = get_pose(input_image)
+    output = pipe_pose(
+        prompt,
+        pose_image,
+        generator=generator,
+        num_images_per_prompt=1,
+        num_inference_steps=20,
+    )
+    return [pose_image,output.images[0]]
 block = gr.Blocks().queue()
 control_task_list = [
                 [
             "bird.png",
             "bird",
+            "Canny Edge Map"
         ],
+        #         [
+        #     "turtle.png",
+        #     "turtle",
+        #     "Scribble",
+        #     "best quality, extremely detailed",
+        #     'longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality',
+        #      1,
+        #     512,
+        #     20,
+        #     9.0,
+        #     123490213,
+        #     0.0,
+        #     100,
+        #     200
+        # ],
+        #           [
+        #     "pose1.png",
+        #     "Chef in the Kitchen",
+        #     "Pose",
+        #     "best quality, extremely detailed",
+        #     'longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair,extra digit, fewer digits, cropped, worst quality, low quality',
+        #      1,
+        #     512,
+        #     20,
+        #     9.0,
+        #     123490213,
+        #     0.0,
+        #     100,
+        #     200
+        # ]
     ]
     examples = gr.Examples(examples=examples_list,inputs = [input_image, prompt, input_control, a_prompt, n_prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold], outputs = [result_gallery], cache_examples = True, fn = process)
     gr.Markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=RamAnanth1.ControlNet)")