| import torch | |
| from diffusers.utils import load_image | |
| # before merging, please import via local path | |
| from controlnet_qwenimage import QwenImageControlNetModel | |
| from transformer_qwenimage import QwenImageTransformer2DModel | |
| from pipeline_qwenimage_controlnet import QwenImageControlNetPipeline | |
# Example control conditions for the union ControlNet, keyed by control type.
# Each entry holds the control image path, the text prompt and the
# `controlnet_conditioning_scale` passed to the pipeline for that condition.
# It is highly suggested to add 'TEXT' into the prompt.
CONTROL_PRESETS = {
    "canny": {
        "control_image": "conds/canny.png",
        "prompt": "Aesthetics art, traditional asian pagoda, elaborate golden accents, sky blue and white color palette, swirling cloud pattern, digital illustration, east asian architecture, ornamental rooftop, intricate detailing on building, cultural representation.",
        "controlnet_conditioning_scale": 1.0,
    },
    # soft edge, recommended scale: 0.8 - 1.0
    "soft_edge": {
        "control_image": "conds/soft_edge.png",
        "prompt": "Photograph of a young man with light brown hair jumping mid-air off a large, reddish-brown rock. He's wearing a navy blue sweater, light blue shirt, gray pants, and brown shoes. His arms are outstretched, and he has a slight smile on his face. The background features a cloudy sky and a distant, leafless tree line. The grass around the rock is patchy.",
        "controlnet_conditioning_scale": 0.9,
    },
    "depth": {
        "control_image": "conds/depth.png",
        "prompt": "A swanky, minimalist living room with a huge floor-to-ceiling window letting in loads of natural light. A beige couch with white cushions sits on a wooden floor, with a matching coffee table in front. The walls are a soft, warm beige, decorated with two framed botanical prints. A potted plant chills in the corner near the window. Sunlight pours through the leaves outside, casting cool shadows on the floor.",
        "controlnet_conditioning_scale": 0.9,
    },
    "pose": {
        "control_image": "conds/pose.png",
        "prompt": "Photograph of a young man with light brown hair and a beard, wearing a beige flat cap, black leather jacket, gray shirt, brown pants, and white sneakers. He's sitting on a concrete ledge in front of a large circular window, with a cityscape reflected in the glass. The wall is cream-colored, and the sky is clear blue. His shadow is cast on the wall.",
        "controlnet_conditioning_scale": 1.0,
    },
}


def main(condition: str = "canny") -> None:
    """Generate one image with the Qwen-Image ControlNet pipeline and save it.

    Loads the union ControlNet and the base transformer in bfloat16, moves
    the pipeline to CUDA, runs 30 inference steps with a fixed seed (42) on
    the selected control condition, and writes the first returned image to
    ``qwenimage_cn_union_result.png`` in the current working directory.

    Args:
        condition: Key into ``CONTROL_PRESETS`` choosing the control image,
            prompt and conditioning scale. Defaults to ``"canny"``.

    Raises:
        ValueError: If ``condition`` is not a key of ``CONTROL_PRESETS``.
    """
    # Validate before loading any weights so a typo fails immediately.
    if condition not in CONTROL_PRESETS:
        choices = ", ".join(sorted(CONTROL_PRESETS))
        raise ValueError(f"unknown condition {condition!r}; expected one of: {choices}")
    preset = CONTROL_PRESETS[condition]

    base_model = "Qwen/Qwen-Image"
    controlnet_model = "InstantX/Qwen-Image-ControlNet-Union"

    controlnet = QwenImageControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
    transformer = QwenImageTransformer2DModel.from_pretrained(
        base_model, subfolder="transformer", torch_dtype=torch.bfloat16
    )
    pipe = QwenImageControlNetPipeline.from_pretrained(
        base_model, controlnet=controlnet, transformer=transformer, torch_dtype=torch.bfloat16
    )
    pipe.to("cuda")

    control_image = load_image(preset["control_image"])

    image = pipe(
        prompt=preset["prompt"],
        # NOTE(review): the single-space negative prompt is presumably what
        # activates true CFG together with true_cfg_scale - confirm against
        # the pipeline implementation.
        negative_prompt=" ",
        control_image=control_image,
        controlnet_conditioning_scale=preset["controlnet_conditioning_scale"],
        # The output resolution follows the control image: size[0] is passed
        # as width and size[1] as height.
        width=control_image.size[0],
        height=control_image.size[1],
        num_inference_steps=30,
        true_cfg_scale=4.0,
        # Fixed seed so repeated runs start from the same generator state.
        generator=torch.Generator(device="cuda").manual_seed(42),
    ).images[0]
    image.save("qwenimage_cn_union_result.png")


if __name__ == "__main__":
    main()

