import torch
import os
import random
import re
import gc
import json
import psutil

import comfy.model_management as mm
from comfy.utils import ProgressBar, load_torch_file
import folder_paths

script_directory = os.path.dirname(os.path.abspath(__file__))
folder_paths.add_model_folder_path("llms", os.path.join(folder_paths.models_dir, "llms", "checkpoints"))

from .kolors.pipelines.pipeline_stable_diffusion_xl_chatglm_256 import StableDiffusionXLPipeline
from .kolors.models.modeling_chatglm import ChatGLMModel, ChatGLMConfig
from .kolors.models.tokenization_chatglm import ChatGLMTokenizer

from diffusers import UNet2DConditionModel
from diffusers import (DPMSolverMultistepScheduler,
                       EulerDiscreteScheduler,
                       EulerAncestralDiscreteScheduler,
                       DEISMultistepScheduler,
                       UniPCMultistepScheduler
                       )
from contextlib import nullcontext

try:
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device
    is_accelerate_available = True
except ImportError:
    # Without this fallback, `is_accelerate_available` would be undefined when
    # accelerate is not installed and later checks would raise a NameError.
    is_accelerate_available = False

class DownloadAndLoadKolorsModel:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "model": (
                [
                    'Kwai-Kolors/Kolors',
                ],
            ),
            "precision": (['fp16'],
                {
                    "default": 'fp16'
                }),
            },
        }

    RETURN_TYPES = ("KOLORSMODEL",)
    RETURN_NAMES = ("kolors_model",)
    FUNCTION = "loadmodel"
    CATEGORY = "KwaiKolorsWrapper"

    def loadmodel(self, model, precision):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        dtype = {"bf16": torch.bfloat16, "fp16": torch.float16, "fp32": torch.float32}[precision]

        pbar = ProgressBar(4)

        model_name = model.rsplit('/', 1)[-1]
        model_path = os.path.join(folder_paths.models_dir, "diffusers", model_name)

        if not os.path.exists(model_path):
            print(f"Downloading Kolors model to: {model_path}")
            from huggingface_hub import snapshot_download
            snapshot_download(repo_id=model,
                              allow_patterns=['*fp16.safetensors*', '*.json'],
                              ignore_patterns=['vae/*', 'text_encoder/*', 'tokenizer/*'],
                              local_dir=model_path,
                              local_dir_use_symlinks=False)
        pbar.update(1)

        ram_rss_start = psutil.Process().memory_info().rss
        scheduler = EulerDiscreteScheduler.from_pretrained(model_path, subfolder='scheduler')
        print('Load UNET...')
        unet = UNet2DConditionModel.from_pretrained(model_path, subfolder='unet', variant="fp16", revision=None, low_cpu_mem_usage=True).to(dtype).eval()
        ram_rss_end = psutil.Process().memory_info().rss
        print(f'Kolors-unet: RAM allocated = {(ram_rss_end - ram_rss_start) / (1024 * 1024 * 1024):.3f}GB')

        pipeline = StableDiffusionXLPipeline(
            unet=unet,
            scheduler=scheduler,
        )

        kolors_model = {
            'pipeline': pipeline,
            'dtype': dtype,
        }

        return (kolors_model,)

class LoadChatGLM3:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "chatglm3_checkpoint": (folder_paths.get_filename_list("llms"),),
            },
        }

    RETURN_TYPES = ("CHATGLM3MODEL",)
    RETURN_NAMES = ("chatglm3_model",)
    FUNCTION = "loadmodel"
    CATEGORY = "KwaiKolorsWrapper"

    def loadmodel(self, chatglm3_checkpoint):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        print(f'chatglm3: device={device}, offload_device={offload_device}')
        pbar = ProgressBar(2)

        chatglm3_path = folder_paths.get_full_path("llms", chatglm3_checkpoint)

        print("Load TEXT_ENCODER...")
        text_encoder_config = os.path.join(script_directory, 'configs', 'text_encoder_config.json')
        with open(text_encoder_config, 'r') as file:
            config = json.load(file)

        text_encoder_config = ChatGLMConfig(**config)

        with (init_empty_weights() if is_accelerate_available else nullcontext()):
            text_encoder = ChatGLMModel(text_encoder_config)
            if '4bit' in chatglm3_checkpoint:
                text_encoder.quantize(4)
            elif '8bit' in chatglm3_checkpoint:
                text_encoder.quantize(8)

        text_encoder_sd = load_torch_file(chatglm3_path)

        if is_accelerate_available:
            for key in text_encoder_sd:
                set_module_tensor_to_device(text_encoder, key, device=offload_device, value=text_encoder_sd[key])
        else:
            text_encoder.load_state_dict(text_encoder_sd)

        tokenizer_path = os.path.join(script_directory, 'configs', "tokenizer")
        tokenizer = ChatGLMTokenizer.from_pretrained(tokenizer_path)
        pbar.update(1)

        chatglm3_model = {
            'text_encoder': text_encoder,
            'tokenizer': tokenizer
        }

        return (chatglm3_model,)

class DownloadAndLoadChatGLM3:
    @classmethod
    def INPUT_TYPES(s):
        return {"required": {
            "precision": (['fp16', 'quant4', 'quant8'],
                {
                    "default": 'fp16'
                }),
            },
        }

    RETURN_TYPES = ("CHATGLM3MODEL",)
    RETURN_NAMES = ("chatglm3_model",)
    FUNCTION = "loadmodel"
    CATEGORY = "KwaiKolorsWrapper"

    def loadmodel(self, precision):
        pbar = ProgressBar(2)

        model = "Kwai-Kolors/Kolors"
        model_name = model.rsplit('/', 1)[-1]
        model_path = os.path.join(folder_paths.models_dir, "diffusers", model_name)
        text_encoder_path = os.path.join(model_path, "text_encoder")

        if not os.path.exists(text_encoder_path):
            print(f"Downloading ChatGLM3 to: {text_encoder_path}")
            from huggingface_hub import snapshot_download
            snapshot_download(repo_id=model,
                              allow_patterns=['text_encoder/*'],
                              ignore_patterns=['*.py', '*.pyc'],
                              local_dir=model_path,
                              local_dir_use_symlinks=False)
        pbar.update(1)

        ram_rss_start = psutil.Process().memory_info().rss
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        print(f"Load TEXT_ENCODER..., {precision}, {offload_device}")
        text_encoder = ChatGLMModel.from_pretrained(
            text_encoder_path,
            torch_dtype=torch.float16
        ).to(offload_device)
        if precision == 'quant8':
            text_encoder.quantize(8)
        elif precision == 'quant4':
            text_encoder.quantize(4)
        #device_text = next(text_encoder.parameters()).device
        #print(f'chatglm3: device={device_text}, torch_device={device}, offload_device={offload_device}')

        tokenizer = ChatGLMTokenizer.from_pretrained(text_encoder_path)
        pbar.update(1)

        chatglm3_model = {
            'text_encoder': text_encoder,
            'tokenizer': tokenizer
        }
        ram_rss_end = psutil.Process().memory_info().rss
        print(f'chatglm3: RAM allocated = {(ram_rss_end - ram_rss_start) / (1024 * 1024 * 1024):.3f}GB')

        return (chatglm3_model,)

class KolorsTextEncode:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "chatglm3_model": ("CHATGLM3MODEL", ),
                "prompt": ("STRING", {"multiline": True, "default": ""}),
                "negative_prompt": ("STRING", {"multiline": True, "default": ""}),
                "num_images_per_prompt": ("INT", {"default": 1, "min": 1, "max": 128, "step": 1}),
            },
        }

    RETURN_TYPES = ("KOLORS_EMBEDS",)
    RETURN_NAMES = ("kolors_embeds",)
    FUNCTION = "encode"
    CATEGORY = "KwaiKolorsWrapper"

    def encode(self, chatglm3_model, prompt, negative_prompt, num_images_per_prompt):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        mm.unload_all_models()
        mm.soft_empty_cache()

        # Function to randomly select an option from the brackets
        def choose_random_option(match):
            options = match.group(1).split('|')
            return random.choice(options)

        # Randomly choose between options in brackets for prompt and negative_prompt
        prompt = re.sub(r'\{([^{}]*)\}', choose_random_option, prompt)
        negative_prompt = re.sub(r'\{([^{}]*)\}', choose_random_option, negative_prompt)

        if "|" in prompt:
            prompt = prompt.split("|")
            negative_prompt = [negative_prompt] * len(prompt)  # Replicate negative_prompt to match length of prompt list
            print(prompt)
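        # Example (hypothetical input): "a {red|blue} car | a {cat|dog}" first resolves
        # the bracketed choices (e.g. to "a blue car | a dog") and is then split on "|"
        # into a batch of two prompts, each paired with the same negative prompt.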
        do_classifier_free_guidance = True

        if prompt is not None and isinstance(prompt, str):
            batch_size = 1
        elif prompt is not None and isinstance(prompt, list):
            batch_size = len(prompt)

        # Define tokenizers and text encoders
        tokenizer = chatglm3_model['tokenizer']
        text_encoder = chatglm3_model['text_encoder']

        text_encoder.to(device)

        text_inputs = tokenizer(
            prompt,
            padding="max_length",
            max_length=256,
            truncation=True,
            return_tensors="pt",
        ).to(device)

        output = text_encoder(
            input_ids=text_inputs['input_ids'],
            attention_mask=text_inputs['attention_mask'],
            position_ids=text_inputs['position_ids'],
            output_hidden_states=True)

        prompt_embeds = output.hidden_states[-2].permute(1, 0, 2).clone()  # [batch_size, 256, 4096]
        text_proj = output.hidden_states[-1][-1, :, :].clone()  # [batch_size, 4096]

        bs_embed, seq_len, _ = prompt_embeds.shape
        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
        prompt_embeds = prompt_embeds.view(bs_embed * num_images_per_prompt, seq_len, -1)
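        # Shape bookkeeping (illustrative numbers, not from the source): with bs_embed=1,
        # seq_len=256 and num_images_per_prompt=2, repeat gives (1, 512, 4096) and the
        # view reshapes it to (2, 256, 4096), i.e. one embedding copy per generated image.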
        if do_classifier_free_guidance:
            uncond_tokens = []
            if negative_prompt is None:
                uncond_tokens = [""] * batch_size
            elif prompt is not None and type(prompt) is not type(negative_prompt):
                raise TypeError(
                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
                    f" {type(prompt)}."
                )
            elif isinstance(negative_prompt, str):
                uncond_tokens = [negative_prompt]
            elif batch_size != len(negative_prompt):
                raise ValueError(
                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
                    " the batch size of `prompt`."
                )
            else:
                uncond_tokens = negative_prompt

            max_length = prompt_embeds.shape[1]
            uncond_input = tokenizer(
                uncond_tokens,
                padding="max_length",
                max_length=max_length,
                truncation=True,
                return_tensors="pt",
            ).to(device)
            output = text_encoder(
                input_ids=uncond_input['input_ids'],
                attention_mask=uncond_input['attention_mask'],
                position_ids=uncond_input['position_ids'],
                output_hidden_states=True)
            negative_prompt_embeds = output.hidden_states[-2].permute(1, 0, 2).clone()  # [batch_size, 256, 4096]
            negative_text_proj = output.hidden_states[-1][-1, :, :].clone()  # [batch_size, 4096]

        if do_classifier_free_guidance:
            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
            seq_len = negative_prompt_embeds.shape[1]

            negative_prompt_embeds = negative_prompt_embeds.to(dtype=text_encoder.dtype, device=device)

            negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
            negative_prompt_embeds = negative_prompt_embeds.view(
                batch_size * num_images_per_prompt, seq_len, -1
            )

        bs_embed = text_proj.shape[0]
        text_proj = text_proj.repeat(1, num_images_per_prompt).view(
            bs_embed * num_images_per_prompt, -1
        )
        negative_text_proj = negative_text_proj.repeat(1, num_images_per_prompt).view(
            bs_embed * num_images_per_prompt, -1
        )

        text_encoder.to(offload_device)
        mm.soft_empty_cache()
        gc.collect()

        kolors_embeds = {
            'prompt_embeds': prompt_embeds,
            'negative_prompt_embeds': negative_prompt_embeds,
            'pooled_prompt_embeds': text_proj,
            'negative_pooled_prompt_embeds': negative_text_proj
        }

        return (kolors_embeds,)

class KolorsSampler:
    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "kolors_model": ("KOLORSMODEL", ),
                "kolors_embeds": ("KOLORS_EMBEDS", ),
                "width": ("INT", {"default": 1024, "min": 64, "max": 2048, "step": 64}),
                "height": ("INT", {"default": 1024, "min": 64, "max": 2048, "step": 64}),
                "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}),
                "steps": ("INT", {"default": 25, "min": 1, "max": 200, "step": 1}),
                "cfg": ("FLOAT", {"default": 5.0, "min": 0.0, "max": 20.0, "step": 0.01}),
                "scheduler": (
                    [
                        "EulerDiscreteScheduler",
                        "EulerAncestralDiscreteScheduler",
                        "DPMSolverMultistepScheduler",
                        "DPMSolverMultistepScheduler_SDE_karras",
                        "UniPCMultistepScheduler",
                        "DEISMultistepScheduler",
                    ],
                    {
                        "default": 'EulerDiscreteScheduler'
                    }
                ),
            },
            "optional": {
                "latent": ("LATENT", ),
                "denoise_strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.01}),
            }
        }

    RETURN_TYPES = ("LATENT",)
    RETURN_NAMES = ("latent",)
    FUNCTION = "process"
    CATEGORY = "KwaiKolorsWrapper"

    def process(self, kolors_model, kolors_embeds, width, height, seed, steps, cfg, scheduler, latent=None, denoise_strength=1.0):
        device = mm.get_torch_device()
        offload_device = mm.unet_offload_device()
        vae_scaling_factor = 0.13025  # SDXL scaling factor

        mm.soft_empty_cache()
        gc.collect()

        pipeline = kolors_model['pipeline']

        scheduler_config = {
            "beta_schedule": "scaled_linear",
            "beta_start": 0.00085,
            "beta_end": 0.014,
            "dynamic_thresholding_ratio": 0.995,
            "num_train_timesteps": 1100,
            "prediction_type": "epsilon",
            "rescale_betas_zero_snr": False,
            "steps_offset": 1,
            "timestep_spacing": "leading",
            "trained_betas": None,
        }
        if scheduler == "DPMSolverMultistepScheduler":
            noise_scheduler = DPMSolverMultistepScheduler(**scheduler_config)
        elif scheduler == "DPMSolverMultistepScheduler_SDE_karras":
            scheduler_config.update({"algorithm_type": "sde-dpmsolver++"})
            scheduler_config.update({"use_karras_sigmas": True})
            noise_scheduler = DPMSolverMultistepScheduler(**scheduler_config)
        elif scheduler == "DEISMultistepScheduler":
            scheduler_config.pop("rescale_betas_zero_snr")
            noise_scheduler = DEISMultistepScheduler(**scheduler_config)
        elif scheduler == "EulerDiscreteScheduler":
            scheduler_config.update({"interpolation_type": "linear"})
            scheduler_config.pop("dynamic_thresholding_ratio")
            noise_scheduler = EulerDiscreteScheduler(**scheduler_config)
        elif scheduler == "EulerAncestralDiscreteScheduler":
            scheduler_config.pop("dynamic_thresholding_ratio")
            noise_scheduler = EulerAncestralDiscreteScheduler(**scheduler_config)
        elif scheduler == "UniPCMultistepScheduler":
            scheduler_config.pop("rescale_betas_zero_snr")
            noise_scheduler = UniPCMultistepScheduler(**scheduler_config)
        pipeline.scheduler = noise_scheduler

        generator = torch.Generator(device).manual_seed(seed)

        pipeline.unet.to(device)

        if latent is not None:
            samples_in = latent['samples']
            samples_in = samples_in * vae_scaling_factor
            samples_in = samples_in.to(pipeline.unet.dtype).to(device)

        latent_out = pipeline(
            prompt=None,
            latents=samples_in if latent is not None else None,
            prompt_embeds=kolors_embeds['prompt_embeds'],
            pooled_prompt_embeds=kolors_embeds['pooled_prompt_embeds'],
            negative_prompt_embeds=kolors_embeds['negative_prompt_embeds'],
            negative_pooled_prompt_embeds=kolors_embeds['negative_pooled_prompt_embeds'],
            height=height,
            width=width,
            num_inference_steps=steps,
            guidance_scale=cfg,
            num_images_per_prompt=1,
            generator=generator,
            strength=denoise_strength,
        ).images

        pipeline.unet.to(offload_device)

        latent_out = latent_out / vae_scaling_factor

        return ({'samples': latent_out},)

NODE_CLASS_MAPPINGS = {
    "DownloadAndLoadKolorsModel": DownloadAndLoadKolorsModel,
    "DownloadAndLoadChatGLM3": DownloadAndLoadChatGLM3,
    "KolorsSampler": KolorsSampler,
    "KolorsTextEncode": KolorsTextEncode,
    "LoadChatGLM3": LoadChatGLM3
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "DownloadAndLoadKolorsModel": "(Down)load Kolors Model",
    "DownloadAndLoadChatGLM3": "(Down)load ChatGLM3 Model",
    "KolorsSampler": "Kolors Sampler",
    "KolorsTextEncode": "Kolors Text Encode",
    "LoadChatGLM3": "Load ChatGLM3 Model"
}
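# Typical wiring in a ComfyUI graph (a sketch based on the node types above, not a fixed layout):
#   (Down)load ChatGLM3 Model -> Kolors Text Encode  (chatglm3_model)
#   (Down)load Kolors Model   -> Kolors Sampler      (kolors_model)
#   Kolors Text Encode        -> Kolors Sampler      (kolors_embeds)
#   Kolors Sampler            -> a VAE Decode node   (latent; an SDXL VAE is assumed,
#                                matching the 0.13025 scaling factor used in KolorsSampler)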