Spaces:
Running
on
Zero
Running
on
Zero
| from typing_extensions import Literal, TypeAlias | |
| from models.wan_video_dit import WanModel | |
| from models.wan_video_text_encoder import WanTextEncoder | |
| from models.wan_video_image_encoder import WanImageEncoder | |
| from models.wan_video_vae import WanVideoVAE, WanVideoVAE38 | |
| from models.wan_video_motion_controller import WanMotionControllerModel | |
| from models.wan_video_vace import VaceWanModel | |
# Registry of known Wan checkpoints, one 5-tuple per recognised checkpoint.
# NOTE(review): the field layout is not documented in this file. It is presumably
# (state_dict_keys_hash, state_dict_keys_hash_with_shape, model_names,
#  model_classes, model_resource) -- confirm against the loader that consumes it.
# The first field is None for every entry; the name list and the class list of an
# entry always have the same length.
# A second, identical entry for hash "6bfcfb3b342cb286ce886889d519a77e" used to be
# listed here; it was a redundant copy and has been dropped.
model_loader_configs = [
    # Wan video DiT checkpoints
    (None, "9269f8db9040a9d860eaca435be61814", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "aafcfd9672c3a2456dc46e1cb6e52c70", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "6bfcfb3b342cb286ce886889d519a77e", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "6d6ccde6845b95ad9114ab993d917893", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "349723183fc063b2bfc10bb2835cf677", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "efa44cddf936c70abd0ea28b6cbe946c", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "3ef3b1f8e1dab83d5b71fd7b617f859f", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "70ddad9d3a133785da5ea371aae09504", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "26bde73488a92e64cc20b0a7485b9e5b", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "ac6a5aa74f4a0aab6f64eb9a72f19901", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "b61c605c2adbd23124d152ed28e049ae", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "1f5ab7703c6fc803fdded85ff040c316", ["wan_video_dit"], [WanModel], "civitai"),
    (None, "5b013604280dd715f8457c6ed6d6a626", ["wan_video_dit"], [WanModel], "civitai"),
    # Checkpoints that map to both the DiT and the VACE model
    (None, "a61453409b67cd3246cf0c3bebad47ba", ["wan_video_dit", "wan_video_vace"], [WanModel, VaceWanModel], "civitai"),
    (None, "7a513e1f257a861512b1afd387a8ecd9", ["wan_video_dit", "wan_video_vace"], [WanModel, VaceWanModel], "civitai"),
    # The only entry tagged "diffusers" instead of "civitai"
    (None, "cb104773c6c2cb6df4f9529ad5c60d0b", ["wan_video_dit"], [WanModel], "diffusers"),
    # Text encoder
    (None, "9c8818c2cbea55eca56c7b447df170da", ["wan_video_text_encoder"], [WanTextEncoder], "civitai"),
    # Image encoder
    (None, "5941c53e207d62f20f9025686193c40b", ["wan_video_image_encoder"], [WanImageEncoder], "civitai"),
    # VAE checkpoints (the last one maps to WanVideoVAE38)
    (None, "1378ea763357eea97acdef78e65d6d96", ["wan_video_vae"], [WanVideoVAE], "civitai"),
    (None, "ccc42284ea13e1ad04693284c7a09be6", ["wan_video_vae"], [WanVideoVAE], "civitai"),
    (None, "e1de6c02cdac79f8b739f4d3698cd216", ["wan_video_vae"], [WanVideoVAE38], "civitai"),
    # Motion controller
    (None, "dbd5ec76bbf977983f972c151d545389", ["wan_video_motion_controller"], [WanMotionControllerModel], "civitai"),
]
# Table used to detect the model type automatically.
# Every row is a 4-tuple:
#   (architecture_in_huggingface_config, huggingface_lib, model_name, redirected_architecture)
# The last field is None for the rows that carry no redirected architecture.
huggingface_model_loader_configs = [
    # Rows without a redirected architecture
    ("ChatGLMModel", "diffsynth.models.kolors_text_encoder", "kolors_text_encoder", None),
    ("MarianMTModel", "transformers.models.marian.modeling_marian", "translator", None),
    ("BloomForCausalLM", "transformers.models.bloom.modeling_bloom", "beautiful_prompt", None),
    ("Qwen2ForCausalLM", "transformers.models.qwen2.modeling_qwen2", "qwen_prompt", None),
    # Disabled row, kept for reference:
    # ("LlamaForCausalLM", "transformers.models.llama.modeling_llama", "omost_prompt", None),
    # Rows with a redirected architecture
    ("T5EncoderModel", "diffsynth.models.flux_text_encoder", "flux_text_encoder_2", "FluxTextEncoder2"),
    ("CogVideoXTransformer3DModel", "diffsynth.models.cog_dit", "cog_dit", "CogDiT"),
    ("SiglipModel", "transformers.models.siglip.modeling_siglip", "siglip_vision_model", "SiglipVisionModel"),
    ("LlamaForCausalLM", "diffsynth.models.hunyuan_video_text_encoder", "hunyuan_video_text_encoder_2", "HunyuanVideoLLMEncoder"),
    ("LlavaForConditionalGeneration", "diffsynth.models.hunyuan_video_text_encoder", "hunyuan_video_text_encoder_2", "HunyuanVideoMLLMEncoder"),
    ("Step1Model", "diffsynth.models.stepvideo_text_encoder", "stepvideo_text_encoder_2", "STEP1TextEncoder"),
    ("Qwen2_5_VLForConditionalGeneration", "diffsynth.models.qwenvl", "qwenvl", "Qwen25VL_7b_Embedder"),
]
# Table used to detect the model type automatically; it currently has no rows.
# A row, when added, follows the format
#   (state_dict_keys_hash_with_shape, model_name, model_class, extra_kwargs)
patch_model_loader_configs = []
# Download presets keyed by preset name.
# A preset value is either a list of 3-tuples, or a dict holding such a list under
# "file_list" together with a "load_path" list of paths.
# NOTE(review): each tuple looks like (repository id, file path inside the
# repository, local target directory) -- confirm against the downloader.
preset_models_on_huggingface = {
    # Hunyuan DiT
    "HunyuanDiT": [
        ("Tencent-Hunyuan/HunyuanDiT", f"t2i/{folder}/{weights}", f"models/HunyuanDiT/t2i/{folder}")
        for folder, weights in (
            ("clip_text_encoder", "pytorch_model.bin"),
            ("mt5", "pytorch_model.bin"),
            ("model", "pytorch_model_ema.pt"),
            ("sdxl-vae-fp16-fix", "diffusion_pytorch_model.bin"),
        )
    ],
    # Stable Video Diffusion
    "stable-video-diffusion-img2vid-xt": [
        ("stabilityai/stable-video-diffusion-img2vid-xt", "svd_xt.safetensors", "models/stable_video_diffusion"),
    ],
    # ExVideo
    "ExVideo-SVD-128f-v1": [
        ("ECNU-CILab/ExVideo-SVD-128f-v1", "model.fp16.safetensors", "models/stable_video_diffusion"),
    ],
    # Stable Diffusion
    "StableDiffusion_v15": [
        ("benjamin-paine/stable-diffusion-v1-5", "v1-5-pruned-emaonly.safetensors", "models/stable_diffusion"),
    ],
    "DreamShaper_8": [
        ("Yntec/Dreamshaper8", "dreamshaper_8.safetensors", "models/stable_diffusion"),
    ],
    # Textual Inversion
    "TextualInversion_VeryBadImageNegative_v1.3": [
        ("gemasai/verybadimagenegative_v1.3", "verybadimagenegative_v1.3.pt", "models/textual_inversion"),
    ],
    # Stable Diffusion XL
    "StableDiffusionXL_v1": [
        ("stabilityai/stable-diffusion-xl-base-1.0", "sd_xl_base_1.0.safetensors", "models/stable_diffusion_xl"),
    ],
    "BluePencilXL_v200": [
        ("frankjoshua/bluePencilXL_v200", "bluePencilXL_v200.safetensors", "models/stable_diffusion_xl"),
    ],
    "StableDiffusionXL_Turbo": [
        ("stabilityai/sdxl-turbo", "sd_xl_turbo_1.0_fp16.safetensors", "models/stable_diffusion_xl_turbo"),
    ],
    # Stable Diffusion 3
    "StableDiffusion3": [
        ("stabilityai/stable-diffusion-3-medium", "sd3_medium_incl_clips_t5xxlfp16.safetensors", "models/stable_diffusion_3"),
    ],
    "StableDiffusion3_without_T5": [
        ("stabilityai/stable-diffusion-3-medium", "sd3_medium_incl_clips.safetensors", "models/stable_diffusion_3"),
    ],
    # ControlNet
    "ControlNet_v11f1p_sd15_depth": [
        ("lllyasviel/ControlNet-v1-1", "control_v11f1p_sd15_depth.pth", "models/ControlNet"),
        ("lllyasviel/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators"),
    ],
    "ControlNet_v11p_sd15_softedge": [
        ("lllyasviel/ControlNet-v1-1", "control_v11p_sd15_softedge.pth", "models/ControlNet"),
        ("lllyasviel/Annotators", "ControlNetHED.pth", "models/Annotators"),
    ],
    "ControlNet_v11f1e_sd15_tile": [
        ("lllyasviel/ControlNet-v1-1", "control_v11f1e_sd15_tile.pth", "models/ControlNet"),
    ],
    "ControlNet_v11p_sd15_lineart": [
        ("lllyasviel/ControlNet-v1-1", "control_v11p_sd15_lineart.pth", "models/ControlNet"),
        ("lllyasviel/Annotators", "sk_model.pth", "models/Annotators"),
        ("lllyasviel/Annotators", "sk_model2.pth", "models/Annotators"),
    ],
    "ControlNet_union_sdxl_promax": [
        ("xinsir/controlnet-union-sdxl-1.0", "diffusion_pytorch_model_promax.safetensors", "models/ControlNet/controlnet_union"),
        ("lllyasviel/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators"),
    ],
    # AnimateDiff
    "AnimateDiff_v2": [
        ("guoyww/animatediff", "mm_sd_v15_v2.ckpt", "models/AnimateDiff"),
    ],
    "AnimateDiff_xl_beta": [
        ("guoyww/animatediff", "mm_sdxl_v10_beta.ckpt", "models/AnimateDiff"),
    ],
    # Qwen Prompt
    "QwenPrompt": [
        ("Qwen/Qwen2-1.5B-Instruct", filename, "models/QwenPrompt/qwen2-1.5b-instruct")
        for filename in (
            "config.json",
            "generation_config.json",
            "model.safetensors",
            "special_tokens_map.json",
            "tokenizer.json",
            "tokenizer_config.json",
            "merges.txt",
            "vocab.json",
        )
    ],
    # Beautiful Prompt
    "BeautifulPrompt": [
        ("alibaba-pai/pai-bloom-1b1-text2prompt-sd", filename, "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd")
        for filename in (
            "config.json",
            "generation_config.json",
            "model.safetensors",
            "special_tokens_map.json",
            "tokenizer.json",
            "tokenizer_config.json",
        )
    ],
    # Omost prompt
    "OmostPrompt": [
        ("lllyasviel/omost-llama-3-8b-4bits", filename, "models/OmostPrompt/omost-llama-3-8b-4bits")
        for filename in (
            "model-00001-of-00002.safetensors",
            "model-00002-of-00002.safetensors",
            "tokenizer.json",
            "tokenizer_config.json",
            "config.json",
            "generation_config.json",
            "model.safetensors.index.json",
            "special_tokens_map.json",
        )
    ],
    # Translator
    "opus-mt-zh-en": [
        ("Helsinki-NLP/opus-mt-zh-en", filename, "models/translator/opus-mt-zh-en")
        for filename in (
            "config.json",
            "generation_config.json",
            "metadata.json",
            "pytorch_model.bin",
            "source.spm",
            "target.spm",
            "tokenizer_config.json",
            "vocab.json",
        )
    ],
    # IP-Adapter
    "IP-Adapter-SD": [
        ("h94/IP-Adapter", "models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion/image_encoder"),
        ("h94/IP-Adapter", "models/ip-adapter_sd15.bin", "models/IpAdapter/stable_diffusion"),
    ],
    "IP-Adapter-SDXL": [
        ("h94/IP-Adapter", "sdxl_models/image_encoder/model.safetensors", "models/IpAdapter/stable_diffusion_xl/image_encoder"),
        ("h94/IP-Adapter", "sdxl_models/ip-adapter_sdxl.bin", "models/IpAdapter/stable_diffusion_xl"),
    ],
    "SDXL-vae-fp16-fix": [
        ("madebyollin/sdxl-vae-fp16-fix", "diffusion_pytorch_model.safetensors", "models/sdxl-vae-fp16-fix"),
    ],
    # Kolors: every file lands in a local folder named after its repository sub-folder
    "Kolors": [
        ("Kwai-Kolors/Kolors", f"{component}/{filename}", f"models/kolors/Kolors/{component}")
        for component, filenames in (
            (
                "text_encoder",
                (
                    "config.json",
                    "pytorch_model.bin.index.json",
                    "pytorch_model-00001-of-00007.bin",
                    "pytorch_model-00002-of-00007.bin",
                    "pytorch_model-00003-of-00007.bin",
                    "pytorch_model-00004-of-00007.bin",
                    "pytorch_model-00005-of-00007.bin",
                    "pytorch_model-00006-of-00007.bin",
                    "pytorch_model-00007-of-00007.bin",
                ),
            ),
            ("unet", ("diffusion_pytorch_model.safetensors",)),
            ("vae", ("diffusion_pytorch_model.safetensors",)),
        )
        for filename in filenames
    ],
    # FLUX
    "FLUX.1-dev": [
        ("black-forest-labs/FLUX.1-dev", "text_encoder/model.safetensors", "models/FLUX/FLUX.1-dev/text_encoder"),
        *(
            ("black-forest-labs/FLUX.1-dev", f"text_encoder_2/{filename}", "models/FLUX/FLUX.1-dev/text_encoder_2")
            for filename in (
                "config.json",
                "model-00001-of-00002.safetensors",
                "model-00002-of-00002.safetensors",
                "model.safetensors.index.json",
            )
        ),
        ("black-forest-labs/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"),
        ("black-forest-labs/FLUX.1-dev", "flux1-dev.safetensors", "models/FLUX/FLUX.1-dev"),
    ],
    # The only preset in this table stored as a dict with "file_list" and "load_path"
    "InstantX/FLUX.1-dev-IP-Adapter": {
        "file_list": [
            ("InstantX/FLUX.1-dev-IP-Adapter", "ip-adapter.bin", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter"),
            ("google/siglip-so400m-patch14-384", "model.safetensors", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder"),
            ("google/siglip-so400m-patch14-384", "config.json", "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder"),
        ],
        "load_path": [
            "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/ip-adapter.bin",
            "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder",
        ],
    },
    # RIFE
    "RIFE": [
        ("AlexWortega/RIFE", "flownet.pkl", "models/RIFE"),
    ],
    # CogVideo: same sub-folder-to-local-folder mapping as Kolors
    "CogVideoX-5B": [
        ("THUDM/CogVideoX-5b", f"{component}/{filename}", f"models/CogVideo/CogVideoX-5b/{component}")
        for component, filenames in (
            (
                "text_encoder",
                (
                    "config.json",
                    "model.safetensors.index.json",
                    "model-00001-of-00002.safetensors",
                    "model-00002-of-00002.safetensors",
                ),
            ),
            (
                "transformer",
                (
                    "config.json",
                    "diffusion_pytorch_model.safetensors.index.json",
                    "diffusion_pytorch_model-00001-of-00002.safetensors",
                    "diffusion_pytorch_model-00002-of-00002.safetensors",
                ),
            ),
            ("vae", ("diffusion_pytorch_model.safetensors",)),
        )
        for filename in filenames
    ],
    # Stable Diffusion 3.5
    "StableDiffusion3.5-large": [
        ("stabilityai/stable-diffusion-3.5-large", "sd3.5_large.safetensors", "models/stable_diffusion_3"),
        *(
            ("stabilityai/stable-diffusion-3.5-large", f"text_encoders/{filename}", "models/stable_diffusion_3/text_encoders")
            for filename in ("clip_l.safetensors", "clip_g.safetensors", "t5xxl_fp16.safetensors")
        ),
    ],
}
| preset_models_on_modelscope = { | |
| # Hunyuan DiT | |
| "HunyuanDiT": [ | |
| ( | |
| "modelscope/HunyuanDiT", | |
| "t2i/clip_text_encoder/pytorch_model.bin", | |
| "models/HunyuanDiT/t2i/clip_text_encoder", | |
| ), | |
| ( | |
| "modelscope/HunyuanDiT", | |
| "t2i/mt5/pytorch_model.bin", | |
| "models/HunyuanDiT/t2i/mt5", | |
| ), | |
| ( | |
| "modelscope/HunyuanDiT", | |
| "t2i/model/pytorch_model_ema.pt", | |
| "models/HunyuanDiT/t2i/model", | |
| ), | |
| ( | |
| "modelscope/HunyuanDiT", | |
| "t2i/sdxl-vae-fp16-fix/diffusion_pytorch_model.bin", | |
| "models/HunyuanDiT/t2i/sdxl-vae-fp16-fix", | |
| ), | |
| ], | |
| # Stable Video Diffusion | |
| "stable-video-diffusion-img2vid-xt": [ | |
| ( | |
| "AI-ModelScope/stable-video-diffusion-img2vid-xt", | |
| "svd_xt.safetensors", | |
| "models/stable_video_diffusion", | |
| ), | |
| ], | |
| # ExVideo | |
| "ExVideo-SVD-128f-v1": [ | |
| ( | |
| "ECNU-CILab/ExVideo-SVD-128f-v1", | |
| "model.fp16.safetensors", | |
| "models/stable_video_diffusion", | |
| ), | |
| ], | |
| "ExVideo-CogVideoX-LoRA-129f-v1": [ | |
| ( | |
| "ECNU-CILab/ExVideo-CogVideoX-LoRA-129f-v1", | |
| "ExVideo-CogVideoX-LoRA-129f-v1.safetensors", | |
| "models/lora", | |
| ), | |
| ], | |
| # Stable Diffusion | |
| "StableDiffusion_v15": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-v1-5", | |
| "v1-5-pruned-emaonly.safetensors", | |
| "models/stable_diffusion", | |
| ), | |
| ], | |
| "DreamShaper_8": [ | |
| ( | |
| "sd_lora/dreamshaper_8", | |
| "dreamshaper_8.safetensors", | |
| "models/stable_diffusion", | |
| ), | |
| ], | |
| "AingDiffusion_v12": [ | |
| ( | |
| "sd_lora/aingdiffusion_v12", | |
| "aingdiffusion_v12.safetensors", | |
| "models/stable_diffusion", | |
| ), | |
| ], | |
| "Flat2DAnimerge_v45Sharp": [ | |
| ( | |
| "sd_lora/Flat-2D-Animerge", | |
| "flat2DAnimerge_v45Sharp.safetensors", | |
| "models/stable_diffusion", | |
| ), | |
| ], | |
| # Textual Inversion | |
| "TextualInversion_VeryBadImageNegative_v1.3": [ | |
| ( | |
| "sd_lora/verybadimagenegative_v1.3", | |
| "verybadimagenegative_v1.3.pt", | |
| "models/textual_inversion", | |
| ), | |
| ], | |
| # Stable Diffusion XL | |
| "StableDiffusionXL_v1": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-xl-base-1.0", | |
| "sd_xl_base_1.0.safetensors", | |
| "models/stable_diffusion_xl", | |
| ), | |
| ], | |
| "BluePencilXL_v200": [ | |
| ( | |
| "sd_lora/bluePencilXL_v200", | |
| "bluePencilXL_v200.safetensors", | |
| "models/stable_diffusion_xl", | |
| ), | |
| ], | |
| "StableDiffusionXL_Turbo": [ | |
| ( | |
| "AI-ModelScope/sdxl-turbo", | |
| "sd_xl_turbo_1.0_fp16.safetensors", | |
| "models/stable_diffusion_xl_turbo", | |
| ), | |
| ], | |
| "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0": [ | |
| ( | |
| "sd_lora/zyd232_ChineseInkStyle_SDXL_v1_0", | |
| "zyd232_ChineseInkStyle_SDXL_v1_0.safetensors", | |
| "models/lora", | |
| ), | |
| ], | |
| # Stable Diffusion 3 | |
| "StableDiffusion3": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-3-medium", | |
| "sd3_medium_incl_clips_t5xxlfp16.safetensors", | |
| "models/stable_diffusion_3", | |
| ), | |
| ], | |
| "StableDiffusion3_without_T5": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-3-medium", | |
| "sd3_medium_incl_clips.safetensors", | |
| "models/stable_diffusion_3", | |
| ), | |
| ], | |
| # ControlNet | |
| "ControlNet_v11f1p_sd15_depth": [ | |
| ( | |
| "AI-ModelScope/ControlNet-v1-1", | |
| "control_v11f1p_sd15_depth.pth", | |
| "models/ControlNet", | |
| ), | |
| ("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators"), | |
| ], | |
| "ControlNet_v11p_sd15_softedge": [ | |
| ( | |
| "AI-ModelScope/ControlNet-v1-1", | |
| "control_v11p_sd15_softedge.pth", | |
| "models/ControlNet", | |
| ), | |
| ("sd_lora/Annotators", "ControlNetHED.pth", "models/Annotators"), | |
| ], | |
| "ControlNet_v11f1e_sd15_tile": [ | |
| ( | |
| "AI-ModelScope/ControlNet-v1-1", | |
| "control_v11f1e_sd15_tile.pth", | |
| "models/ControlNet", | |
| ) | |
| ], | |
| "ControlNet_v11p_sd15_lineart": [ | |
| ( | |
| "AI-ModelScope/ControlNet-v1-1", | |
| "control_v11p_sd15_lineart.pth", | |
| "models/ControlNet", | |
| ), | |
| ("sd_lora/Annotators", "sk_model.pth", "models/Annotators"), | |
| ("sd_lora/Annotators", "sk_model2.pth", "models/Annotators"), | |
| ], | |
| "ControlNet_union_sdxl_promax": [ | |
| ( | |
| "AI-ModelScope/controlnet-union-sdxl-1.0", | |
| "diffusion_pytorch_model_promax.safetensors", | |
| "models/ControlNet/controlnet_union", | |
| ), | |
| ("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators"), | |
| ], | |
| "Annotators:Depth": [ | |
| ("sd_lora/Annotators", "dpt_hybrid-midas-501f0c75.pt", "models/Annotators"), | |
| ], | |
| "Annotators:Softedge": [ | |
| ("sd_lora/Annotators", "ControlNetHED.pth", "models/Annotators"), | |
| ], | |
| "Annotators:Lineart": [ | |
| ("sd_lora/Annotators", "sk_model.pth", "models/Annotators"), | |
| ("sd_lora/Annotators", "sk_model2.pth", "models/Annotators"), | |
| ], | |
| "Annotators:Normal": [ | |
| ("sd_lora/Annotators", "scannet.pt", "models/Annotators"), | |
| ], | |
| "Annotators:Openpose": [ | |
| ("sd_lora/Annotators", "body_pose_model.pth", "models/Annotators"), | |
| ("sd_lora/Annotators", "facenet.pth", "models/Annotators"), | |
| ("sd_lora/Annotators", "hand_pose_model.pth", "models/Annotators"), | |
| ], | |
| # AnimateDiff | |
| "AnimateDiff_v2": [ | |
| ( | |
| "Shanghai_AI_Laboratory/animatediff", | |
| "mm_sd_v15_v2.ckpt", | |
| "models/AnimateDiff", | |
| ), | |
| ], | |
| "AnimateDiff_xl_beta": [ | |
| ( | |
| "Shanghai_AI_Laboratory/animatediff", | |
| "mm_sdxl_v10_beta.ckpt", | |
| "models/AnimateDiff", | |
| ), | |
| ], | |
| # RIFE | |
| "RIFE": [ | |
| ("Damo_XR_Lab/cv_rife_video-frame-interpolation", "flownet.pkl", "models/RIFE"), | |
| ], | |
| # Qwen Prompt | |
| "QwenPrompt": { | |
| "file_list": [ | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "config.json", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "generation_config.json", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "model.safetensors", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "special_tokens_map.json", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "tokenizer.json", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "tokenizer_config.json", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "merges.txt", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ( | |
| "qwen/Qwen2-1.5B-Instruct", | |
| "vocab.json", | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/QwenPrompt/qwen2-1.5b-instruct", | |
| ], | |
| }, | |
| # Beautiful Prompt | |
| "BeautifulPrompt": { | |
| "file_list": [ | |
| ( | |
| "AI-ModelScope/pai-bloom-1b1-text2prompt-sd", | |
| "config.json", | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ), | |
| ( | |
| "AI-ModelScope/pai-bloom-1b1-text2prompt-sd", | |
| "generation_config.json", | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ), | |
| ( | |
| "AI-ModelScope/pai-bloom-1b1-text2prompt-sd", | |
| "model.safetensors", | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ), | |
| ( | |
| "AI-ModelScope/pai-bloom-1b1-text2prompt-sd", | |
| "special_tokens_map.json", | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ), | |
| ( | |
| "AI-ModelScope/pai-bloom-1b1-text2prompt-sd", | |
| "tokenizer.json", | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ), | |
| ( | |
| "AI-ModelScope/pai-bloom-1b1-text2prompt-sd", | |
| "tokenizer_config.json", | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/BeautifulPrompt/pai-bloom-1b1-text2prompt-sd", | |
| ], | |
| }, | |
| # Omost prompt | |
| "OmostPrompt": { | |
| "file_list": [ | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "model-00001-of-00002.safetensors", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "model-00002-of-00002.safetensors", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "tokenizer.json", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "tokenizer_config.json", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "config.json", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "generation_config.json", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "model.safetensors.index.json", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ( | |
| "Omost/omost-llama-3-8b-4bits", | |
| "special_tokens_map.json", | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/OmostPrompt/omost-llama-3-8b-4bits", | |
| ], | |
| }, | |
| # Translator | |
| "opus-mt-zh-en": { | |
| "file_list": [ | |
| ("moxying/opus-mt-zh-en", "config.json", "models/translator/opus-mt-zh-en"), | |
| ( | |
| "moxying/opus-mt-zh-en", | |
| "generation_config.json", | |
| "models/translator/opus-mt-zh-en", | |
| ), | |
| ( | |
| "moxying/opus-mt-zh-en", | |
| "metadata.json", | |
| "models/translator/opus-mt-zh-en", | |
| ), | |
| ( | |
| "moxying/opus-mt-zh-en", | |
| "pytorch_model.bin", | |
| "models/translator/opus-mt-zh-en", | |
| ), | |
| ("moxying/opus-mt-zh-en", "source.spm", "models/translator/opus-mt-zh-en"), | |
| ("moxying/opus-mt-zh-en", "target.spm", "models/translator/opus-mt-zh-en"), | |
| ( | |
| "moxying/opus-mt-zh-en", | |
| "tokenizer_config.json", | |
| "models/translator/opus-mt-zh-en", | |
| ), | |
| ("moxying/opus-mt-zh-en", "vocab.json", "models/translator/opus-mt-zh-en"), | |
| ], | |
| "load_path": [ | |
| "models/translator/opus-mt-zh-en", | |
| ], | |
| }, | |
| # IP-Adapter | |
| "IP-Adapter-SD": [ | |
| ( | |
| "AI-ModelScope/IP-Adapter", | |
| "models/image_encoder/model.safetensors", | |
| "models/IpAdapter/stable_diffusion/image_encoder", | |
| ), | |
| ( | |
| "AI-ModelScope/IP-Adapter", | |
| "models/ip-adapter_sd15.bin", | |
| "models/IpAdapter/stable_diffusion", | |
| ), | |
| ], | |
| "IP-Adapter-SDXL": [ | |
| ( | |
| "AI-ModelScope/IP-Adapter", | |
| "sdxl_models/image_encoder/model.safetensors", | |
| "models/IpAdapter/stable_diffusion_xl/image_encoder", | |
| ), | |
| ( | |
| "AI-ModelScope/IP-Adapter", | |
| "sdxl_models/ip-adapter_sdxl.bin", | |
| "models/IpAdapter/stable_diffusion_xl", | |
| ), | |
| ], | |
| # Kolors | |
| "Kolors": { | |
| "file_list": [ | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/config.json", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model.bin.index.json", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00001-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00002-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00003-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00004-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00005-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00006-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "text_encoder/pytorch_model-00007-of-00007.bin", | |
| "models/kolors/Kolors/text_encoder", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "unet/diffusion_pytorch_model.safetensors", | |
| "models/kolors/Kolors/unet", | |
| ), | |
| ( | |
| "Kwai-Kolors/Kolors", | |
| "vae/diffusion_pytorch_model.safetensors", | |
| "models/kolors/Kolors/vae", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/kolors/Kolors/text_encoder", | |
| "models/kolors/Kolors/unet/diffusion_pytorch_model.safetensors", | |
| "models/kolors/Kolors/vae/diffusion_pytorch_model.safetensors", | |
| ], | |
| }, | |
| "SDXL-vae-fp16-fix": [ | |
| ( | |
| "AI-ModelScope/sdxl-vae-fp16-fix", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/sdxl-vae-fp16-fix", | |
| ) | |
| ], | |
| # FLUX | |
| "FLUX.1-dev": { | |
| "file_list": [ | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder/model.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/config.json", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/model-00001-of-00002.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/model-00002-of-00002.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/model.safetensors.index.json", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ("AI-ModelScope/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "flux1-dev.safetensors", | |
| "models/FLUX/FLUX.1-dev", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| "models/FLUX/FLUX.1-dev/ae.safetensors", | |
| "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", | |
| ], | |
| }, | |
| "FLUX.1-schnell": { | |
| "file_list": [ | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder/model.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/config.json", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/model-00001-of-00002.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/model-00002-of-00002.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/FLUX.1-dev", | |
| "text_encoder_2/model.safetensors.index.json", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| ), | |
| ("AI-ModelScope/FLUX.1-dev", "ae.safetensors", "models/FLUX/FLUX.1-dev"), | |
| ( | |
| "AI-ModelScope/FLUX.1-schnell", | |
| "flux1-schnell.safetensors", | |
| "models/FLUX/FLUX.1-schnell", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", | |
| "models/FLUX/FLUX.1-dev/text_encoder_2", | |
| "models/FLUX/FLUX.1-dev/ae.safetensors", | |
| "models/FLUX/FLUX.1-schnell/flux1-schnell.safetensors", | |
| ], | |
| }, | |
| "InstantX/FLUX.1-dev-Controlnet-Union-alpha": [ | |
| ( | |
| "InstantX/FLUX.1-dev-Controlnet-Union-alpha", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/InstantX/FLUX.1-dev-Controlnet-Union-alpha", | |
| ), | |
| ], | |
| "jasperai/Flux.1-dev-Controlnet-Depth": [ | |
| ( | |
| "jasperai/Flux.1-dev-Controlnet-Depth", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Depth", | |
| ), | |
| ], | |
| "jasperai/Flux.1-dev-Controlnet-Surface-Normals": [ | |
| ( | |
| "jasperai/Flux.1-dev-Controlnet-Surface-Normals", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Surface-Normals", | |
| ), | |
| ], | |
| "jasperai/Flux.1-dev-Controlnet-Upscaler": [ | |
| ( | |
| "jasperai/Flux.1-dev-Controlnet-Upscaler", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Upscaler", | |
| ), | |
| ], | |
| "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha": [ | |
| ( | |
| "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha", | |
| ), | |
| ], | |
| "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta": [ | |
| ( | |
| "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta", | |
| ), | |
| ], | |
| "Shakker-Labs/FLUX.1-dev-ControlNet-Depth": [ | |
| ( | |
| "Shakker-Labs/FLUX.1-dev-ControlNet-Depth", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/Shakker-Labs/FLUX.1-dev-ControlNet-Depth", | |
| ), | |
| ], | |
| "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro": [ | |
| ( | |
| "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro", | |
| "diffusion_pytorch_model.safetensors", | |
| "models/ControlNet/Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro", | |
| ), | |
| ], | |
| "InstantX/FLUX.1-dev-IP-Adapter": { | |
| "file_list": [ | |
| ( | |
| "InstantX/FLUX.1-dev-IP-Adapter", | |
| "ip-adapter.bin", | |
| "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter", | |
| ), | |
| ( | |
| "AI-ModelScope/siglip-so400m-patch14-384", | |
| "model.safetensors", | |
| "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder", | |
| ), | |
| ( | |
| "AI-ModelScope/siglip-so400m-patch14-384", | |
| "config.json", | |
| "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/ip-adapter.bin", | |
| "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder", | |
| ], | |
| }, | |
| "InfiniteYou": { | |
| "file_list": [ | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "infu_flux_v1.0/aes_stage2/InfuseNetModel/diffusion_pytorch_model-00001-of-00002.safetensors", | |
| "models/InfiniteYou/InfuseNetModel", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "infu_flux_v1.0/aes_stage2/InfuseNetModel/diffusion_pytorch_model-00002-of-00002.safetensors", | |
| "models/InfiniteYou/InfuseNetModel", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "infu_flux_v1.0/aes_stage2/image_proj_model.bin", | |
| "models/InfiniteYou", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "supports/insightface/models/antelopev2/1k3d68.onnx", | |
| "models/InfiniteYou/insightface/models/antelopev2", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "supports/insightface/models/antelopev2/2d106det.onnx", | |
| "models/InfiniteYou/insightface/models/antelopev2", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "supports/insightface/models/antelopev2/genderage.onnx", | |
| "models/InfiniteYou/insightface/models/antelopev2", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "supports/insightface/models/antelopev2/glintr100.onnx", | |
| "models/InfiniteYou/insightface/models/antelopev2", | |
| ), | |
| ( | |
| "ByteDance/InfiniteYou", | |
| "supports/insightface/models/antelopev2/scrfd_10g_bnkps.onnx", | |
| "models/InfiniteYou/insightface/models/antelopev2", | |
| ), | |
| ], | |
| "load_path": [ | |
| [ | |
| "models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00001-of-00002.safetensors", | |
| "models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00002-of-00002.safetensors", | |
| ], | |
| "models/InfiniteYou/image_proj_model.bin", | |
| ], | |
| }, | |
| # ESRGAN | |
| "ESRGAN_x4": [ | |
| ("AI-ModelScope/Real-ESRGAN", "RealESRGAN_x4.pth", "models/ESRGAN"), | |
| ], | |
| # RIFE | |
| "RIFE": [ | |
| ("AI-ModelScope/RIFE", "flownet.pkl", "models/RIFE"), | |
| ], | |
| # Omnigen | |
| "OmniGen-v1": { | |
| "file_list": [ | |
| ( | |
| "BAAI/OmniGen-v1", | |
| "vae/diffusion_pytorch_model.safetensors", | |
| "models/OmniGen/OmniGen-v1/vae", | |
| ), | |
| ("BAAI/OmniGen-v1", "model.safetensors", "models/OmniGen/OmniGen-v1"), | |
| ("BAAI/OmniGen-v1", "config.json", "models/OmniGen/OmniGen-v1"), | |
| ("BAAI/OmniGen-v1", "special_tokens_map.json", "models/OmniGen/OmniGen-v1"), | |
| ("BAAI/OmniGen-v1", "tokenizer_config.json", "models/OmniGen/OmniGen-v1"), | |
| ("BAAI/OmniGen-v1", "tokenizer.json", "models/OmniGen/OmniGen-v1"), | |
| ], | |
| "load_path": [ | |
| "models/OmniGen/OmniGen-v1/vae/diffusion_pytorch_model.safetensors", | |
| "models/OmniGen/OmniGen-v1/model.safetensors", | |
| ], | |
| }, | |
| # CogVideo | |
| "CogVideoX-5B": { | |
| "file_list": [ | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "text_encoder/config.json", | |
| "models/CogVideo/CogVideoX-5b/text_encoder", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "text_encoder/model.safetensors.index.json", | |
| "models/CogVideo/CogVideoX-5b/text_encoder", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "text_encoder/model-00001-of-00002.safetensors", | |
| "models/CogVideo/CogVideoX-5b/text_encoder", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "text_encoder/model-00002-of-00002.safetensors", | |
| "models/CogVideo/CogVideoX-5b/text_encoder", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "transformer/config.json", | |
| "models/CogVideo/CogVideoX-5b/transformer", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "transformer/diffusion_pytorch_model.safetensors.index.json", | |
| "models/CogVideo/CogVideoX-5b/transformer", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "transformer/diffusion_pytorch_model-00001-of-00002.safetensors", | |
| "models/CogVideo/CogVideoX-5b/transformer", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "transformer/diffusion_pytorch_model-00002-of-00002.safetensors", | |
| "models/CogVideo/CogVideoX-5b/transformer", | |
| ), | |
| ( | |
| "ZhipuAI/CogVideoX-5b", | |
| "vae/diffusion_pytorch_model.safetensors", | |
| "models/CogVideo/CogVideoX-5b/vae", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/CogVideo/CogVideoX-5b/text_encoder", | |
| "models/CogVideo/CogVideoX-5b/transformer", | |
| "models/CogVideo/CogVideoX-5b/vae/diffusion_pytorch_model.safetensors", | |
| ], | |
| }, | |
| # Stable Diffusion 3.5 | |
| "StableDiffusion3.5-large": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "sd3.5_large.safetensors", | |
| "models/stable_diffusion_3", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/clip_l.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/clip_g.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/t5xxl_fp16.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ], | |
| "StableDiffusion3.5-medium": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-medium", | |
| "sd3.5_medium.safetensors", | |
| "models/stable_diffusion_3", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/clip_l.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/clip_g.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/t5xxl_fp16.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ], | |
| "StableDiffusion3.5-large-turbo": [ | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large-turbo", | |
| "sd3.5_large_turbo.safetensors", | |
| "models/stable_diffusion_3", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/clip_l.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/clip_g.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ( | |
| "AI-ModelScope/stable-diffusion-3.5-large", | |
| "text_encoders/t5xxl_fp16.safetensors", | |
| "models/stable_diffusion_3/text_encoders", | |
| ), | |
| ], | |
| "HunyuanVideo": { | |
| "file_list": [ | |
| ( | |
| "AI-ModelScope/clip-vit-large-patch14", | |
| "model.safetensors", | |
| "models/HunyuanVideo/text_encoder", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00001-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00002-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00003-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00004-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "config.json", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model.safetensors.index.json", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/HunyuanVideo", | |
| "hunyuan-video-t2v-720p/vae/pytorch_model.pt", | |
| "models/HunyuanVideo/vae", | |
| ), | |
| ( | |
| "AI-ModelScope/HunyuanVideo", | |
| "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt", | |
| "models/HunyuanVideo/transformers", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/HunyuanVideo/text_encoder/model.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| "models/HunyuanVideo/vae/pytorch_model.pt", | |
| "models/HunyuanVideo/transformers/mp_rank_00_model_states.pt", | |
| ], | |
| }, | |
| "HunyuanVideoI2V": { | |
| "file_list": [ | |
| ( | |
| "AI-ModelScope/clip-vit-large-patch14", | |
| "model.safetensors", | |
| "models/HunyuanVideoI2V/text_encoder", | |
| ), | |
| ( | |
| "AI-ModelScope/llava-llama-3-8b-v1_1-transformers", | |
| "model-00001-of-00004.safetensors", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/llava-llama-3-8b-v1_1-transformers", | |
| "model-00002-of-00004.safetensors", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/llava-llama-3-8b-v1_1-transformers", | |
| "model-00003-of-00004.safetensors", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/llava-llama-3-8b-v1_1-transformers", | |
| "model-00004-of-00004.safetensors", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/llava-llama-3-8b-v1_1-transformers", | |
| "config.json", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/llava-llama-3-8b-v1_1-transformers", | |
| "model.safetensors.index.json", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/HunyuanVideo-I2V", | |
| "hunyuan-video-i2v-720p/vae/pytorch_model.pt", | |
| "models/HunyuanVideoI2V/vae", | |
| ), | |
| ( | |
| "AI-ModelScope/HunyuanVideo-I2V", | |
| "hunyuan-video-i2v-720p/transformers/mp_rank_00_model_states.pt", | |
| "models/HunyuanVideoI2V/transformers", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/HunyuanVideoI2V/text_encoder/model.safetensors", | |
| "models/HunyuanVideoI2V/text_encoder_2", | |
| "models/HunyuanVideoI2V/vae/pytorch_model.pt", | |
| "models/HunyuanVideoI2V/transformers/mp_rank_00_model_states.pt", | |
| ], | |
| }, | |
| "HunyuanVideo-fp8": { | |
| "file_list": [ | |
| ( | |
| "AI-ModelScope/clip-vit-large-patch14", | |
| "model.safetensors", | |
| "models/HunyuanVideo/text_encoder", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00001-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00002-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00003-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model-00004-of-00004.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "config.json", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo_MLLM_text_encoder", | |
| "model.safetensors.index.json", | |
| "models/HunyuanVideo/text_encoder_2", | |
| ), | |
| ( | |
| "AI-ModelScope/HunyuanVideo", | |
| "hunyuan-video-t2v-720p/vae/pytorch_model.pt", | |
| "models/HunyuanVideo/vae", | |
| ), | |
| ( | |
| "DiffSynth-Studio/HunyuanVideo-safetensors", | |
| "model.fp8.safetensors", | |
| "models/HunyuanVideo/transformers", | |
| ), | |
| ], | |
| "load_path": [ | |
| "models/HunyuanVideo/text_encoder/model.safetensors", | |
| "models/HunyuanVideo/text_encoder_2", | |
| "models/HunyuanVideo/vae/pytorch_model.pt", | |
| "models/HunyuanVideo/transformers/model.fp8.safetensors", | |
| ], | |
| }, | |
| } | |
| Preset_model_id: TypeAlias = Literal[ | |
| "HunyuanDiT", | |
| "stable-video-diffusion-img2vid-xt", | |
| "ExVideo-SVD-128f-v1", | |
| "ExVideo-CogVideoX-LoRA-129f-v1", | |
| "StableDiffusion_v15", | |
| "DreamShaper_8", | |
| "AingDiffusion_v12", | |
| "Flat2DAnimerge_v45Sharp", | |
| "TextualInversion_VeryBadImageNegative_v1.3", | |
| "StableDiffusionXL_v1", | |
| "BluePencilXL_v200", | |
| "StableDiffusionXL_Turbo", | |
| "ControlNet_v11f1p_sd15_depth", | |
| "ControlNet_v11p_sd15_softedge", | |
| "ControlNet_v11f1e_sd15_tile", | |
| "ControlNet_v11p_sd15_lineart", | |
| "AnimateDiff_v2", | |
| "AnimateDiff_xl_beta", | |
| "RIFE", | |
| "BeautifulPrompt", | |
| "opus-mt-zh-en", | |
| "IP-Adapter-SD", | |
| "IP-Adapter-SDXL", | |
| "StableDiffusion3", | |
| "StableDiffusion3_without_T5", | |
| "Kolors", | |
| "SDXL-vae-fp16-fix", | |
| "ControlNet_union_sdxl_promax", | |
| "FLUX.1-dev", | |
| "FLUX.1-schnell", | |
| "InstantX/FLUX.1-dev-Controlnet-Union-alpha", | |
| "jasperai/Flux.1-dev-Controlnet-Depth", | |
| "jasperai/Flux.1-dev-Controlnet-Surface-Normals", | |
| "jasperai/Flux.1-dev-Controlnet-Upscaler", | |
| "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Alpha", | |
| "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta", | |
| "Shakker-Labs/FLUX.1-dev-ControlNet-Depth", | |
| "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro", | |
| "InstantX/FLUX.1-dev-IP-Adapter", | |
| "InfiniteYou", | |
| "SDXL_lora_zyd232_ChineseInkStyle_SDXL_v1_0", | |
| "QwenPrompt", | |
| "OmostPrompt", | |
| "ESRGAN_x4", | |
| "RIFE", | |
| "OmniGen-v1", | |
| "CogVideoX-5B", | |
| "Annotators:Depth", | |
| "Annotators:Softedge", | |
| "Annotators:Lineart", | |
| "Annotators:Normal", | |
| "Annotators:Openpose", | |
| "StableDiffusion3.5-large", | |
| "StableDiffusion3.5-medium", | |
| "HunyuanVideo", | |
| "HunyuanVideo-fp8", | |
| "HunyuanVideoI2V", | |
| ] | |