Spaces: Running on Zero
| from collections import namedtuple | |
| from typing import List | |
# Immutable record describing one registered model: display name, homepage
# link, short description, license, owning organization, and task type.
ModelInfo = namedtuple(
    "ModelInfo",
    "simple_name link description license organization type",
)

# Global registry: maps every known alias of a model to its ModelInfo record.
model_info = dict()
def register_model_info(
    full_names: List[str], simple_name: str, link: str, description: str,
    license: str, organization: str, model_type: str
):
    """Register a model in the global ``model_info`` registry.

    Each model is indexed under (a) every full alias in ``full_names``,
    (b) the middle token of each ``<source>_<name>_<task>`` alias, and
    (c) its ``simple_name``, so later lookups by any of these succeed.

    Args:
        full_names: Full alias ids, conventionally ``<source>_<name>_<task>``.
        simple_name: Human-readable display name.
        link: Homepage / model-card URL.
        description: One-line model description.
        license: License identifier shown to users.
        organization: Owning organization.
        model_type: Task type tag (e.g. ``"text2image_generation"``).
    """
    info = ModelInfo(simple_name, link, description, license, organization, model_type)
    for full_name in full_names:
        model_info[full_name] = info
        # Also index by the middle token of "<source>_<name>_<task>" ids.
        # Guard against aliases without an underscore, which would otherwise
        # raise IndexError on the [1] access.
        parts = full_name.split("_")
        if len(parts) > 1:
            model_info[parts[1]] = info
    model_info[simple_name] = info
def get_model_info(name: str) -> ModelInfo:
    """Return the registered ModelInfo for ``name``, or a placeholder stub."""
    try:
        return model_info[name]
    except KeyError:
        # Unknown model: hand back a stub record that tells the maintainer
        # to register it via `register_model_info`.
        return ModelInfo(
            name,
            "-",
            "Register the description at fastchat/model/model_registry.py",
            "-",
            "-",
            None,
        )
def get_model_description_md(model_list):
    """Build a 3-column markdown table describing the given models.

    Duplicate aliases resolving to the same model (same ``simple_name``) are
    listed only once. Each cell is "[name](link): description".

    Args:
        model_list: Iterable of model names/aliases to describe.

    Returns:
        A markdown string starting with the table header.
    """
    # Header must have the same cell count as the 3-column separator row,
    # otherwise markdown renderers reject the table.
    model_description_md = """
| | | |
| ---- | ---- | ---- |
"""
    ct = 0
    visited = set()
    for name in model_list:
        minfo = get_model_info(name)
        # Skip aliases of a model we already emitted.
        if minfo.simple_name in visited:
            continue
        visited.add(minfo.simple_name)
        one_model_md = f"[{minfo.simple_name}]({minfo.link}): {minfo.description}"
        if ct % 3 == 0:
            model_description_md += "|"
        model_description_md += f" {one_model_md} |"
        if ct % 3 == 2:
            model_description_md += "\n"
        ct += 1
    return model_description_md
# Register image generation models.
register_model_info(
    full_names=["imagenhub_LCM_generation", "fal_LCM_text2image"],
    simple_name="LCM",
    link="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7",
    description="Latent Consistency Models.",
    license="MIT License",
    organization="Tsinghua University",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_LCM(v1.5/XL)_text2image"],
    simple_name="LCM(v1.5/XL)",
    link="https://fal.ai/models/fast-lcm-diffusion-turbo",
    description="Latent Consistency Models (v1.5/XL)",
    license="openrail++",
    organization="Latent Consistency",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PlayGroundV2_generation", "playground_PlayGroundV2_generation"],
    simple_name="PlayGround V2",
    link="https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic",
    description="Playground v2 – 1024px Aesthetic Model",
    license="Playground v2 Community License",
    organization="Playground",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PlayGroundV2.5_generation", "playground_PlayGroundV2.5_generation"],
    simple_name="PlayGround V2.5",
    link="https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic",
    description="Playground v2.5 is the state-of-the-art open-source model in aesthetic quality",
    license="Playground v2.5 Community License",
    organization="Playground",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_OpenJourney_generation"],
    simple_name="OpenJourney",
    link="https://huggingface.co/prompthero/openjourney",
    description="Openjourney is an open source Stable Diffusion fine tuned model on Midjourney images, by PromptHero.",
    license="creativeml-openrail-m",
    organization="PromptHero",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_SDXLTurbo_generation", "fal_SDXLTurbo_text2image"],
    simple_name="SDXLTurbo",
    link="https://huggingface.co/stabilityai/sdxl-turbo",
    description="SDXL-Turbo is a fast generative text-to-image model.",
    license="sai-nc-community (other)",
    organization="Stability AI",
    model_type="text2image_generation",
)
# NOTE(review): SDEdit is tagged "image_edition" although it sits in the
# generation section — preserved as-is.
register_model_info(
    full_names=["imagenhub_SDEdit_edition"],
    simple_name="SDEdit",
    link="https://sde-image-editing.github.io",
    description="SDEdit is an image synthesis and editing framework based on stochastic differential equations (SDEs) or diffusion models.",
    license="MIT License",
    organization="Stanford University",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_SDXL_generation", "fal_SDXL_text2image"],
    simple_name="SDXL",
    link="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
    description="SDXL is a Latent Diffusion Model that uses two fixed, pretrained text encoders.",
    license="openrail++",
    organization="Stability AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_SD3_generation"],
    simple_name="SD3",
    link="https://huggingface.co/blog/sd3",
    description="SD3 is a novel Multimodal Diffusion Transformer (MMDiT) model.",
    license="stabilityai-nc-research-community",
    organization="Stability AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PixArtAlpha_generation"],
    simple_name="PixArtAlpha",
    link="https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS",
    description="Pixart-α consists of pure transformer blocks for latent diffusion.",
    license="openrail++",
    organization="PixArt-alpha",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_PixArtSigma_generation", "fal_PixArtSigma_text2image"],
    simple_name="PixArtSigma",
    link="https://github.com/PixArt-alpha/PixArt-sigma",
    description="Improved version of Pixart-α.",
    license="openrail++",
    organization="PixArt-alpha",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_SDXLLightning_generation", "fal_SDXLLightning_text2image"],
    simple_name="SDXL-Lightning",
    link="https://huggingface.co/ByteDance/SDXL-Lightning",
    description="SDXL-Lightning is a lightning-fast text-to-image generation model.",
    license="openrail++",
    organization="ByteDance",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_StableCascade_generation", "fal_StableCascade_text2image"],
    simple_name="StableCascade",
    link="https://huggingface.co/stabilityai/stable-cascade",
    description="StableCascade is built upon the Würstchen architecture and working at a much smaller latent space.",
    license="stable-cascade-nc-community (other)",
    organization="Stability AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_HunyuanDiT_generation"],
    simple_name="HunyuanDiT",
    link="https://github.com/Tencent/HunyuanDiT",
    description="HunyuanDiT is a Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding",
    license="tencent-hunyuan-community",
    organization="Tencent",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["imagenhub_Kolors_generation"],
    simple_name="Kolors",
    link="https://huggingface.co/Kwai-Kolors/Kolors",
    description="Kolors is a large-scale text-to-image generation model based on latent diffusion",
    license="Apache-2.0",
    organization="Kwai Kolors",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_AuraFlow_text2image"],
    simple_name="AuraFlow",
    link="https://huggingface.co/fal/AuraFlow",
    description="Opensourced flow-based text-to-image generation model.",
    license="Apache-2.0",
    organization="Fal.AI",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_FLUX1schnell_text2image"],
    simple_name="FLUX.1-schnell",
    link="https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    description="Flux is a series of text-to-image generation models based on diffusion transformers. Timestep-distilled version.",
    license="Apache-2.0",
    organization="Black Forest Labs",
    model_type="text2image_generation",
)
register_model_info(
    full_names=["fal_FLUX1dev_text2image"],
    simple_name="FLUX.1-dev",
    link="https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux",
    description="Flux is a series of text-to-image generation models based on diffusion transformers. Guidance-distilled version.",
    license="flux-1-dev-non-commercial-license (other)",
    organization="Black Forest Labs",
    model_type="text2image_generation",
)
# Register image edition models.
register_model_info(
    full_names=["imagenhub_CycleDiffusion_edition"],
    simple_name="CycleDiffusion",
    link="https://github.com/ChenWu98/cycle-diffusion?tab=readme-ov-file",
    description="A latent space for stochastic diffusion models.",
    license="X11",
    organization="Carnegie Mellon University",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_Pix2PixZero_edition"],
    simple_name="Pix2PixZero",
    link="https://pix2pixzero.github.io/",
    description="A zero-shot Image-to-Image translation model.",
    license="MIT License",
    organization="Carnegie Mellon University, Adobe Research",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_Prompt2prompt_edition"],
    simple_name="Prompt2prompt",
    link="https://prompt-to-prompt.github.io/",
    description="Image Editing with Cross-Attention Control.",
    license="Apache-2.0",
    organization="Google, Tel Aviv University",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_InstructPix2Pix_edition"],
    simple_name="InstructPix2Pix",
    link="https://www.timothybrooks.com/instruct-pix2pix",
    description="An instruction-based image editing model.",
    license="Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros",
    organization="University of California, Berkeley",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_MagicBrush_edition"],
    simple_name="MagicBrush",
    link="https://osu-nlp-group.github.io/MagicBrush/",
    description="Manually Annotated Dataset for Instruction-Guided Image Editing.",
    license="CC-BY-4.0",
    organization="The Ohio State University, University of Waterloo",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_PNP_edition"],
    simple_name="PNP",
    link="https://github.com/MichalGeyer/plug-and-play",
    description="Plug-and-Play Diffusion Features for Text-Driven Image-to-Image Translation.",
    license="-",
    organization="Weizmann Institute of Science",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_InfEdit_edition"],
    simple_name="InfEdit",
    link="https://sled-group.github.io/InfEdit/",
    description="Inversion-Free Image Editing with Natural Language.",
    license="CC BY-NC-ND 4.0",
    organization="University of Michigan, University of California, Berkeley",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_CosXLEdit_edition"],
    simple_name="CosXLEdit",
    link="https://huggingface.co/stabilityai/cosxl",
    description="An instruction-based image editing model from SDXL.",
    license="cosxl-nc-community",
    organization="Stability AI",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_UltraEdit_edition"],
    simple_name="UltraEdit",
    link="https://ultra-editing.github.io/",
    description="Instruction-based Fine-Grained Image Editing at Scale.",
    license="other",
    organization="Peking University; BIGAI",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_AURORA_edition"],
    simple_name="AURORA",
    link="https://aurora-editing.github.io/",
    description="AURORA (Action Reasoning Object Attribute) enables training an instruction-guided image editing model that can perform action and reasoning-centric edits.",
    license="MIT",
    organization="McGill NLP",
    model_type="image_edition",
)
register_model_info(
    full_names=["imagenhub_FluxEdit_edition"],
    simple_name="FluxEdit",
    link="https://github.com/sayakpaul/flux-image-editing",
    description="Flux Control trained on OmniEdit dataset",
    license="Apache 2.0",
    organization="HuggingFace",
    model_type="image_edition",
)
# NOTE(review): a "text2image" alias registered under model_type
# "image_edition" — preserved as-is.
register_model_info(
    full_names=["fal_stable-cascade_text2image"],
    simple_name="StableCascade",
    link="https://fal.ai/models/stable-cascade/api",
    description="StableCascade is a generative model that can generate high-quality images from text prompts.",
    license="stable-cascade-nc-community (other)",
    organization="Stability AI",
    model_type="image_edition",
)
# Register text-to-video generation models.
register_model_info(
    full_names=["fal_AnimateDiff_text2video"],
    simple_name="AnimateDiff",
    link="https://fal.ai/models/fast-animatediff-t2v",
    description="AnimateDiff is a text-driven models that produce diverse and personalized animated images.",
    license="creativeml-openrail-m",
    organization="The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_StableVideoDiffusion_text2video"],
    simple_name="StableVideoDiffusion",
    link="https://fal.ai/models/fal-ai/fast-svd/text-to-video/api",
    description="Stable Video Diffusion empowers individuals to transform text and image inputs into vivid scenes.",
    license="SVD-nc-community",
    organization="Stability AI",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_AnimateDiffTurbo_text2video"],
    simple_name="AnimateDiff Turbo",
    link="https://fal.ai/models/fast-animatediff-t2v-turbo",
    description="AnimateDiff Turbo is a lightning version of AnimateDiff.",
    license="creativeml-openrail-m",
    organization="The Chinese University of Hong Kong, Shanghai AI Lab, Stanford University",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_VideoCrafter2_generation"],
    simple_name="VideoCrafter2",
    link="https://ailab-cvc.github.io/videocrafter2/",
    description="VideoCrafter2 is a T2V model that disentangling motion from appearance.",
    license="Apache 2.0",
    organization="Tencent AI Lab",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_LaVie_generation"],
    simple_name="LaVie",
    link="https://github.com/Vchitect/LaVie",
    description="LaVie is a video generation model with cascaded latent diffusion models.",
    license="Apache 2.0",
    organization="Shanghai AI Lab",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_ModelScope_generation"],
    simple_name="ModelScope",
    link="https://arxiv.org/abs/2308.06571",
    description="ModelScope is a a T2V synthesis model that evolves from a T2I synthesis model.",
    license="cc-by-nc-4.0",
    organization="Alibaba Group",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_OpenSora_generation"],
    simple_name="OpenSora",
    link="https://github.com/hpcaitech/Open-Sora",
    description="A community-driven opensource implementation of Sora.",
    license="Apache 2.0",
    organization="HPC-AI Tech",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_OpenSora12_generation"],
    simple_name="OpenSora v1.2",
    link="https://github.com/hpcaitech/Open-Sora",
    description="A community-driven opensource implementation of Sora. v1.2",
    license="Apache 2.0",
    organization="HPC-AI Tech",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_CogVideoX-2B_generation"],
    simple_name="CogVideoX-2B",
    link="https://github.com/THUDM/CogVideo",
    description="Text-to-Video Diffusion Models with An Expert Transformer.",
    license="CogVideoX LICENSE",
    organization="THUDM",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_PyramidFlow_text2video"],
    simple_name="Pyramid Flow",
    link="https://pyramid-flow.github.io/",
    description="Pyramidal Flow Matching for Efficient Video Generative Modeling.",
    license="MIT LICENSE",
    organization="Peking University",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_CogVideoX-5B_text2video"],
    simple_name="CogVideoX-5B",
    link="https://github.com/THUDM/CogVideo",
    description="Text-to-Video Diffusion Models with An Expert Transformer.",
    license="CogVideoX LICENSE",
    organization="THUDM",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["fal_T2VTurbo_text2video"],
    simple_name="T2V-Turbo",
    link="https://github.com/Ji4chenLi/t2v-turbo",
    description="Video Consistency Model with Mixed Reward Feedback.",
    license="cc-by-nc-4.0",
    organization="University of California, Santa Barbara",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_Allegro_text2video"],
    simple_name="Allegro",
    link="https://github.com/rhymes-ai/Allegro",
    description="DiT based Video Generation Model",
    license="Apache 2.0",
    organization="rhymes-ai",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_LTXVideo_text2video"],
    simple_name="LTXVideo",
    link="https://github.com/Lightricks/LTX-Video",
    description="DiT based Video Generation Model",
    license="Apache 2.0",
    organization="Lightricks",
    model_type="text2video_generation",
)
register_model_info(
    full_names=["videogenhub_Mochi1_text2video"],
    simple_name="Mochi1",
    link="https://github.com/genmoai/mochi",
    description="Mochi 1 preview is an open state-of-the-art video generation model with high-fidelity motion and strong prompt adherence in preliminary evaluation.",
    license="Apache 2.0",
    organization="Genmo AI",
    model_type="text2video_generation",
)