Spaces: TIGER-Lab/GenAI-Arena (Running on Zero)
Merge branch 'main' of https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena
Files changed:
- README.md +1 -1
- app.py +1 -1
- arena_elo/video_generation_model_info.json +1 -1
- model/model_registry.py +8 -10
- model/models/__init__.py +1 -1
- model/models/fal_api_models.py +0 -18
- requirements.txt +1 -1
- serve/leaderboard.py +0 -39
README.md CHANGED

@@ -7,7 +7,7 @@ sdk: gradio
 sdk_version: 4.41.0
 python_version: 3.12
 app_file: app.py
-pinned: false
+pinned: true
 license: mit
 tags:
 - arena
app.py CHANGED

@@ -97,7 +97,7 @@ if __name__ == "__main__":
     root_path = ROOT_PATH
     elo_results_dir = ELO_RESULTS_DIR
     models = ModelManager(enable_nsfw=False, do_pre_download=True, do_debug_packages=True)
-    # models = ModelManager(enable_nsfw=False,
+    # models = ModelManager(enable_nsfw=False, do_pre_download=False, do_debug_packages=False)
 
     elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
     demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
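The live call and the commented-out alternative now differ only in the do_pre_download and do_debug_packages flags, i.e. a production versus local-debug configuration. A minimal sketch of that toggle, assuming a hypothetical GENAI_ARENA_DEBUG environment variable (the repo itself just swaps the commented line, and the ModelManager import path is omitted here):

import os

# Hypothetical flag; not part of the repo, which hard-codes the arguments instead.
IS_DEBUG = os.environ.get("GENAI_ARENA_DEBUG") == "1"

# ModelManager comes from the Space's own codebase, as imported in app.py.
models = ModelManager(
    enable_nsfw=False,
    do_pre_download=not IS_DEBUG,    # production: fetch model weights up front
    do_debug_packages=not IS_DEBUG,  # production: also run the package diagnostics
)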
arena_elo/video_generation_model_info.json CHANGED

@@ -31,7 +31,7 @@
     },
     "StableVideoDiffusion": {
         "Link": "https://fal.ai/models/fal-ai/fast-svd/text-to-video/api",
-        "License": "
+        "License": "SVD-nc-community",
         "Organization": "Stability AI"
     },
     "T2VTurbo": {
model/model_registry.py CHANGED

@@ -258,15 +258,6 @@ register_model_info(
     "AnimateDiff Turbo is a lightning version of AnimateDiff.",
 )
 
-"""
-register_model_info(
-    ["videogenhub_LaVie_generation"],
-    "LaVie",
-    "https://github.com/Vchitect/LaVie",
-    "LaVie is a video generation model with cascaded latent diffusion models.",
-)
-
-
 register_model_info(
     ["videogenhub_VideoCrafter2_generation"],
     "VideoCrafter2",

@@ -274,6 +265,13 @@ register_model_info(
     "VideoCrafter2 is a T2V model that disentangling motion from appearance.",
 )
 
+"""
+register_model_info(
+    ["videogenhub_LaVie_generation"],
+    "LaVie",
+    "https://github.com/Vchitect/LaVie",
+    "LaVie is a video generation model with cascaded latent diffusion models.",
+)
 register_model_info(
     ["videogenhub_ModelScope_generation"],
     "ModelScope",

@@ -303,7 +301,7 @@ register_model_info(
 )
 
 register_model_info(
-    ["
+    ["fal_T2VTurbo_text2video"],
     "T2V-Turbo",
     "https://github.com/Ji4chenLi/t2v-turbo",
     "Video Consistency Model with Mixed Reward Feedback.",
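Every register_model_info call site in this file passes the same four positional arguments: a list of internal model names, a display name, a link, and a description. The helper itself is not shown in the diff; a plausible sketch of what it does, with the field and dict names being assumptions inferred from the call sites:

from collections import namedtuple

# Inferred registry shape; names here are assumptions, not the repo's code.
ModelInfo = namedtuple("ModelInfo", ["simple_name", "link", "description"])
model_info = {}

def register_model_info(full_names, simple_name, link, description):
    info = ModelInfo(simple_name, link, description)
    for full_name in full_names:
        model_info[full_name] = info

# Example call, matching the T2V-Turbo registration above:
register_model_info(
    ["fal_T2VTurbo_text2video"],
    "T2V-Turbo",
    "https://github.com/Ji4chenLi/t2v-turbo",
    "Video Consistency Model with Mixed Reward Feedback.",
)

Under this reading, restoring the full internal name "fal_T2VTurbo_text2video" (in place of the truncated value it replaces) is what lets the arena resolve the fal-served T2V-Turbo entry to its display metadata.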
model/models/__init__.py CHANGED

@@ -18,7 +18,7 @@ IMAGE_EDITION_MODELS = ['imagenhub_CycleDiffusion_edition', 'imagenhub_Pix2PixZe
 VIDEO_GENERATION_MODELS = ['fal_AnimateDiff_text2video',
                            'fal_AnimateDiffTurbo_text2video',
                            #'videogenhub_LaVie_generation',
-
+                           'videogenhub_VideoCrafter2_generation',
                            #'videogenhub_ModelScope_generation',
                            'videogenhub_CogVideoX_generation', 'videogenhub_OpenSora12_generation',
                            #'videogenhub_OpenSora_generation',
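The list entries follow a source_Model_task naming convention (fal_AnimateDiff_text2video, videogenhub_VideoCrafter2_generation, and so on). A hypothetical sketch of how a loader could dispatch on that convention; VideoGenHubModel and both constructor signatures are placeholders, not the repo's actual API:

from .fal_api_models import FalModel  # FalModel does exist in this package

def load_video_model(model_name: str):
    # Split "fal_AnimateDiff_text2video" into ("fal", "AnimateDiff", "text2video").
    source, name, task = model_name.split("_", 2)
    if source == "fal":
        return FalModel(name, task)    # constructor signature assumed
    if source == "videogenhub":
        return VideoGenHubModel(name)  # placeholder class name
    raise ValueError(f"unknown model source: {source}")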
model/models/fal_api_models.py CHANGED

@@ -54,24 +54,6 @@ class FalModel():
             return result
         elif self.model_type == "image2image":
             raise NotImplementedError("image2image model is not implemented yet")
-            # assert "image" in kwargs or "image_url" in kwargs, "image or image_url is required for image2image model"
-            # if "image" in kwargs:
-            #     image_url = None
-            #     pass
-            # handler = fal_client.submit(
-            #     f"fal-ai/{self.model_name}",
-            #     arguments={
-            #         "image_url": image_url
-            #     },
-            # )
-            #
-            # for event in handler.iter_events():
-            #     if isinstance(event, fal_client.InProgress):
-            #         print('Request in progress')
-            #         print(event.logs)
-            #
-            # result = handler.get()
-            # return result
         elif self.model_type == "text2video":
             assert "prompt" in kwargs, "prompt is required for text2video model"
             if self.model_name == 'AnimateDiff':
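The deleted comment block was a copy of the generic fal_client submit-and-poll pattern, dead code behind the NotImplementedError. For reference, a self-contained sketch of that pattern; the endpoint id and prompt are illustrative, and with_logs=True is needed for event.logs to be populated:

import fal_client  # pip install fal-client; expects FAL_KEY in the environment

# Submit a request to a fal.ai endpoint (app id here is illustrative).
handler = fal_client.submit(
    "fal-ai/fast-svd/text-to-video",
    arguments={"prompt": "a corgi surfing a wave"},
)

# Stream progress events until the job finishes.
for event in handler.iter_events(with_logs=True):
    if isinstance(event, fal_client.InProgress):
        print("Request in progress")
        print(event.logs)

result = handler.get()  # blocks until the final result is available
print(result)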
requirements.txt CHANGED

@@ -68,4 +68,4 @@ tensorboard
 timm
 wandb
 pandarallel
-kaleido
+kaleido
serve/leaderboard.py CHANGED

@@ -22,20 +22,6 @@ basic_component_values = [None] * 6
 leader_component_values = [None] * 5
 
 
-# def make_leaderboard_md(elo_results):
-#     leaderboard_md = f"""
-# # 🏆 Chatbot Arena Leaderboard
-# | [Blog](https://lmsys.org/blog/2023-05-03-arena/) | [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) | [Twitter](https://twitter.com/lmsysorg) | [Discord](https://discord.gg/HSWAKCrnFx) |
-
-# This leaderboard is based on the following three benchmarks.
-# - [Chatbot Arena](https://lmsys.org/blog/2023-05-03-arena/) - a crowdsourced, randomized battle platform. We use 100K+ user votes to compute Elo ratings.
-# - [MT-Bench](https://arxiv.org/abs/2306.05685) - a set of challenging multi-turn questions. We use GPT-4 to grade the model responses.
-# - [MMLU](https://arxiv.org/abs/2009.03300) (5-shot) - a test to measure a model's multitask accuracy on 57 tasks.
-
-# 💻 Code: The Arena Elo ratings are computed by this [notebook]({notebook_url}). The MT-bench scores (single-answer grading on a scale of 10) are computed by [fastchat.llm_judge](https://github.com/lm-sys/FastChat/tree/main/fastchat/llm_judge). The MMLU scores are mostly computed by [InstructEval](https://github.com/declare-lab/instruct-eval). Higher values are better for all benchmarks. Empty cells mean not available. Last updated: November, 2023.
-# """
-#     return leaderboard_md
-
 def make_leaderboard_md(elo_results):
     leaderboard_md = f"""
 # 🏆 GenAI-Arena Leaderboard

@@ -324,31 +310,6 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Tr
 
     leader_component_values[:] = [md, p1, p2, p3, p4]
 
-    """
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles"
-            )
-            plot_1 = gr.Plot(p1, show_label=False)
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 2: Battle Count for Each Combination of Models (without Ties)"
-            )
-            plot_2 = gr.Plot(p2, show_label=False)
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 3: Bootstrap of Elo Estimates (1000 Rounds of Random Sampling)"
-            )
-            plot_3 = gr.Plot(p3, show_label=False)
-        with gr.Column():
-            gr.Markdown(
-                "#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)"
-            )
-            plot_4 = gr.Plot(p4, show_label=False)
-    """
-
     from .utils import acknowledgment_md
 
     gr.Markdown(acknowledgment_md)
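The block removed by the second hunk had already been disabled by the surrounding triple quotes; it laid out the four Elo figures in a two-by-two grid. A standalone sketch of that layout, with dummy Plotly figures standing in for the precomputed p1 through p4:

import gradio as gr
import plotly.express as px

# Dummy stand-ins for the precomputed Elo figures p1..p4.
p1 = p2 = p3 = p4 = px.scatter(x=[1, 2, 3], y=[3, 1, 2])

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("#### Figure 1: Fraction of Model A Wins for All Non-tied A vs. B Battles")
            gr.Plot(p1, show_label=False)
        with gr.Column():
            gr.Markdown("#### Figure 2: Battle Count for Each Combination of Models (without Ties)")
            gr.Plot(p2, show_label=False)
    with gr.Row():
        with gr.Column():
            gr.Markdown("#### Figure 3: Bootstrap of Elo Estimates (1000 Rounds of Random Sampling)")
            gr.Plot(p3, show_label=False)
        with gr.Column():
            gr.Markdown("#### Figure 4: Average Win Rate Against All Other Models (Assuming Uniform Sampling and No Ties)")
            gr.Plot(p4, show_label=False)

demo.launch()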