Spaces:
Runtime error
Runtime error
root
committed on
Commit
·
58afb94
1
Parent(s):
2f4f14b
vbench2 filter
Browse files — app.py +73 -29
- constants.py +7 -2
app.py
CHANGED
|
@@ -18,6 +18,12 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
| 18 |
global data_component, filter_component
|
| 19 |
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
def upload_file(files):
|
| 22 |
file_paths = [file.name for file in files]
|
| 23 |
return file_paths
|
|
@@ -384,12 +390,6 @@ def get_final_score_quality(df, selected_columns):
|
|
| 384 |
return df
|
| 385 |
|
| 386 |
def get_final_score2(df, selected_columns):
|
| 387 |
-
category_to_dimension = {}
|
| 388 |
-
|
| 389 |
-
for key, value in VBENCH2_DIM2CAT.items():
|
| 390 |
-
if value not in category_to_dimension:
|
| 391 |
-
category_to_dimension[value] = []
|
| 392 |
-
category_to_dimension[value].append(key)
|
| 393 |
score_names = []
|
| 394 |
for cur_score in category_to_dimension:
|
| 395 |
score_name = f"{cur_score} Score"
|
|
@@ -497,11 +497,11 @@ def get_all_df_long(selected_columns, dir=LONG_DIR):
|
|
| 497 |
df = df.sort_values(by="Selected Score", ascending=False)
|
| 498 |
return df
|
| 499 |
|
| 500 |
-
def get_all_df2(dir=VBENCH2_DIR):
|
| 501 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 502 |
submission_repo.git_pull()
|
| 503 |
df = pd.read_csv(dir)
|
| 504 |
-
df = get_final_score2(df,
|
| 505 |
df = df.sort_values(by="Total Score", ascending=False)
|
| 506 |
return df
|
| 507 |
|
|
@@ -621,13 +621,13 @@ def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample
|
|
| 621 |
visible=True,
|
| 622 |
)
|
| 623 |
return filter_component#.value
|
|
|
|
| 624 |
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
updated_data = get_all_df2(VBENCH2_DIR)
|
| 628 |
print(updated_data)
|
| 629 |
if vbench_team_sample:
|
| 630 |
-
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
| 631 |
if vbench_team_eval:
|
| 632 |
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
| 633 |
|
|
@@ -642,7 +642,34 @@ def on_filter_model_size_method_change_2(vbench_team_sample, vbench_team_eval=Fa
|
|
| 642 |
interactive=False,
|
| 643 |
visible=True,
|
| 644 |
)
|
| 645 |
-
return filter_component
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 646 |
|
| 647 |
block = gr.Blocks()
|
| 648 |
|
|
@@ -699,7 +726,7 @@ with block:
|
|
| 699 |
datatype=DATA_TITILE_TYPE,
|
| 700 |
interactive=False,
|
| 701 |
visible=True,
|
| 702 |
-
height=700,
|
| 703 |
)
|
| 704 |
|
| 705 |
choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
|
@@ -724,18 +751,29 @@ with block:
|
|
| 724 |
TABLE_INTRODUCTION
|
| 725 |
)
|
| 726 |
with gr.Row():
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
value=False,
|
| 731 |
-
interactive=True
|
| 732 |
-
)
|
| 733 |
-
vbench_validate_filter_2 = gr.Checkbox(
|
| 734 |
-
label="Evaluated by VBench Team (Uncheck to view all submissions)",
|
| 735 |
-
value=True,
|
| 736 |
interactive=True
|
| 737 |
)
|
| 738 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 739 |
|
| 740 |
data_component_2 = gr.components.Dataframe(
|
| 741 |
value=get_baseline_df_2,
|
|
@@ -744,11 +782,17 @@ with block:
|
|
| 744 |
datatype=VBENCH2_TITLE_TYPE,
|
| 745 |
interactive=False,
|
| 746 |
visible=True,
|
| 747 |
-
height=700,
|
| 748 |
)
|
| 749 |
-
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 750 |
-
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 751 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 752 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
| 753 |
with gr.Accordion("INSTRUCTION", open=False):
|
| 754 |
citation_button = gr.Textbox(
|
|
@@ -868,7 +912,7 @@ with block:
|
|
| 868 |
datatype=DATA_TITILE_TYPE,
|
| 869 |
interactive=False,
|
| 870 |
visible=True,
|
| 871 |
-
height=700,
|
| 872 |
)
|
| 873 |
|
| 874 |
choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
|
|
|
|
| 18 |
global data_component, filter_component
|
| 19 |
|
| 20 |
|
| 21 |
+
category_to_dimension = {}
|
| 22 |
+
for key, value in VBENCH2_DIM2CAT.items():
|
| 23 |
+
if value not in category_to_dimension:
|
| 24 |
+
category_to_dimension[value] = []
|
| 25 |
+
category_to_dimension[value].append(key)
|
| 26 |
+
|
| 27 |
def upload_file(files):
|
| 28 |
file_paths = [file.name for file in files]
|
| 29 |
return file_paths
|
|
|
|
| 390 |
return df
|
| 391 |
|
| 392 |
def get_final_score2(df, selected_columns):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
score_names = []
|
| 394 |
for cur_score in category_to_dimension:
|
| 395 |
score_name = f"{cur_score} Score"
|
|
|
|
| 497 |
df = df.sort_values(by="Selected Score", ascending=False)
|
| 498 |
return df
|
| 499 |
|
| 500 |
+
def get_all_df2(selected_columns, dir=VBENCH2_DIR):
|
| 501 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 502 |
submission_repo.git_pull()
|
| 503 |
df = pd.read_csv(dir)
|
| 504 |
+
df = get_final_score2(df, selected_columns)
|
| 505 |
df = df.sort_values(by="Total Score", ascending=False)
|
| 506 |
return df
|
| 507 |
|
|
|
|
| 621 |
visible=True,
|
| 622 |
)
|
| 623 |
return filter_component#.value
|
| 624 |
+
|
| 625 |
|
| 626 |
+
def on_filter_model_size_method_change_2(selected_columns=TASK_INFO_2, vbench_team_sample=False, vbench_team_eval=False):
|
| 627 |
+
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
|
|
|
| 628 |
print(updated_data)
|
| 629 |
if vbench_team_sample:
|
| 630 |
+
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
| 631 |
if vbench_team_eval:
|
| 632 |
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
| 633 |
|
|
|
|
| 642 |
interactive=False,
|
| 643 |
visible=True,
|
| 644 |
)
|
| 645 |
+
return filter_component
|
| 646 |
+
|
| 647 |
+
def on_filter_model_size_method_score_change_2(select_score, vbench_team_sample=False, vbench_team_eval=False):
|
| 648 |
+
selected_columns = category_to_dimension[select_score]
|
| 649 |
+
print(select_score,"===>",selected_columns)
|
| 650 |
+
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
| 651 |
+
print(updated_data)
|
| 652 |
+
if vbench_team_sample:
|
| 653 |
+
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
| 654 |
+
if vbench_team_eval:
|
| 655 |
+
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
| 656 |
+
|
| 657 |
+
present_columns = VBENCH2_MODEL_INFO_DEFAULT + [f"{select_score} Score"] + selected_columns
|
| 658 |
+
updated_headers = present_columns
|
| 659 |
+
updated_data = updated_data[present_columns]
|
| 660 |
+
updated_data = updated_data.sort_values(by=f"{select_score} Score", ascending=False)
|
| 661 |
+
updated_data = convert_scores_to_percentage(updated_data)
|
| 662 |
+
update_datatype = [VBENCH2_TITLE_TYPE[COLUMN_NAMES_2.index(x)] for x in updated_headers]
|
| 663 |
+
print(updated_data)
|
| 664 |
+
filter_component = gr.components.Dataframe(
|
| 665 |
+
value=updated_data,
|
| 666 |
+
headers=updated_headers,
|
| 667 |
+
type="pandas",
|
| 668 |
+
datatype=update_datatype,
|
| 669 |
+
interactive=False,
|
| 670 |
+
visible=True,
|
| 671 |
+
)
|
| 672 |
+
return filter_component, gr.update(value=selected_columns)
|
| 673 |
|
| 674 |
block = gr.Blocks()
|
| 675 |
|
|
|
|
| 726 |
datatype=DATA_TITILE_TYPE,
|
| 727 |
interactive=False,
|
| 728 |
visible=True,
|
| 729 |
+
# height=700,
|
| 730 |
)
|
| 731 |
|
| 732 |
choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
|
|
|
| 751 |
TABLE_INTRODUCTION
|
| 752 |
)
|
| 753 |
with gr.Row():
|
| 754 |
+
vbench_team_filter_2 = gr.Checkbox(
|
| 755 |
+
label="Sampled by VBench Team (Uncheck to view all submissions)",
|
| 756 |
+
value=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 757 |
interactive=True
|
| 758 |
)
|
| 759 |
+
vbench_validate_filter_2 = gr.Checkbox(
|
| 760 |
+
label="Evaluated by VBench Team (Uncheck to view all submissions)",
|
| 761 |
+
value=True,
|
| 762 |
+
interactive=True
|
| 763 |
+
)
|
| 764 |
+
with gr.Row():
|
| 765 |
+
vbench2_creativity_button = gr.Button("Show Creativity Score")
|
| 766 |
+
vbench2_commonsense_button = gr.Button("Show Commonsense Score")
|
| 767 |
+
vbench2_control_button = gr.Button("Show Controllability Score")
|
| 768 |
+
vbench2_human_button = gr.Button("Show Human Fidelity Score")
|
| 769 |
+
vbench2_physics_button = gr.Button("Show Physics Score")
|
| 770 |
+
with gr.Row():
|
| 771 |
+
vbench2_checkgroup = gr.CheckboxGroup(
|
| 772 |
+
choices=TASK_INFO_2,
|
| 773 |
+
value=TASK_INFO_2,
|
| 774 |
+
label="Evaluation Dimension",
|
| 775 |
+
interactive=True,
|
| 776 |
+
)
|
| 777 |
|
| 778 |
data_component_2 = gr.components.Dataframe(
|
| 779 |
value=get_baseline_df_2,
|
|
|
|
| 782 |
datatype=VBENCH2_TITLE_TYPE,
|
| 783 |
interactive=False,
|
| 784 |
visible=True,
|
| 785 |
+
# height=700,
|
| 786 |
)
|
| 787 |
+
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 788 |
+
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 789 |
+
# vbench2_checkgroup.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 790 |
+
vbench2_creativity_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Creativity"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
| 791 |
+
vbench2_commonsense_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Commonsense"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
| 792 |
+
vbench2_control_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Controllability"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
| 793 |
+
vbench2_human_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Human Fidelity"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
| 794 |
+
vbench2_physics_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Physics"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
| 795 |
+
|
| 796 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
| 797 |
with gr.Accordion("INSTRUCTION", open=False):
|
| 798 |
citation_button = gr.Textbox(
|
|
|
|
| 912 |
datatype=DATA_TITILE_TYPE,
|
| 913 |
interactive=False,
|
| 914 |
visible=True,
|
| 915 |
+
# height=700,
|
| 916 |
)
|
| 917 |
|
| 918 |
choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
|
constants.py
CHANGED
|
@@ -50,13 +50,16 @@ TASK_INFO = [
|
|
| 50 |
]
|
| 51 |
|
| 52 |
|
| 53 |
-
|
| 54 |
"Model (alphabetical order)",
|
| 55 |
"Sampled by",
|
| 56 |
"Evaluated by",
|
| 57 |
"Accessibility",
|
| 58 |
"Date",
|
| 59 |
-
"Total Score"
|
|
|
|
|
|
|
|
|
|
| 60 |
'Creativity Score',
|
| 61 |
'Commonsense Score',
|
| 62 |
'Controllability Score',
|
|
@@ -64,6 +67,8 @@ MODEL_INFO_2 = [
|
|
| 64 |
'Physics Score'
|
| 65 |
]
|
| 66 |
|
|
|
|
|
|
|
| 67 |
TASK_INFO_2 = [
|
| 68 |
"Human Anatomy",
|
| 69 |
"Human Clothes",
|
|
|
|
| 50 |
]
|
| 51 |
|
| 52 |
|
| 53 |
+
VBENCH2_MODEL_INFO_DEFAULT = [
|
| 54 |
"Model (alphabetical order)",
|
| 55 |
"Sampled by",
|
| 56 |
"Evaluated by",
|
| 57 |
"Accessibility",
|
| 58 |
"Date",
|
| 59 |
+
"Total Score"
|
| 60 |
+
]
|
| 61 |
+
|
| 62 |
+
VBENCH2_DIM_DEFAULT_INFO = [
|
| 63 |
'Creativity Score',
|
| 64 |
'Commonsense Score',
|
| 65 |
'Controllability Score',
|
|
|
|
| 67 |
'Physics Score'
|
| 68 |
]
|
| 69 |
|
| 70 |
+
MODEL_INFO_2 = VBENCH2_MODEL_INFO_DEFAULT + VBENCH2_DIM_DEFAULT_INFO
|
| 71 |
+
|
| 72 |
TASK_INFO_2 = [
|
| 73 |
"Human Anatomy",
|
| 74 |
"Human Clothes",
|