import abc
from collections import defaultdict

import gradio as gr
import pandas as pd
from loguru import logger

from judgerbench.preprocess.gen_table import (
    format_timestamp,
    generate_table,
    build_l1_df,
    # build_l2_df,
)
from judgerbench.meta_data import (
    LEADERBORAD_INTRODUCTION,
    LEADERBOARD_MD,
    LEADERBOARD_FILE_MAPPING,
    MAIN_FIELDS,
    DEFAULT_BENCH,
    STYLE_CLASS_MAPPING,
    CITATION_BUTTON_TEXT,
    CITATION_BUTTON_LABEL,
)
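

# Rebuild the full leaderboard table from scratch. `required_fields` is
# accepted so the function can be wired to a CheckboxGroup.change event (see
# the commented-out wiring further down); the current implementation ignores
# it and always renders MAIN_FIELDS.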
def refresh_dataframe(required_fields):
    df = generate_table(MAIN_FIELDS)
    comp = gr.DataFrame(
        value=df,
        type='pandas',
        interactive=False,
        visible=True,
    )
    return comp
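

# Top-level Gradio UI. The evaluation date and model/dataset counts are
# hardcoded placeholders here; the commented-out lines show how they would be
# derived from a `load_results()` dump instead.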
with gr.Blocks() as demo:
    # struct = load_results()
    # timestamp = struct['time']
    # EVAL_TIME = format_timestamp(timestamp)
    EVAL_TIME = '20241022'
    # results = struct['results']
    # N_MODEL = len(results)
    # N_DATA = len(results['LLaVA-v1.5-7B']) - 1
    N_MODEL = 10
    N_DATA = 100
    # DATASETS = list(results['LLaVA-v1.5-7B'])
    # DATASETS.remove('META')
    gr.Markdown(LEADERBORAD_INTRODUCTION.format(
        # N_MODEL,
        # N_DATA,
        EVAL_TIME
    ))
    # structs = [abc.abstractproperty() for _ in range(N_DATA)]
    with gr.Tabs(elem_classes='tab-buttons') as tabs:
        for cur_id, (filename, filepath) in enumerate(LEADERBOARD_FILE_MAPPING.items()):
            tab_name = filename
            # if filename == "overall":
            #     tab_name = '🏅 JudgerBench Main Leaderboard'
            with gr.Tab(tab_name.upper(), elem_id=f'tab_{cur_id}', id=cur_id):
                # gr.Markdown(LEADERBOARD_MD['MAIN'])
                # _, check_box = build_l1_df(MAIN_FIELDS)
                table = generate_table(filename=filename)
                # Column dtypes for gr.DataFrame: everything defaults to
                # numeric except the label columns.
                # type_map = check_box['type_map']
                type_map = defaultdict(lambda: 'number')
                type_map['Models'] = 'str'  # the table column is 'Models', not 'Model' (see starting_columns below)
                type_map['Class'] = 'str'
                type_map['Rank'] = 'number'
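                # The commented-out block below is the metric-selection UI
                # (a CheckboxGroup driven by build_l1_df's check_box dict),
                # apparently inherited from the VLMEvalKit layout and not
                # enabled here.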
                # required_fields = gr.State(
                #     check_box['essential']
                #     # + ["Average"]
                # )
                # checkbox_group = gr.CheckboxGroup(
                #     choices=[item for item in check_box['all'] if item not in required_fields.value],
                #     value=[item for item in check_box['default'] if item not in required_fields.value],
                #     label='Evaluation Metrics',
                #     interactive=True,
                # )
                # headers = (
                #     ['Rank'] +
                #     required_fields.value +
                #     [item for item in check_box['all'] if item not in required_fields.value]
                #     # checkbox_group.value
                # )
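                # Rank is assigned positionally, assuming generate_table
                # returns rows already sorted by score.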
                table['Rank'] = list(range(1, len(table) + 1))
                # Rearrange columns so the identifying fields come first
                if "Class" in table.columns:
                    starting_columns = ["Rank", "Models", "Class"]
                else:
                    starting_columns = ["Rank", "Models"]
                table = table[starting_columns + [col for col in table.columns if col not in starting_columns]]
                headers = (
                    # ['Rank'] +
                    list(table.columns)
                )
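                # Color the 'Class' cells via a pandas Styler.
                # STYLE_CLASS_MAPPING is assumed to map each class label to a
                # CSS color string, e.g. {'LLM': '#d4edda'} (hypothetical
                # values; the real mapping lives in judgerbench.meta_data).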
| if "Class" in table.columns: | |
| def cell_styler(v): | |
| df = v.copy() | |
| class_var = df[['Class']].copy() | |
| df.loc[:, :] = '' | |
| df[['Class']] = class_var.map(lambda x: f"background-color: {STYLE_CLASS_MAPPING[x]}") | |
| logger.info(df['Class']) | |
| return df | |
| table_styler = ( | |
| table.style.apply(cell_styler, axis=None) | |
| .format(precision=1) | |
| ) | |
| else: | |
| table_styler = table.style.format(precision=1) | |
                # with gr.Row():
                #     model_size = gr.CheckboxGroup(
                #         choices=MODEL_SIZE,
                #         value=MODEL_SIZE,
                #         label='Model Size',
                #         interactive=True
                #     )
                #     model_type = gr.CheckboxGroup(
                #         choices=MODEL_TYPE,
                #         value=MODEL_TYPE,
                #         label='Model Type',
                #         interactive=True
                #     )
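                # Render the styled table. `datatype` tells Gradio whether to
                # treat each column as 'str' or 'number', which keeps numeric
                # sorting intact.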
                data_component = gr.DataFrame(
                    value=table_styler,
                    type='pandas',
                    datatype=[type_map[x] for x in headers],
                    interactive=False,
                    visible=True,
                )
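
                # Regenerate and re-rank the table for a user-chosen set of
                # metric columns. Currently unreferenced: the CheckboxGroup
                # wiring that would call it is commented out below.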
                def filter_df(
                    required_fields,
                    fields,
                    # model_size,
                    # model_type
                ):
                    # filter_list = ['Avg Score', 'Avg Rank', 'OpenSource', 'Verified']
                    headers = ['Rank'] + required_fields + fields
                    # new_fields = [field for field in fields if field not in filter_list]
                    df = generate_table(fields)
                    logger.info(f"{df.columns=}")
                    # df['flag'] = [model_size_flag(x, model_size) for x in df['Param (B)']]
                    # df = df[df['flag']]
                    # df.pop('flag')
                    # if len(df):
                    #     df['flag'] = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
                    #     df = df[df['flag']]
                    #     df.pop('flag')
                    df['Rank'] = list(range(1, len(df) + 1))
                    comp = gr.DataFrame(
                        value=df[headers],
                        type='pandas',
                        datatype=[type_map[x] for x in headers],
                        interactive=False,
                        visible=True,
                    )
                    return comp
                # for cbox in [
                #     # checkbox_group,
                #     # model_size,
                #     # model_type
                # ]:
                #     cbox.change(
                #         fn=refresh_dataframe,
                #         inputs=[required_fields],
                #         outputs=data_component
                #     ).then(
                #         fn=filter_df,
                #         inputs=[
                #             required_fields,
                #             checkbox_group,
                #             # model_size,
                #             # model_type
                #         ],
                #         outputs=data_component
                #     )
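
        # The block below is the disabled per-dataset (L2) leaderboard: one
        # tab per dataset built via build_l2_df, with model size/type filters.
        # It is kept for reference and depends on names (DATASETS, MODEL_SIZE,
        # MODEL_TYPE, cp, model_size_flag, model_type_flag) that are not
        # defined in this file.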
        # with gr.Tab('🔍 About', elem_id='about', id=1):
        #     gr.Markdown(urlopen(VLMEVALKIT_README).read().decode())
        # for i, dataset in enumerate(DATASETS):
        #     with gr.Tab(f'📊 {dataset} Leaderboard', elem_id=dataset, id=i + 2):
        #         if dataset in LEADERBOARD_MD:
        #             gr.Markdown(LEADERBOARD_MD[dataset])
        #         s = structs[i]
        #         s.table, s.check_box = build_l2_df(results, dataset)
        #         s.type_map = s.check_box['type_map']
        #         s.type_map['Rank'] = 'number'
        #         s.checkbox_group = gr.CheckboxGroup(
        #             choices=s.check_box['all'],
        #             value=s.check_box['required'],
        #             label=f'{dataset} CheckBoxes',
        #             interactive=True,
        #         )
        #         s.headers = ['Rank'] + s.check_box['essential'] + s.checkbox_group.value
        #         s.table['Rank'] = list(range(1, len(s.table) + 1))
        #         with gr.Row():
        #             s.model_size = gr.CheckboxGroup(
        #                 choices=MODEL_SIZE,
        #                 value=MODEL_SIZE,
        #                 label='Model Size',
        #                 interactive=True
        #             )
        #             s.model_type = gr.CheckboxGroup(
        #                 choices=MODEL_TYPE,
        #                 value=MODEL_TYPE,
        #                 label='Model Type',
        #                 interactive=True
        #             )
        #         s.data_component = gr.components.DataFrame(
        #             value=s.table[s.headers],
        #             type='pandas',
        #             datatype=[s.type_map[x] for x in s.headers],
        #             interactive=False,
        #             visible=True)
        #         s.dataset = gr.Textbox(value=dataset, label=dataset, visible=False)
        #
        #         def filter_df_l2(dataset_name, fields, model_size, model_type):
        #             s = structs[DATASETS.index(dataset_name)]
        #             headers = ['Rank'] + s.check_box['essential'] + fields
        #             df = cp.deepcopy(s.table)
        #             df['flag'] = [model_size_flag(x, model_size) for x in df['Param (B)']]
        #             df = df[df['flag']]
        #             df.pop('flag')
        #             if len(df):
        #                 df['flag'] = [model_type_flag(df.iloc[i], model_type) for i in range(len(df))]
        #                 df = df[df['flag']]
        #                 df.pop('flag')
        #             df['Rank'] = list(range(1, len(df) + 1))
        #             comp = gr.components.DataFrame(
        #                 value=df[headers],
        #                 type='pandas',
        #                 datatype=[s.type_map[x] for x in headers],
        #                 interactive=False,
        #                 visible=True)
        #             return comp
        #
        #         for cbox in [s.checkbox_group, s.model_size, s.model_type]:
        #             cbox.change(
        #                 fn=filter_df_l2,
        #                 inputs=[s.dataset, s.checkbox_group, s.model_size, s.model_type],
        #                 outputs=s.data_component)
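
    # Citation info, shown in a collapsible accordion at the bottom of the page.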
    with gr.Row():
        with gr.Accordion('Citation', open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button',
                lines=7,
            )


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7860)  # int default to match type=int
    parser.add_argument(
        "--share",
        action="store_true",
        help="Whether to generate a public, shareable link",
    )
    parser.add_argument(
        "--concurrency-count",
        type=int,
        default=10,
        help="The concurrency count of the gradio queue",
    )
    parser.add_argument(
        "--max-threads",
        type=int,
        default=200,
        help="The maximum number of threads available to process non-async functions.",
    )
    # parser.add_argument(
    #     "--gradio-auth-path",
    #     type=str,
    #     help='Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"',
    #     default=None,
    # )
    parser.add_argument(
        "--gradio-root-path",
        type=str,
        help="Sets the gradio root path, e.g. /abc/def. Useful when running behind a reverse proxy or at a custom URL path prefix",
    )
    parser.add_argument(
        "--ga-id",
        type=str,
        help="The Google Analytics ID",
        default=None,
    )
    parser.add_argument(
        "--use-remote-storage",
        action="store_true",
        default=False,
        help="Upload image files to Google Cloud Storage if set",
    )
    args = parser.parse_args()
    logger.info(f"args: {args}")

    # Set authorization credentials
    # auth = None
    # if args.gradio_auth_path is not None:
    #     auth = parse_gradio_auth_creds(args.gradio_auth_path)
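
    # Queue events so at most `concurrency_count` handlers run concurrently;
    # api_open=False blocks direct programmatic calls to the prediction
    # endpoints.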
    demo.queue(
        default_concurrency_limit=args.concurrency_count,
        status_update_rate=10,
        api_open=False,
    ).launch(
        server_name=args.host,
        server_port=args.port,
        share=args.share,
        max_threads=args.max_threads,
        # auth=auth,
        root_path=args.gradio_root_path,
        # debug=True,
        show_error=True,
        allowed_paths=["../.."],
    )
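
    # Typical invocation (assuming this file is saved as app.py):
    #   python app.py --host 0.0.0.0 --port 7860 --share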