import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime

abs_path = Path(__file__).parent
df = pd.read_json(str(abs_path / "leader_board.json"))
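# Each record is expected to provide the columns referenced below ("Rank",
# "Model", "Model Type", "Model Size", "Supports multiple images", and the
# per-task scores).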
| head_content = """ | |
| # 🏅 BlinkCode Leaderboard | |
| ### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](https://github.com/YJQuantumLeap/BlinkCode). | |
| ### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different task. | |
| ## Main Evaluation Results | |
| - Metrics: | |
| - Avg Score: The average score on all task (normalized to 0 - 100, the higher the better). | |
| - The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy. | |
| - The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better). | |
| - By default, we present the unrefined evaluation results,, sorted by the descending order of Avg Score⬆️. | |
| - The ⭐ symbol indicates results that have undergone two rounds of refinement. | |
| This leaderboard was last updated: <nowtime>. | |
| """ | |
| CITATION_BUTTON_TEXT = r"""@misc{2023opencompass, | |
| title={OpenCompass: A Universal Evaluation Platform for Foundation Models}, | |
| author={OpenCompass Contributors}, | |
| howpublished = {\url{https://github.com/open-compass/opencompass}}, | |
| year={2023} | |
| }""" | |
| CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" | |
| unique_models_count = df["Model"].nunique() | |
| # print(unique_models_count) | |
| nowtime = datetime.now() | |
| formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S") | |
| head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count)) | |
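
# Assemble the UI: the markdown header, the leaderboard table, and a
# collapsible citation box.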
with gr.Blocks() as demo:
    gr.Markdown(head_content)
    with gr.Tabs():
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images",
                # ColumnFilter("Params (B)", default=[0, 20]),
            ],
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )
| if __name__ == "__main__": | |
| demo.launch() |
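
# To run this Space locally (assuming the dependencies are published under
# these names): `pip install gradio gradio_leaderboard pandas`, then execute
# this file (e.g. `python app.py` if it is saved as app.py).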