Spaces:
Sleeping
Sleeping
Update config.py
Browse files
config.py
CHANGED
|
@@ -1,74 +1,36 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
|
| 3 |
-
import config
|
| 4 |
-
from pathlib import Path
|
| 5 |
import pandas as pd
|
| 6 |
-
from datetime import datetime
|
| 7 |
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
#
|
| 15 |
-
# print(df.columns)
|
| 16 |
-
# print(df.head(1))
|
| 17 |
-
head_content = """
|
| 18 |
-
# 🏅 BlinkCode Leaderboard
|
| 19 |
-
### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](github.link).
|
| 20 |
-
|
| 21 |
-
### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different tasks.
|
| 22 |
-
## Main Evaluation Results
|
| 23 |
-
- Metrics:
|
| 24 |
-
  - Avg Score: The average score on all tasks (normalized to 0 - 100, the higher the better).
|
| 25 |
-
- The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy.
|
| 26 |
-
- The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better).
|
| 27 |
-
- By default, we present the unrefined evaluation results, sorted by the descending order of Avg Score⬆️.
|
| 28 |
-
- The ⭐ symbol indicates results that have undergone two rounds of refinement.
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
This leaderboard was last updated: <nowtime>.
|
| 32 |
-
"""
|
| 33 |
-
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
|
| 34 |
-
title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
|
| 35 |
-
author={OpenCompass Contributors},
|
| 36 |
-
howpublished = {\url{https://github.com/open-compass/opencompass}},
|
| 37 |
-
year={2023}
|
| 38 |
-
}"""
|
| 39 |
-
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
| 40 |
-
unique_models_count = df["Model"].nunique()
|
| 41 |
-
# print(unique_models_count)
|
| 42 |
-
nowtime = datetime.now()
|
| 43 |
-
formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S")
|
| 44 |
-
head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count))
|
| 45 |
-
|
| 46 |
-
with gr.Blocks() as demo:
|
| 47 |
-
gr.Markdown(head_content)
|
| 48 |
-
with gr.Tabs():
|
| 49 |
-
Leaderboard(
|
| 50 |
-
value=df,
|
| 51 |
-
select_columns=SelectColumns(
|
| 52 |
-
default_selection=config.ON_LOAD_COLUMNS,
|
| 53 |
-
cant_deselect=["Rank", "Model"],
|
| 54 |
-
label="Select Columns to Display:",
|
| 55 |
-
),
|
| 56 |
-
search_columns=["Model", "Model Type"],
|
| 57 |
-
hide_columns=["Model Size", "Model Type", "Supports multiple images"],
|
| 58 |
-
filter_columns=[
|
| 59 |
-
"Model Size",
|
| 60 |
-
"Model Type",
|
| 61 |
-
"Supports multiple images"
|
| 62 |
-
# ColumnFilter("Params (B)", default=[0, 20]),
|
| 63 |
-
],
|
| 64 |
-
datatype=config.TYPES,
|
| 65 |
-
column_widths=["5%", "15%"],
|
| 66 |
-
)
|
| 67 |
-
with gr.Row():
|
| 68 |
-
with gr.Accordion('Citation', open=False):
|
| 69 |
-
citation_button = gr.Textbox(
|
| 70 |
-
value=CITATION_BUTTON_TEXT,
|
| 71 |
-
label=CITATION_BUTTON_LABEL,
|
| 72 |
-
elem_id='citation-button')
|
| 73 |
-
if __name__ == "__main__":
|
| 74 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
|
|
|
| 2 |
|
| 3 |
+
TYPES = [
|
| 4 |
+
"number", # Rank
|
| 5 |
+
"str", # Model Type
|
| 6 |
+
"str", # Model Size
|
| 7 |
+
"str", # Model
|
| 8 |
+
"str", # Param (B)
|
| 9 |
+
"bool", # Supports multiple images
|
| 10 |
+
"number" # Avg Score
|
| 11 |
+
"number", # HumanEval-V
|
| 12 |
+
"number", # MBPP-V
|
| 13 |
+
"number", # GSM8K-V
|
| 14 |
+
"number", # MATH-V
|
| 15 |
+
"number", # VP
|
| 16 |
+
"number", # Matplotlib
|
| 17 |
+
"number", # SVG
|
| 18 |
+
"number", # TikZ
|
| 19 |
+
"number", # Webpage
|
| 20 |
+
"number" # Avg Score
|
| 21 |
+
"number", # HumanEval-V
|
| 22 |
+
"number", # MBPP-V
|
| 23 |
+
"number", # GSM8K-V
|
| 24 |
+
"number", # MATH-V
|
| 25 |
+
"number", # VP
|
| 26 |
+
"number", # Matplotlib
|
| 27 |
+
"number", # SVG
|
| 28 |
+
"number", # TikZ
|
| 29 |
+
"number", # Webpage
|
| 30 |
+
]
|
| 31 |
|
| 32 |
|
| 33 |
+
ON_LOAD_COLUMNS = [
|
| 34 |
+
"Rank", "Model", "Params (B)", "Avg Score⬆️", "HumanEval-V", "MBPP-V", "GSM8K-V", "MATH-V", "VP", "Matplotlib", "SVG", "TikZ", "Webpage"
|
| 35 |
+
]
|
| 36 |
+
# "Avg Score⬆️", "HumanEval-V", "MBPP-V", "GSM8K-V", "MATH-V", "VP", "Matplotlib", "SVG", "TikZ", "Webpage"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|