import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
import config
from pathlib import Path
import pandas as pd
from datetime import datetime

abs_path = Path(__file__).parent
df = pd.read_json(str(abs_path / "leader_board.json"))
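# Each record is expected to provide the columns referenced below ("Rank",
# "Model", "Model Type", "Model Size", "Supports multiple images", and the
# per-task scores).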
| head_content = """ | |
| # 🏅 BlinkCode Leaderboard | |
| ### Welcome to the BlinkCode Leaderboard! On this leaderboard we share the evaluation results of MLLMs obtained by the [OpenSource Framework](https://github.com/YJQuantumLeap/BlinkCode). | |
| ### Currently, BlinkCode Leaderboard covers <model num> different VLMs (including GPT-4v, Gemini, QwenVLMAX, LLaVA, etc.) and 9 different task. | |
| ## Main Evaluation Results | |
| - Metrics: | |
| - Avg Score: The average score on all task (normalized to 0 - 100, the higher the better). | |
| - The scores in the 5 tasks (HumanEval-V, MBPP-V, GSM8K-V, MATH-V, VP) represent the percentage of accuracy. | |
| - The scores in the image reconstruction tasks (Matplotlib, SVG, TikZ, Webpage) represent the similarity between the reconstructed images and the original images (normalized to 0 - 100, the higher the better). | |
| - By default, we present the unrefined evaluation results,, sorted by the descending order of Avg Score⬆️. | |
| - The ⭐ symbol indicates results that have undergone two rounds of refinement. | |
| This leaderboard was last updated: <nowtime>. | |
| """ | |
| CITATION_BUTTON_TEXT = r"""@misc{2023opencompass, | |
| title={OpenCompass: A Universal Evaluation Platform for Foundation Models}, | |
| author={OpenCompass Contributors}, | |
| howpublished = {\url{https://github.com/open-compass/opencompass}}, | |
| year={2023} | |
| }""" | |
| CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" | |
| unique_models_count = df["Model"].nunique() | |
| # print(unique_models_count) | |
| nowtime = datetime.now() | |
| formatted_time = nowtime.strftime("%y.%m.%d %H:%M:%S") | |
| head_content = head_content.replace("<nowtime>", formatted_time).replace('<model num>', str(unique_models_count)) | |
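
# Assemble the UI: the markdown header, the leaderboard table, and a
# collapsible citation box.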
with gr.Blocks() as demo:
    gr.Markdown(head_content)
    with gr.Tabs():
        Leaderboard(
            value=df,
            select_columns=SelectColumns(
                default_selection=config.ON_LOAD_COLUMNS,
                cant_deselect=["Rank", "Model"],
                label="Select Columns to Display:",
            ),
            search_columns=["Model", "Model Type"],
            hide_columns=["Model Size", "Model Type", "Supports multiple images"],
            filter_columns=[
                "Model Size",
                "Model Type",
                "Supports multiple images",
                # ColumnFilter("Params (B)", default=[0, 20]),
            ],
            datatype=config.TYPES,
            column_widths=["5%", "15%"],
        )
    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )
| if __name__ == "__main__": | |
| demo.launch() |
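
# To run this Space locally (assuming the dependencies are published under
# these names): `pip install gradio gradio_leaderboard pandas`, then execute
# this file (e.g. `python app.py` if it is saved as app.py).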