Spaces:
Running
Running
add initial files
Browse files
- main.py +40 -0
- opensci-ref-table.csv +12 -0
- pyproject.toml +13 -0
- requirements.txt +7 -0
main.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio app that displays the OpenEuroLLM reference-model leaderboard.

Loads ``opensci-ref-table.csv`` from the directory containing this script,
recomputes the per-model average over the benchmark columns, and renders the
table with the ``gradio_leaderboard`` component.
"""

import random
from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns

# Directory containing this script. Data files are resolved against it so the
# app works regardless of the working directory it is launched from.
abs_path = Path(__file__).parent

df = pd.read_csv(abs_path / "opensci-ref-table.csv")

# Drop the token-count column and the precomputed average; the average is
# recomputed below from the benchmark columns that remain.
df.drop("#Tokens", axis=1, inplace=True)
df.drop("AVG", axis=1, inplace=True)

# Every column after the first one ("Model") is treated as a benchmark score.
benchmarks = df.columns[1:]
df["Average ⬆️"] = df.loc[:, benchmarks].mean(axis=1)
# df.set_index("Model", inplace=True)

with gr.Blocks() as demo:
    gr.Markdown(
        """
    # 🥇 OpenEuroLLM Leaderboard 🇪🇺
    """
    )
    Leaderboard(
        # Scores are rounded to two decimals for display only.
        value=df.round(2),
        select_columns=SelectColumns(
            default_selection=list(df.columns),
            # The model name must always stay visible.
            cant_deselect=["Model"],
            label="Select Columns to Display:",
        ),
        search_columns=SearchColumns(
            primary_column="Model",
            label="Filter a model",
            secondary_columns=[],
        ),
    )

if __name__ == "__main__":
    demo.launch()
|
opensci-ref-table.csv
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Model,#Tokens,Copa[0],Lambada[0],Openbookqa[0],Wino[10],Mmlu[5],Commonsense[10],Piqa[10],Hellaswag[10],Arc-easy[10],Arc-challenge[10],Boolq[10],AVG
|
| 2 |
+
gemma-2-2b,2.0,0.8800000000000001,0.6980399767125947,0.366,0.6874506708760852,0.5314057826520439,0.6502866502866503,0.7970620239390642,0.7453694483170683,0.8200757575757576,0.5149317406143346,0.7966360856269113,0.6806598306000463
|
| 3 |
+
Qwen2.5-1.5B,18.0,0.83,0.6208034154861246,0.361,0.63114969744804,0.6087451929924512,0.7575757575757575,0.7649619151251361,0.6799442342162916,0.8072390572390572,0.5170648464163822,0.7831804281345566,0.6692422313303452
|
| 4 |
+
OpenSci-ref-1.7B-nemotron-1T,1.0,0.84,0.5992625654958277,0.43,0.6274664561957379,0.499430280586811,0.6240786240786241,0.7894450489662677,0.7243576976697869,0.8005050505050505,0.5102389078498294,0.7941896024464832,0.6580885667085835
|
| 5 |
+
SmolLM2-1.7B,11.0,0.82,0.6743644478944304,0.38,0.6629834254143646,0.4999287850733513,0.6011466011466011,0.780195865070729,0.7316271659032065,0.7992424242424243,0.5166382252559727,0.7474006116207951,0.6557752319656248
|
| 6 |
+
OpenSci-ref-1.7B-DCLM-1T,1.0,0.79,0.6782456821269164,0.396,0.6416732438831886,0.2432701894317049,0.1941031941031941,0.7709466811751904,0.7027484564827724,0.757996632996633,0.4428327645051194,0.6902140672782875,0.5734573556348188
|
| 7 |
+
OpenSci-ref-1.7B-FineWeb-Edu-1T,1.0,0.81,0.5427906074131574,0.428,0.6290449881610103,0.2642785927930494,0.19000819000819,0.749183895538629,0.6661023700458076,0.7626262626262627,0.4300341296928328,0.6730886850152905,0.5586507019358391
|
| 8 |
+
OpenSci-ref-1.7B-FineWeb-Edu-300B,0.3,0.76,0.5171744614787502,0.416,0.6069455406471981,0.2618572852869961,0.1941031941031941,0.7557127312295974,0.6266679944234216,0.7462121212121212,0.439419795221843,0.6718654434250765,0.545087142457109
|
| 9 |
+
SmolLM-1.7B,1.0,0.76,0.5557927420919853,0.36,0.6093133385951065,0.2970374590514171,0.2252252252252252,0.7758433079434168,0.6730730930093607,0.7680976430976432,0.45947098976109213,0.6914373088685015,0.5613901006948862
|
| 10 |
+
ablation-model-fineweb-edu,0.3,0.78,0.5018435862604308,0.369,0.579321231254933,0.2525993448226748,0.1916461916461916,0.7513601741022851,0.6192989444333798,0.7735690235690236,0.45520477815699656,0.6608562691131499,0.5395181403053695
|
| 11 |
+
EuroLLM-1.7B,4.0,0.74,0.5247428682320978,0.331,0.585635359116022,0.2688363480985614,0.1883701883701883,0.7412948857453754,0.6032662816172077,0.7266414141414141,0.3860921501706484,0.6119266055045871,0.5188914637269183
|
| 12 |
+
ablation-model-c4,0.3,0.74,0.5609353774500291,0.3015,0.5798145224940805,0.2506765418031619,0.2014742014742014,0.7606093579978237,0.6436466839275046,0.6578282828282829,0.3188993174061433,0.6347094801223241,0.5136448877730501
|
pyproject.toml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "OpenEuroLLM-Leaderboard"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"apscheduler>=3.11.0",
|
| 9 |
+
"gradio-client>=1.3.0",
|
| 10 |
+
"gradio-leaderboard==0.0.9",
|
| 11 |
+
"gradio[oauth]==4.44.0",
|
| 12 |
+
"pandas>=2.2.3",
|
| 13 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
APScheduler
|
| 2 |
+
gradio==4.44.0
|
| 3 |
+
gradio[oauth]==4.44.0
|
| 4 |
+
gradio_leaderboard==0.0.9
|
| 5 |
+
gradio_client
|
| 6 |
+
pandas
|
| 7 |
+
python-dateutil
|