Spaces:

openeurollm
/

LLM-leaderboard

Running

App Files Files Community

geoalgo committed on Oct 10

Commit

feccacf

1 Parent(s): 862a642

add initial files

Browse files

Files changed (4) hide show

main.py +40 -0
opensci-ref-table.csv +12 -0
pyproject.toml +13 -0
requirements.txt +7 -0

main.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import random
+from pathlib import Path
+import gradio as gr
+import pandas as pd
+from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns
+# Directory containing this script. Data files are resolved against it so the
+# app starts correctly no matter which working directory it is launched from.
+abs_path = Path(__file__).parent
+# Load the reference results table that ships next to this script.
+df = pd.read_csv(abs_path / "opensci-ref-table.csv")
+# Drop the columns that must not enter the average: the "#Tokens" column and
+# the precomputed "AVG" column (the average is recomputed just below).
+df = df.drop(columns=["#Tokens", "AVG"])
+# After the drop, every column except the first one ("Model") is a benchmark.
+benchmarks = df.columns[1:]
+df["Average ⬆️"] = df[benchmarks].mean(axis=1)
+# Build the UI: a title followed by a leaderboard table over the scores.
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+    # 🥇 OpenEuroLLM Leaderboard 🇪🇺
+    """
+    )
+    Leaderboard(
+        # Scores are rounded to two decimals for display only.
+        value=df.round(2),
+        select_columns=SelectColumns(
+            # Show every column by default; "Model" can never be hidden.
+            default_selection=list(df.columns),
+            cant_deselect=["Model"],
+            label="Select Columns to Display:",
+        ),
+        search_columns=SearchColumns(
+            # The search box matches against the model name only.
+            primary_column="Model",
+            label="Filter a model",
+            secondary_columns=[],
+        ),
+    )
+# Start the web server only when run as a script, not when imported.
+if __name__ == "__main__":
+    demo.launch()

opensci-ref-table.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+Model,#Tokens,Copa[0],Lambada[0],Openbookqa[0],Wino[10],Mmlu[5],Commonsense[10],Piqa[10],Hellaswag[10],Arc-easy[10],Arc-challenge[10],Boolq[10],AVG
+gemma-2-2b,2.0,0.8800000000000001,0.6980399767125947,0.366,0.6874506708760852,0.5314057826520439,0.6502866502866503,0.7970620239390642,0.7453694483170683,0.8200757575757576,0.5149317406143346,0.7966360856269113,0.6806598306000463
+Qwen2.5-1.5B,18.0,0.83,0.6208034154861246,0.361,0.63114969744804,0.6087451929924512,0.7575757575757575,0.7649619151251361,0.6799442342162916,0.8072390572390572,0.5170648464163822,0.7831804281345566,0.6692422313303452
+OpenSci-ref-1.7B-nemotron-1T,1.0,0.84,0.5992625654958277,0.43,0.6274664561957379,0.499430280586811,0.6240786240786241,0.7894450489662677,0.7243576976697869,0.8005050505050505,0.5102389078498294,0.7941896024464832,0.6580885667085835
+SmolLM2-1.7B,11.0,0.82,0.6743644478944304,0.38,0.6629834254143646,0.4999287850733513,0.6011466011466011,0.780195865070729,0.7316271659032065,0.7992424242424243,0.5166382252559727,0.7474006116207951,0.6557752319656248
+OpenSci-ref-1.7B-DCLM-1T,1.0,0.79,0.6782456821269164,0.396,0.6416732438831886,0.2432701894317049,0.1941031941031941,0.7709466811751904,0.7027484564827724,0.757996632996633,0.4428327645051194,0.6902140672782875,0.5734573556348188
+OpenSci-ref-1.7B-FineWeb-Edu-1T,1.0,0.81,0.5427906074131574,0.428,0.6290449881610103,0.2642785927930494,0.19000819000819,0.749183895538629,0.6661023700458076,0.7626262626262627,0.4300341296928328,0.6730886850152905,0.5586507019358391
+OpenSci-ref-1.7B-FineWeb-Edu-300B,0.3,0.76,0.5171744614787502,0.416,0.6069455406471981,0.2618572852869961,0.1941031941031941,0.7557127312295974,0.6266679944234216,0.7462121212121212,0.439419795221843,0.6718654434250765,0.545087142457109
+SmolLM-1.7B,1.0,0.76,0.5557927420919853,0.36,0.6093133385951065,0.2970374590514171,0.2252252252252252,0.7758433079434168,0.6730730930093607,0.7680976430976432,0.45947098976109213,0.6914373088685015,0.5613901006948862
+ablation-model-fineweb-edu,0.3,0.78,0.5018435862604308,0.369,0.579321231254933,0.2525993448226748,0.1916461916461916,0.7513601741022851,0.6192989444333798,0.7735690235690236,0.45520477815699656,0.6608562691131499,0.5395181403053695
+EuroLLM-1.7B,4.0,0.74,0.5247428682320978,0.331,0.585635359116022,0.2688363480985614,0.1883701883701883,0.7412948857453754,0.6032662816172077,0.7266414141414141,0.3860921501706484,0.6119266055045871,0.5188914637269183
+ablation-model-c4,0.3,0.74,0.5609353774500291,0.3015,0.5798145224940805,0.2506765418031619,0.2014742014742014,0.7606093579978237,0.6436466839275046,0.6578282828282829,0.3188993174061433,0.6347094801223241,0.5136448877730501

pyproject.toml ADDED Viewed

	@@ -0,0 +1,13 @@

+[project]
+name = "OpenEuroLLM-Leaderboard"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "apscheduler>=3.11.0",
+    "gradio-client>=1.3.0",
+    "gradio-leaderboard==0.0.9",
+    "gradio[oauth]==4.44.0",
+    "pandas>=2.2.3",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+APScheduler
+gradio==4.44.0
+gradio[oauth]==4.44.0
+gradio_leaderboard==0.0.9
+gradio_client
+pandas
+python-dateutil