Spaces:

openeurollm
/

LLM-leaderboard

Running

App Files Files Community

geoalgo committed on Oct 16

Commit

3288843

1 Parent(s): 47afef3

update instruction tuning data, add flags

Browse files

Files changed (2) hide show

main.py +26 -5
results_instruction_tuning.csv.zip +2 -2

main.py CHANGED Viewed

@@ -16,6 +16,9 @@ df_core["Average ⬆️"] = df_core.loc[:, benchmarks_core].mean(axis=1)
 df_core.sort_values(by="Average ⬆️", ascending=False, inplace=True)
 df_instruction_tuning = pd.read_csv("results_instruction_tuning.csv.zip")
 df_instruction_tuning.model_B = df_instruction_tuning.model_B.apply(
     lambda s: s.split("/")[-1]
 )
@@ -90,11 +93,11 @@ with gr.Blocks() as demo:
             Leaderboard(
                 value=df_instruction_tuning_pivot.round(2),
                 select_columns=SelectColumns(
-                    default_selection=[
-                        col
-                        for col in df_instruction_tuning_pivot.columns
-                        if not "-eu" in col
-                    ],
                     cant_deselect=["Model"],
                     label="Select Columns to Display:",
                 ),
@@ -111,6 +114,24 @@ with gr.Blocks() as demo:
             Winrate on m-Arena-Hard instructions against Llama-3.1-8B-Instruct using Llama-3.1-70B-Instruct as the LLM-judge.
             """
             )
             Leaderboard(
                 value=df_mah_pivot.round(2),
                 select_columns=SelectColumns(

 df_core.sort_values(by="Average ⬆️", ascending=False, inplace=True)
 df_instruction_tuning = pd.read_csv("results_instruction_tuning.csv.zip")
+df_instruction_tuning = df_instruction_tuning[
+    ~df_instruction_tuning.model_B.str.contains("12b")
+]
 df_instruction_tuning.model_B = df_instruction_tuning.model_B.apply(
     lambda s: s.split("/")[-1]
 )
             Leaderboard(
                 value=df_instruction_tuning_pivot.round(2),
                 select_columns=SelectColumns(
+                    # default_selection=[
+                    #     col
+                    #     for col in df_instruction_tuning_pivot.columns
+                    #     if not "-eu" in col
+                    # ],
                     cant_deselect=["Model"],
                     label="Select Columns to Display:",
                 ),
             Winrate on m-Arena-Hard instructions against Llama-3.1-8B-Instruct using Llama-3.1-70B-Instruct as the LLM-judge.
             """
             )
+            language_flags = {
+                "cs": "🇨🇿",
+                "de": "🇩🇪",
+                "el": "🇬🇷",
+                "en": "🇬🇧",
+                "es": "🇪🇸",
+                "fr": "🇫🇷",
+                "it": "🇮🇹",
+                "nl": "🇳🇱",
+                "pl": "🇵🇱",
+                "pt": "🇵🇹",
+                "ro": "🇷🇴",
+                "uk": "🇺🇦",
+            }
+            df_mah_pivot.columns = [
+                f"{x} {language_flags[x]}" if x in language_flags else x
+                for x in df_mah_pivot.columns
+            ]
             Leaderboard(
                 value=df_mah_pivot.round(2),
                 select_columns=SelectColumns(

results_instruction_tuning.csv.zip CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:484a54e6946b58cdb8a76dd0bd0f48618905d8dd139b60de52f744c27eaf170d
-size 258876

 version https://git-lfs.github.com/spec/v1
+oid sha256:75682851fe317fb6a7fb0b55e662bd6c73facf88c08167173641d4c763b5c233
+size 919362