geoalgo committed
Commit 3288843 · 1 Parent(s): 47afef3

update instruction tuning data, add flags

Files changed (2)
  1. main.py +26 -5
  2. results_instruction_tuning.csv.zip +2 -2
main.py CHANGED
@@ -16,6 +16,9 @@ df_core["Average ⬆️"] = df_core.loc[:, benchmarks_core].mean(axis=1)
 df_core.sort_values(by="Average ⬆️", ascending=False, inplace=True)
 
 df_instruction_tuning = pd.read_csv("results_instruction_tuning.csv.zip")
+df_instruction_tuning = df_instruction_tuning[
+    ~df_instruction_tuning.model_B.str.contains("12b")
+]
 df_instruction_tuning.model_B = df_instruction_tuning.model_B.apply(
     lambda s: s.split("/")[-1]
 )
@@ -90,11 +93,11 @@ with gr.Blocks() as demo:
     Leaderboard(
         value=df_instruction_tuning_pivot.round(2),
         select_columns=SelectColumns(
-            default_selection=[
-                col
-                for col in df_instruction_tuning_pivot.columns
-                if not "-eu" in col
-            ],
+            # default_selection=[
+            #     col
+            #     for col in df_instruction_tuning_pivot.columns
+            #     if not "-eu" in col
+            # ],
             cant_deselect=["Model"],
             label="Select Columns to Display:",
         ),
@@ -111,6 +114,24 @@ with gr.Blocks() as demo:
         Winrate on m-Arena-Hard instructions against Llama-3.1-8B-Instruct using Llama-3.1-70B-Instruct as the LLM-judge.
         """
     )
+    language_flags = {
+        "cs": "🇨🇿",
+        "de": "🇩🇪",
+        "el": "🇬🇷",
+        "en": "🇬🇧",
+        "es": "🇪🇸",
+        "fr": "🇫🇷",
+        "it": "🇮🇹",
+        "nl": "🇳🇱",
+        "pl": "🇵🇱",
+        "pt": "🇵🇹",
+        "ro": "🇷🇴",
+        "uk": "🇺🇦",
+    }
+    df_mah_pivot.columns = [
+        f"{x} {language_flags[x]}" if x in language_flags else x
+        for x in df_mah_pivot.columns
+    ]
     Leaderboard(
         value=df_mah_pivot.round(2),
         select_columns=SelectColumns(
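For reference, a minimal standalone sketch of the two substantive changes in this commit: dropping "12b" models from the instruction-tuning results and suffixing the m-Arena-Hard leaderboard columns with language flags. The filtering expression, the column-renaming comprehension, and the language_flags mapping come from the diff above; the toy DataFrames, model names, and column names are illustrative assumptions, not the real data.

# Sketch of the filtering + flag-suffix logic added in this commit.
# The toy data below is illustrative; model/column names are assumptions.
import pandas as pd

# Stand-in for results_instruction_tuning.csv.zip
df_instruction_tuning = pd.DataFrame(
    {
        "model_B": ["org/model-7b", "org/model-12b", "org/other-7b"],
        "winrate": [0.48, 0.55, 0.51],
    }
)

# Same exclusion as the commit: drop any model_B containing "12b"
df_instruction_tuning = df_instruction_tuning[
    ~df_instruction_tuning.model_B.str.contains("12b")
]
# Keep only the name after the org prefix, as in the existing code
df_instruction_tuning.model_B = df_instruction_tuning.model_B.apply(
    lambda s: s.split("/")[-1]
)

# Stand-in for df_mah_pivot: one column per language code plus a "Model" column
df_mah_pivot = pd.DataFrame(
    {"Model": ["model-7b"], "de": [52.3], "fr": [49.8], "xx": [50.0]}
)

language_flags = {
    "cs": "🇨🇿", "de": "🇩🇪", "el": "🇬🇷", "en": "🇬🇧", "es": "🇪🇸", "fr": "🇫🇷",
    "it": "🇮🇹", "nl": "🇳🇱", "pl": "🇵🇱", "pt": "🇵🇹", "ro": "🇷🇴", "uk": "🇺🇦",
}
# Columns that are known language codes get a flag appended; others pass through
df_mah_pivot.columns = [
    f"{x} {language_flags[x]}" if x in language_flags else x
    for x in df_mah_pivot.columns
]

print(df_instruction_tuning)
print(df_mah_pivot.columns.tolist())  # ['Model', 'de 🇩🇪', 'fr 🇫🇷', 'xx']

Columns whose names are not codes in language_flags (for example "Model") are left unchanged, which is why the renaming is done with a conditional comprehension rather than a plain rename.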
results_instruction_tuning.csv.zip CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:484a54e6946b58cdb8a76dd0bd0f48618905d8dd139b60de52f744c27eaf170d
-size 258876
+oid sha256:75682851fe317fb6a7fb0b55e662bd6c73facf88c08167173641d4c763b5c233
+size 919362
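The results file is tracked with Git LFS, so the diff above only shows the pointer's new oid and size; the refreshed archive is what main.py loads at startup. A minimal usage sketch, assuming the LFS-resolved file is present in the working directory — pandas infers compression from the .zip extension and reads a single-CSV archive directly:

# Sketch: load the LFS-tracked archive the same way main.py does.
# Assumes results_instruction_tuning.csv.zip has been pulled via Git LFS.
import pandas as pd

df_instruction_tuning = pd.read_csv("results_instruction_tuning.csv.zip")
print(df_instruction_tuning.shape)  # inspect the refreshed data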