geoalgo committed on
Commit
ca25c6f
·
1 Parent(s): c708975
Files changed (2) hide show
  1. main.py +78 -19
  2. results_instruction_tuning.csv.zip +3 -0
main.py CHANGED
@@ -7,14 +7,27 @@ from gradio_leaderboard import Leaderboard, SelectColumns, SearchColumns
7
 
8
  abs_path = Path(__file__).parent
9
 
10
- df = pd.read_csv("opensci-ref-table.csv")
11
 
12
- df.drop("#Tokens", axis=1, inplace=True)
13
- df.drop("AVG", axis=1, inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- benchmarks = df.columns[1:]
16
- df["Average ⬆️"] = df.loc[:, benchmarks].mean(axis=1)
17
- # df.set_index("Model", inplace=True)
18
 
19
  with gr.Blocks() as demo:
20
  gr.Markdown(
@@ -22,19 +35,65 @@ with gr.Blocks() as demo:
22
  # 🥇 OpenEuroLLM Leaderboard 🇪🇺
23
  """
24
  )
25
- Leaderboard(
26
- value=df.round(2),
27
- select_columns=SelectColumns(
28
- default_selection=list(df.columns),
29
- cant_deselect=["Model"],
30
- label="Select Columns to Display:",
31
- ),
32
- search_columns=SearchColumns(
33
- primary_column="Model",
34
- label="Filter a model",
35
- secondary_columns=[],
36
- ),
37
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  if __name__ == "__main__":
40
  demo.launch()
 
7
 
8
  abs_path = Path(__file__).parent
9
 
10
+ df_core = pd.read_csv("opensci-ref-table.csv")
11
 
12
+ df_core.drop("#Tokens", axis=1, inplace=True)
13
+ df_core.drop("AVG", axis=1, inplace=True)
14
+ benchmarks_core = df_core.columns[1:]
15
+ df_core["Average ⬆️"] = df_core.loc[:, benchmarks_core].mean(axis=1)
16
+
17
+
18
+ df_instruction_tuning = pd.read_csv("results_instruction_tuning.csv.zip")
19
+ df_instruction_tuning = df_instruction_tuning.pivot_table(
20
+ index="model_B", columns="benchmark", values="preference"
21
+ )
22
+ df_instruction_tuning.index.rename("Model", inplace=True)
23
+ df_instruction_tuning.reset_index(drop=False, inplace=True)
24
+ df_instruction_tuning.columns = [x.capitalize() for x in df_instruction_tuning.columns]
25
+ # first column is model
26
+ df_instruction_tuning["Average"] = df_instruction_tuning.loc[
27
+ :, df_instruction_tuning.columns[1:]
28
+ ].mean(axis=1)
29
+ # df_instruction_tuning.drop("benchmark", axis=1, inplace=True)
30
 
 
 
 
31
 
32
  with gr.Blocks() as demo:
33
  gr.Markdown(
 
35
  # 🥇 OpenEuroLLM Leaderboard 🇪🇺
36
  """
37
  )
38
+
39
+ with gr.Tabs():
40
+ with gr.Tab("English Core 🏴󠁧󠁢󠁥󠁮󠁧󠁿🇺🇸"):
41
+ Leaderboard(
42
+ value=df_core.round(2),
43
+ select_columns=SelectColumns(
44
+ default_selection=list(df_core.columns),
45
+ cant_deselect=["Model"],
46
+ label="Select Columns to Display:",
47
+ ),
48
+ search_columns=SearchColumns(
49
+ primary_column="Model",
50
+ label="Filter a model",
51
+ secondary_columns=[],
52
+ ),
53
+ )
54
+
55
+ with gr.Tab("Instruction-tuning 🎯󠁧󠁢󠁥🏴󠁧󠁢󠁥󠁮󠁧󠁿"):
56
+ gr.Markdown(
57
+ """
58
+ Winrate against Llama-3.1-8B-Instruct using Llama-3.1-70B-Instruct as the LLM-judge.
59
+ """
60
+ )
61
+ Leaderboard(
62
+ value=df_instruction_tuning.round(2),
63
+ select_columns=SelectColumns(
64
+ default_selection=[
65
+ col for col in df_instruction_tuning.columns if not "-eu" in col
66
+ ],
67
+ cant_deselect=["Model"],
68
+ label="Select Columns to Display:",
69
+ ),
70
+ search_columns=SearchColumns(
71
+ primary_column="Model",
72
+ label="Filter a model",
73
+ secondary_columns=[],
74
+ ),
75
+ )
76
+
77
+ with gr.Tab("Instruction-tuning multi-lingual 🎯🇪🇺"):
78
+ gr.Markdown(
79
+ """
80
+ Winrate against Llama-3.1-8B-Instruct using Llama-3.1-70B-Instruct as the LLM-judge.
81
+ """
82
+ )
83
+ Leaderboard(
84
+ value=df_instruction_tuning.round(2),
85
+ select_columns=SelectColumns(
86
+ default_selection=list(df_instruction_tuning.columns),
87
+ cant_deselect=["Model"],
88
+ label="Select Columns to Display:",
89
+ ),
90
+ search_columns=SearchColumns(
91
+ primary_column="Model",
92
+ label="Filter a model",
93
+ secondary_columns=[],
94
+ ),
95
+ )
96
+
97
 
98
  if __name__ == "__main__":
99
  demo.launch()
results_instruction_tuning.csv.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76e51ad41bb386359abb58e10ea274cdd5189dfd468f5bb58850c61fb8c16f0
3
+ size 209306