Spaces:
Sleeping
Sleeping
Yotam-Perlitz
commited on
Commit
·
32f9aec
1
Parent(s):
a3b611d
replace mybench
Browse filesSigned-off-by: Yotam-Perlitz <y.perlitz@ibm.com>
- assets/mybench_240901.csv +28 -53
assets/mybench_240901.csv
CHANGED
|
@@ -1,53 +1,28 @@
|
|
| 1 |
-
model,
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
qwen1.5_72b_chat,28.89,mybench_average
|
| 30 |
-
command_r,27.23,mybench_average
|
| 31 |
-
phi_3_small_128k_instruct,27.19,mybench_average
|
| 32 |
-
meta_llama_3_8b_instruct,26.67,mybench_average
|
| 33 |
-
qwen2_7b_instruct,26.45,mybench_average
|
| 34 |
-
phi_3_small_8k_instruct,26.24,mybench_average
|
| 35 |
-
openhermes_2.5_mistral_7b,23.3,mybench_average
|
| 36 |
-
mixtral_8x7b_instruct_v0.1,22.5,mybench_average
|
| 37 |
-
mistral_7b_instruct_v0.2,19.33,mybench_average
|
| 38 |
-
phi_3_mini_4k_instruct,19.27,mybench_average
|
| 39 |
-
zephyr_7b_alpha,19.22,mybench_average
|
| 40 |
-
phi_3_mini_128k_instruct,18.04,mybench_average
|
| 41 |
-
zephyr_7b_beta,17.32,mybench_average
|
| 42 |
-
deepseek_v2_lite_chat,17.14,mybench_average
|
| 43 |
-
qwen1.5_7b_chat,16.5,mybench_average
|
| 44 |
-
starling_lm_7b_beta,16.44,mybench_average
|
| 45 |
-
vicuna_7b_v1.5_16k,13.71,mybench_average
|
| 46 |
-
vicuna_7b_v1.5,11.73,mybench_average
|
| 47 |
-
qwen1.5_4b_chat,11.13,mybench_average
|
| 48 |
-
llama_2_7b_chat,10.25,mybench_average
|
| 49 |
-
qwen2_1.5b_instruct,9.96,mybench_average
|
| 50 |
-
yi_6b_chat,8.79,mybench_average
|
| 51 |
-
qwen2_0.5b_instruct,6.78,mybench_average
|
| 52 |
-
qwen1.5_1.8b_chat,6.09,mybench_average
|
| 53 |
-
qwen1.5_0.5b_chat,5.26,mybench_average
|
|
|
|
| 1 |
+
model,agentbench
|
| 2 |
+
gpt-4-0613,4.01
|
| 3 |
+
claude-2,2.49
|
| 4 |
+
claude-v1.3,2.44
|
| 5 |
+
gpt-3.5-turbo-0613,2.32
|
| 6 |
+
text-davinci-003,1.71
|
| 7 |
+
claude-instant-v1.1,1.60
|
| 8 |
+
chat-bison-001,1.39
|
| 9 |
+
text-davinci-002,1.25
|
| 10 |
+
llama-2-70b-chat,0.78
|
| 11 |
+
guanaco-65b,0.54
|
| 12 |
+
codellama-34b-instruct,0.96
|
| 13 |
+
vicuna-33b-v1.3,0.73
|
| 14 |
+
wizardlm-30b-v1.0,0.46
|
| 15 |
+
guanaco-33b,0.39
|
| 16 |
+
vicuna-13b-v1.5,0.93
|
| 17 |
+
llama-2-13b-chat,0.77
|
| 18 |
+
openchat-13b-v3.2,0.70
|
| 19 |
+
wizardlm-13b-v1.2,0.66
|
| 20 |
+
vicuna-7b-v1.5,0.56
|
| 21 |
+
codellama-13b-instruct,0.56
|
| 22 |
+
codellama-7b-instruct,0.50
|
| 23 |
+
koala-13b,0.34
|
| 24 |
+
llama-2-7b-chat,0.34
|
| 25 |
+
codegeex2-6b,0.27
|
| 26 |
+
dolly-12b-v2,0.14
|
| 27 |
+
chatglm-6b-v1.1,0.11
|
| 28 |
+
oasst-12b-sft-4,0.03
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|