Spaces:
Running
Running
add gemini pro results
#10
by
JessicaOjo
- opened
data/community_results/New Results - June2025.csv
CHANGED
|
@@ -63,3 +63,10 @@ Claude 4.5 Sonnet,afrimmlu,,89.6,65.0,56.0,57.2,53.4,51.8,57.8,58.0,62.4,59.4,58
|
|
| 63 |
Claude 4.5 Sonnet,injongointent,,88.3,90.6,89.2,83.8,70.8,78.6,75.9,71.9,86.1,70.3,93.6,87.3,83.4,79.8,81.6,4.0
|
| 64 |
Claude 4.5 Sonnet,sib,,87.7,89.7,86.8,85.3,87.3,79.4,76.5,83.3,80.4,84.8,88.7,85.8,82.8,84.3,84.2,5.0
|
| 65 |
Claude 4.5 Sonnet,belebele,,83.9,81.8,73.4,68.7,73.2,70.1,66.3,71.6,75.2,75.3,87.7,79.4,72.2,77.0,74.8,5.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
Claude 4.5 Sonnet,injongointent,,88.3,90.6,89.2,83.8,70.8,78.6,75.9,71.9,86.1,70.3,93.6,87.3,83.4,79.8,81.6,4.0
|
| 64 |
Claude 4.5 Sonnet,sib,,87.7,89.7,86.8,85.3,87.3,79.4,76.5,83.3,80.4,84.8,88.7,85.8,82.8,84.3,84.2,5.0
|
| 65 |
Claude 4.5 Sonnet,belebele,,83.9,81.8,73.4,68.7,73.2,70.1,66.3,71.6,75.2,75.3,87.7,79.4,72.2,77.0,74.8,5.0
|
| 66 |
+
Gemini 2.5 Pro,afrixnli,,90.0,79.8,76.3,79.2,72.3,32.2,73.2,78.2,78.2,74.0,76.5,78.5,72.0,75.3,72.7,3.0
|
| 67 |
+
Gemini 2.5 Pro,afrimgsm,,89.2,80.8,74.8,75.2,74.4,62.8,63.2,82.8,77.6,72.8,88.4,66.8,77.6,69.6,74.4,2.0
|
| 68 |
+
Gemini 2.5 Pro,flores - en_xx,,68.2,41.2,49.4,42.9,51.0,47.8,42.8,44.8,47.4,50.3,60.7,51.7,30.5,55.4,47.4,3.0
|
| 69 |
+
Gemini 2.5 Pro,afrimmlu,,72.2,82.2,68.4,76.4,75.8,79.2,79.8,75.0,81.2,75.2,80.4,85.0,76.0,82.2,78.2,1.0
|
| 70 |
+
Gemini 2.5 Pro,injongointent,,87.9,91.7,94.1,92.0,80.9,90.5,85.5,85.3,93.8,76.6,93.8,90.5,89.2,83.8,88.3,4.0
|
| 71 |
+
Gemini 2.5 Pro,sib,,88.7,88.7,89.2,89.7,87.3,87.3,85.3,86.8,87.7,87.7,90.2,89.2,88.2,87.7,88.1,3.0
|
| 72 |
+
Gemini 2.5 Pro,belebele,,78.1,85.6,56.1,62.2,80.9,78.9,75.4,74.1,84.4,73.7,87.8,86.3,68.8,83.0,76.7,5.0
|
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -21,7 +21,8 @@
|
|
| 21 |
"Gemini-2.5 Flash": 69.9,
|
| 22 |
"Claude 4.0 Sonnet": 68.1,
|
| 23 |
"Claude 3.7 Sonnet": 59.8,
|
| 24 |
-
"Claude 4.5 Sonnet": 71.7
|
|
|
|
| 25 |
}
|
| 26 |
},
|
| 27 |
"Intent": {
|
|
@@ -46,7 +47,8 @@
|
|
| 46 |
"Gemini-2.5 Flash": 87.9,
|
| 47 |
"Claude 4.0 Sonnet": 80.4,
|
| 48 |
"Claude 3.7 Sonnet": 73.4,
|
| 49 |
-
"Claude 4.5 Sonnet": 81.6
|
|
|
|
| 50 |
}
|
| 51 |
},
|
| 52 |
"MT(en/fr-xx)": {
|
|
@@ -70,7 +72,8 @@
|
|
| 70 |
"Lugha-Llama 8B": 22.1,
|
| 71 |
"Gemini-2.5 Flash": 46.5,
|
| 72 |
"Claude 4.0 Sonnet": 46.0,
|
| 73 |
-
"Claude 3.7 Sonnet": 44.0
|
|
|
|
| 74 |
}
|
| 75 |
},
|
| 76 |
"MMLU": {
|
|
@@ -95,7 +98,8 @@
|
|
| 95 |
"Gemini-2.5 Flash": 67.7,
|
| 96 |
"Claude 4.0 Sonnet": 75.5,
|
| 97 |
"Claude 3.7 Sonnet": 66.7,
|
| 98 |
-
"Claude 4.5 Sonnet": 58.6
|
|
|
|
| 99 |
}
|
| 100 |
},
|
| 101 |
"Math": {
|
|
@@ -120,7 +124,8 @@
|
|
| 120 |
"Gemini-2.5 Flash": 70.6,
|
| 121 |
"Claude 4.0 Sonnet": 66.9,
|
| 122 |
"Claude 3.7 Sonnet": 35.2,
|
| 123 |
-
"Claude 4.5 Sonnet": 73.1
|
|
|
|
| 124 |
}
|
| 125 |
},
|
| 126 |
"Topic": {
|
|
@@ -145,7 +150,8 @@
|
|
| 145 |
"Gemini-2.5 Flash": 87.2,
|
| 146 |
"Claude 4.0 Sonnet": 83.2,
|
| 147 |
"Claude 3.7 Sonnet": 84.9,
|
| 148 |
-
"Claude 4.5 Sonnet": 84.2
|
|
|
|
| 149 |
}
|
| 150 |
},
|
| 151 |
"RC": {
|
|
@@ -170,7 +176,8 @@
|
|
| 170 |
"Gemini-2.5 Flash": 42.2,
|
| 171 |
"Claude 4.0 Sonnet": 76.2,
|
| 172 |
"Claude 3.7 Sonnet": 65.1,
|
| 173 |
-
"Claude 4.5 Sonnet": 74.8
|
|
|
|
| 174 |
}
|
| 175 |
}
|
| 176 |
}
|
|
|
|
| 21 |
"Gemini-2.5 Flash": 69.9,
|
| 22 |
"Claude 4.0 Sonnet": 68.1,
|
| 23 |
"Claude 3.7 Sonnet": 59.8,
|
| 24 |
+
"Claude 4.5 Sonnet": 71.7,
|
| 25 |
+
"Gemini 2.5 Pro": 72.7
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
|
|
| 47 |
"Gemini-2.5 Flash": 87.9,
|
| 48 |
"Claude 4.0 Sonnet": 80.4,
|
| 49 |
"Claude 3.7 Sonnet": 73.4,
|
| 50 |
+
"Claude 4.5 Sonnet": 81.6,
|
| 51 |
+
"Gemini 2.5 Pro": 88.3
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
|
|
| 72 |
"Lugha-Llama 8B": 22.1,
|
| 73 |
"Gemini-2.5 Flash": 46.5,
|
| 74 |
"Claude 4.0 Sonnet": 46.0,
|
| 75 |
+
"Claude 3.7 Sonnet": 44.0,
|
| 76 |
+
"Gemini 2.5 Pro": 47.4
|
| 77 |
}
|
| 78 |
},
|
| 79 |
"MMLU": {
|
|
|
|
| 98 |
"Gemini-2.5 Flash": 67.7,
|
| 99 |
"Claude 4.0 Sonnet": 75.5,
|
| 100 |
"Claude 3.7 Sonnet": 66.7,
|
| 101 |
+
"Claude 4.5 Sonnet": 58.6,
|
| 102 |
+
"Gemini 2.5 Pro": 78.2
|
| 103 |
}
|
| 104 |
},
|
| 105 |
"Math": {
|
|
|
|
| 124 |
"Gemini-2.5 Flash": 70.6,
|
| 125 |
"Claude 4.0 Sonnet": 66.9,
|
| 126 |
"Claude 3.7 Sonnet": 35.2,
|
| 127 |
+
"Claude 4.5 Sonnet": 73.1,
|
| 128 |
+
"Gemini 2.5 Pro": 74.4
|
| 129 |
}
|
| 130 |
},
|
| 131 |
"Topic": {
|
|
|
|
| 150 |
"Gemini-2.5 Flash": 87.2,
|
| 151 |
"Claude 4.0 Sonnet": 83.2,
|
| 152 |
"Claude 3.7 Sonnet": 84.9,
|
| 153 |
+
"Claude 4.5 Sonnet": 84.2,
|
| 154 |
+
"Gemini 2.5 Pro": 88.1
|
| 155 |
}
|
| 156 |
},
|
| 157 |
"RC": {
|
|
|
|
| 176 |
"Gemini-2.5 Flash": 42.2,
|
| 177 |
"Claude 4.0 Sonnet": 76.2,
|
| 178 |
"Claude 3.7 Sonnet": 65.1,
|
| 179 |
+
"Claude 4.5 Sonnet": 74.8,
|
| 180 |
+
"Gemini 2.5 Pro": 76.7
|
| 181 |
}
|
| 182 |
}
|
| 183 |
}
|
data/leaderboard_json/lite_language_scores.json
CHANGED
|
@@ -265,5 +265,80 @@
|
|
| 265 |
"xho": 21.4,
|
| 266 |
"yor": 21.9,
|
| 267 |
"zul": 20.6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
}
|
| 269 |
}
|
|
|
|
| 265 |
"xho": 21.4,
|
| 266 |
"yor": 21.9,
|
| 267 |
"zul": 20.6
|
| 268 |
+
},
|
| 269 |
+
"Gemini-2.5 Flash": {
|
| 270 |
+
"amh": 71.2,
|
| 271 |
+
"hau": 67.1,
|
| 272 |
+
"ibo": 65.4,
|
| 273 |
+
"kin": 65.9,
|
| 274 |
+
"lin": 60.9,
|
| 275 |
+
"lug": 64.4,
|
| 276 |
+
"orm": 67.4,
|
| 277 |
+
"sna": 70.2,
|
| 278 |
+
"sot": 65.1,
|
| 279 |
+
"swa": 73.7,
|
| 280 |
+
"xho": 70.1,
|
| 281 |
+
"yor": 65.5,
|
| 282 |
+
"zul": 69.4
|
| 283 |
+
},
|
| 284 |
+
"Claude 4.0 Sonnet": {
|
| 285 |
+
"amh": 78.0,
|
| 286 |
+
"hau": 73.9,
|
| 287 |
+
"ibo": 71.8,
|
| 288 |
+
"kin": 69.0,
|
| 289 |
+
"lin": 62.9,
|
| 290 |
+
"lug": 62.5,
|
| 291 |
+
"orm": 65.6,
|
| 292 |
+
"sna": 71.3,
|
| 293 |
+
"sot": 70.2,
|
| 294 |
+
"swa": 82.0,
|
| 295 |
+
"xho": 74.2,
|
| 296 |
+
"yor": 67.8,
|
| 297 |
+
"zul": 72.4
|
| 298 |
+
},
|
| 299 |
+
"Claude 3.7 Sonnet": {
|
| 300 |
+
"amh": 66.0,
|
| 301 |
+
"hau": 60.9,
|
| 302 |
+
"ibo": 59.0,
|
| 303 |
+
"kin": 61.3,
|
| 304 |
+
"lin": 55.0,
|
| 305 |
+
"lug": 58.3,
|
| 306 |
+
"orm": 57.7,
|
| 307 |
+
"sna": 61.1,
|
| 308 |
+
"sot": 61.6,
|
| 309 |
+
"swa": 71.9,
|
| 310 |
+
"xho": 64.1,
|
| 311 |
+
"yor": 57.9,
|
| 312 |
+
"zul": 62.2
|
| 313 |
+
},
|
| 314 |
+
"Claude 4.5 Sonnet": {
|
| 315 |
+
"amh": 81.9,
|
| 316 |
+
"hau": 75.6,
|
| 317 |
+
"ibo": 74.1,
|
| 318 |
+
"kin": 71.1,
|
| 319 |
+
"lin": 62.9,
|
| 320 |
+
"lug": 68.5,
|
| 321 |
+
"orm": 72.3,
|
| 322 |
+
"sna": 75.1,
|
| 323 |
+
"sot": 72.7,
|
| 324 |
+
"swa": 82.9,
|
| 325 |
+
"xho": 76.6,
|
| 326 |
+
"yor": 75.0,
|
| 327 |
+
"zul": 73.4
|
| 328 |
+
},
|
| 329 |
+
"Gemini 2.5 Pro": {
|
| 330 |
+
"amh": 78.6,
|
| 331 |
+
"hau": 72.6,
|
| 332 |
+
"ibo": 73.9,
|
| 333 |
+
"kin": 74.7,
|
| 334 |
+
"lin": 68.4,
|
| 335 |
+
"lug": 72.2,
|
| 336 |
+
"orm": 75.3,
|
| 337 |
+
"sna": 78.6,
|
| 338 |
+
"sot": 72.9,
|
| 339 |
+
"swa": 82.5,
|
| 340 |
+
"xho": 78.3,
|
| 341 |
+
"yor": 71.8,
|
| 342 |
+
"zul": 76.7
|
| 343 |
}
|
| 344 |
}
|