Spaces:
Running
Running
zebra logic bench
Browse files
ZeroEval-main/result_dirs/zebra-grid.summary.json
CHANGED
|
@@ -285,6 +285,17 @@
|
|
| 285 |
"Total Puzzles": 1000,
|
| 286 |
"Reason Lens": "1216.40"
|
| 287 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
{
|
| 289 |
"Model": "gpt-3.5-turbo-0125",
|
| 290 |
"Mode": "greedy",
|
|
|
|
| 285 |
"Total Puzzles": 1000,
|
| 286 |
"Reason Lens": "1216.40"
|
| 287 |
},
|
| 288 |
+
{
|
| 289 |
+
"Model": "Meta-Llama-3-8B-Instruct",
|
| 290 |
+
"Mode": "sampling",
|
| 291 |
+
"Puzzle Acc": "11.00",
|
| 292 |
+
"Cell Acc": "26.11",
|
| 293 |
+
"No answer": "22.30",
|
| 294 |
+
"Easy Puzzle Acc": "36.79",
|
| 295 |
+
"Hard Puzzle Acc": "0.97",
|
| 296 |
+
"Total Puzzles": 1000,
|
| 297 |
+
"Reason Lens": "1282.40"
|
| 298 |
+
},
|
| 299 |
{
|
| 300 |
"Model": "gpt-3.5-turbo-0125",
|
| 301 |
"Mode": "greedy",
|
zebra_banner.png
ADDED
|