Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update ZeroEval-main/result_dirs/zebra-grid.summary.json
Browse files
    	
        ZeroEval-main/result_dirs/zebra-grid.summary.json
    CHANGED
    
    | @@ -1,4 +1,21 @@ | |
| 1 | 
             
            [
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2 | 
             
              {
         | 
| 3 | 
             
                "Model": "o3-mini-2025-01-31-high",
         | 
| 4 | 
             
                "Mode": "greedy",
         | 
| @@ -50,6 +67,23 @@ | |
| 50 | 
             
                "N_Mode": "single",
         | 
| 51 | 
             
                "N_Size": 1
         | 
| 52 | 
             
              },
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 53 | 
             
              {
         | 
| 54 | 
             
                "Model": "deepseek-R1",
         | 
| 55 | 
             
                "Mode": "greedy",
         | 
|  | |
| 1 | 
             
            [
         | 
| 2 | 
            +
              {
         | 
| 3 | 
            +
                "Model": "grok-3-mini-fast-beta-high",
         | 
| 4 | 
            +
                "Mode": "greedy",
         | 
| 5 | 
            +
                "Puzzle Acc": "92.60",
         | 
| 6 | 
            +
                "Cell Acc": "94.63",
         | 
| 7 | 
            +
                "No answer": "1.00",
         | 
| 8 | 
            +
                "Easy Puzzle Acc": "98.93",
         | 
| 9 | 
            +
                "Hard Puzzle Acc": "90.14",
         | 
| 10 | 
            +
                "Small Puzzle Acc": "98.75",
         | 
| 11 | 
            +
                "Medium Puzzle Acc": "96.43",
         | 
| 12 | 
            +
                "Large Puzzle Acc": "93.50",
         | 
| 13 | 
            +
                "XL Puzzle Acc": "76.50",
         | 
| 14 | 
            +
                "Total Puzzles": 1000,
         | 
| 15 | 
            +
                "Reason Lens": "782.25",
         | 
| 16 | 
            +
                "N_Mode": "single",
         | 
| 17 | 
            +
                "N_Size": 1
         | 
| 18 | 
            +
              },
         | 
| 19 | 
             
              {
         | 
| 20 | 
             
                "Model": "o3-mini-2025-01-31-high",
         | 
| 21 | 
             
                "Mode": "greedy",
         | 
|  | |
| 67 | 
             
                "N_Mode": "single",
         | 
| 68 | 
             
                "N_Size": 1
         | 
| 69 | 
             
              },
         | 
| 70 | 
            +
              {
         | 
| 71 | 
            +
                "Model": "grok-3-mini-fast-beta-low",
         | 
| 72 | 
            +
                "Mode": "greedy",
         | 
| 73 | 
            +
                "Puzzle Acc": "80.70",
         | 
| 74 | 
            +
                "Cell Acc": "84.22",
         | 
| 75 | 
            +
                "No answer": "0.00",
         | 
| 76 | 
            +
                "Easy Puzzle Acc": "98.57",
         | 
| 77 | 
            +
                "Hard Puzzle Acc": "73.75",
         | 
| 78 | 
            +
                "Small Puzzle Acc": "98.75",
         | 
| 79 | 
            +
                "Medium Puzzle Acc": "96.43",
         | 
| 80 | 
            +
                "Large Puzzle Acc": "77.00",
         | 
| 81 | 
            +
                "XL Puzzle Acc": "33.50",
         | 
| 82 | 
            +
                "Total Puzzles": 1000,
         | 
| 83 | 
            +
                "Reason Lens": "874.09",
         | 
| 84 | 
            +
                "N_Mode": "single",
         | 
| 85 | 
            +
                "N_Size": 1
         | 
| 86 | 
            +
              },
         | 
| 87 | 
             
              {
         | 
| 88 | 
             
                "Model": "deepseek-R1",
         | 
| 89 | 
             
                "Mode": "greedy",
         | 
 
			

