Use official test keys
Browse files
- functions.py +4 -2
    	
        functions.py
    CHANGED
    
    | @@ -21,12 +21,14 @@ The purpose of this PR is to add evaluation results from the Open LLM Leaderboar | |
| 21 |  | 
| 22 | 
             
            Please report any issues here: https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard/discussions"""
         | 
| 23 |  | 
|  | |
|  | |
| 24 | 
             
            KEY_IFEVAL = "IFEval"
         | 
| 25 | 
             
            KEY_BBH = "BBH"
         | 
| 26 | 
             
            KEY_MATH = "MATH Lvl 5"
         | 
| 27 | 
             
            KEY_GPQA = "GPQA"
         | 
| 28 | 
            -
            KEY_MUSR = "MuSR"
         | 
| 29 | 
            -
            KEY_MMLU = "MMLU-Pro"
         | 
| 30 |  | 
| 31 | 
             
            def normalize_within_range(value, lower_bound=0, higher_bound=1):
         | 
| 32 | 
             
                return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100
         | 
|  | |
| 21 |  | 
| 22 | 
             
            Please report any issues here: https://huggingface.co/spaces/T145/open-llm-leaderboard-results-to-modelcard/discussions"""
         | 
| 23 |  | 
| 24 | 
            +
            # Keys are named after the backend keys
         | 
| 25 | 
            +
            # https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard/blob/main/backend/README.md#leaderboard
         | 
| 26 | 
             
            KEY_IFEVAL = "IFEval"
         | 
| 27 | 
             
            KEY_BBH = "BBH"
         | 
| 28 | 
             
            KEY_MATH = "MATH Lvl 5"
         | 
| 29 | 
             
            KEY_GPQA = "GPQA"
         | 
| 30 | 
            +
            KEY_MUSR = "MUSR"
         | 
| 31 | 
            +
            KEY_MMLU = "MMLU-PRO"
         | 
| 32 |  | 
| 33 | 
             
            def normalize_within_range(value, lower_bound=0, higher_bound=1):
         | 
| 34 | 
             
                return (np.clip(value - lower_bound, 0, None)) / (higher_bound - lower_bound) * 100
         | 
