Spaces:
Running
Running
Commit
·
bb95637
1
Parent(s):
61c1fca
up
Browse files
- src/constants.py +3 -2
src/constants.py
CHANGED
|
@@ -31,7 +31,7 @@ example_counts = {
|
|
| 31 |
"mt-bench-easy": 28,
|
| 32 |
"mt-bench-med": 40,
|
| 33 |
"mt-bench-hard": 37,
|
| 34 |
-
"math-prm": 984, # actual length 447, upweighting to be equal to code
|
| 35 |
"refusals-dangerous": 100,
|
| 36 |
"refusals-offensive": 100,
|
| 37 |
"llmbar-natural": 100,
|
|
@@ -54,5 +54,6 @@ subset_mapping = {
|
|
| 54 |
"Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
|
| 55 |
"Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
|
| 56 |
"Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
|
| 57 |
-
"Reasoning": ["math-prm", "hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
|
|
|
|
| 58 |
}
|
|
|
|
| 31 |
"mt-bench-easy": 28,
|
| 32 |
"mt-bench-med": 40,
|
| 33 |
"mt-bench-hard": 37,
|
| 34 |
+
# "math-prm": 984, # actual length 447, upweighting to be equal to code
|
| 35 |
"refusals-dangerous": 100,
|
| 36 |
"refusals-offensive": 100,
|
| 37 |
"llmbar-natural": 100,
|
|
|
|
| 54 |
"Chat": ["alpacaeval-easy", "alpacaeval-length", "alpacaeval-hard", "mt-bench-easy", "mt-bench-med"],
|
| 55 |
"Chat Hard": ["mt-bench-hard", "llmbar-natural", "llmbar-adver-neighbor", "llmbar-adver-GPTInst", "llmbar-adver-GPTOut", "llmbar-adver-manual"],
|
| 56 |
"Safety": ["refusals-dangerous", "refusals-offensive", "xstest-should-refuse", "xstest-should-respond", "donotanswer"],
|
| 57 |
+
"Reasoning": [#"math-prm",
|
| 58 |
+
"hep-cpp", "hep-go", "hep-java", "hep-js", "hep-python", "hep-rust"]
|
| 59 |
}
|