Spaces:
Runtime error
Runtime error
Yotam-Perlitz
committed on
Commit
·
f32be22
1
Parent(s):
363d8ae
remove HFv2 BBH Raw
Browse files
Signed-off-by: Yotam-Perlitz <y.perlitz@ibm.com>
app.py
CHANGED
|
@@ -75,8 +75,8 @@ with st.expander("Leaderboard configurations (defaults are great BTW)", icon="
|
|
| 75 |
n_models_taken_list = st.slider(
|
| 76 |
label="Select number of models to use",
|
| 77 |
min_value=3,
|
| 78 |
-
max_value=
|
| 79 |
-
value=
|
| 80 |
)
|
| 81 |
|
| 82 |
n_models_taken_list = [n_models_taken_list]
|
|
@@ -140,7 +140,7 @@ def run_load(
|
|
| 140 |
corr_types=["kendall"],
|
| 141 |
n_exps=10,
|
| 142 |
my_benchmark=Benchmark(),
|
| 143 |
-
use_caching=
|
| 144 |
):
|
| 145 |
# Create a hash of the inputs to generate a unique cache file for each set of inputs
|
| 146 |
input_str = (
|
|
@@ -182,13 +182,16 @@ def run_load(
|
|
| 182 |
allbench = Benchmark()
|
| 183 |
allbench.load_local_catalog()
|
| 184 |
|
|
|
|
|
|
|
|
|
|
| 185 |
allbench.add_aggregate(
|
| 186 |
new_col_name="aggregate",
|
| 187 |
agg_source_name="aggregate",
|
| 188 |
scenario_whitelist=aggregate_scenario_whitelist,
|
| 189 |
min_scenario_for_models_to_appear_in_agg=1
|
| 190 |
if len(aggregate_scenario_whitelist) == 1
|
| 191 |
-
else 2,
|
| 192 |
)
|
| 193 |
|
| 194 |
allbench.extend(my_benchmark)
|
|
|
|
| 75 |
n_models_taken_list = st.slider(
|
| 76 |
label="Select number of models to use",
|
| 77 |
min_value=3,
|
| 78 |
+
max_value=15,
|
| 79 |
+
value=8,
|
| 80 |
)
|
| 81 |
|
| 82 |
n_models_taken_list = [n_models_taken_list]
|
|
|
|
| 140 |
corr_types=["kendall"],
|
| 141 |
n_exps=10,
|
| 142 |
my_benchmark=Benchmark(),
|
| 143 |
+
use_caching=True,
|
| 144 |
):
|
| 145 |
# Create a hash of the inputs to generate a unique cache file for each set of inputs
|
| 146 |
input_str = (
|
|
|
|
| 182 |
allbench = Benchmark()
|
| 183 |
allbench.load_local_catalog()
|
| 184 |
|
| 185 |
+
scenarios_to_drop = ["HFv2 BBH Raw"]
|
| 186 |
+
allbench.df = allbench.df.query("scenario not in @scenarios_to_drop")
|
| 187 |
+
|
| 188 |
allbench.add_aggregate(
|
| 189 |
new_col_name="aggregate",
|
| 190 |
agg_source_name="aggregate",
|
| 191 |
scenario_whitelist=aggregate_scenario_whitelist,
|
| 192 |
min_scenario_for_models_to_appear_in_agg=1
|
| 193 |
if len(aggregate_scenario_whitelist) == 1
|
| 194 |
+
else len(aggregate_scenario_whitelist) // 2,
|
| 195 |
)
|
| 196 |
|
| 197 |
allbench.extend(my_benchmark)
|