Spaces:
Running
Running
Commit
·
0de05c0
1
Parent(s):
6ce351e
up
Browse files
app.py
CHANGED
|
@@ -202,10 +202,14 @@ def regex_table(dataframe, regex, filter_button):
|
|
| 202 |
# Join the list into a single regex pattern with '|' acting as OR
|
| 203 |
combined_regex = '|'.join(regex_list)
|
| 204 |
|
|
|
|
|
|
|
|
|
|
| 205 |
# if filter_button, remove all rows with "ai2" in the model name
|
|
|
|
| 206 |
if isinstance(filter_button, list) or isinstance(filter_button, str):
|
| 207 |
-
if "
|
| 208 |
-
|
| 209 |
if "Seq. Classifiers" not in filter_button:
|
| 210 |
dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
|
| 211 |
if "DPO" not in filter_button:
|
|
@@ -220,6 +224,13 @@ def regex_table(dataframe, regex, filter_button):
|
|
| 220 |
# replace column '' with count/rank
|
| 221 |
data[''] = np.arange(1, 1 + len(data))
|
| 222 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
# if Score exists, round to 2 decimals
|
| 224 |
if "Score" in data.columns:
|
| 225 |
data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
|
|
@@ -255,8 +266,8 @@ with gr.Blocks(css=custom_css) as app:
|
|
| 255 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
| 256 |
placeholder="Model Search (delimit with , )",
|
| 257 |
show_label=False)
|
| 258 |
-
model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "
|
| 259 |
-
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
| 260 |
label="Model Types",
|
| 261 |
show_label=False,
|
| 262 |
# info="Which model types to include.",
|
|
@@ -270,7 +281,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
| 270 |
visible=False,
|
| 271 |
)
|
| 272 |
rewardbench_table = gr.Dataframe(
|
| 273 |
-
regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers"]).values,
|
| 274 |
datatype=col_types_rewardbench_avg,
|
| 275 |
headers=rewardbench_data_avg.columns.tolist(),
|
| 276 |
elem_id="rewardbench_dataframe_avg",
|
|
@@ -280,7 +291,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
| 280 |
with gr.TabItem("π RewardBench - Detailed"):
|
| 281 |
with gr.Row():
|
| 282 |
search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
| 283 |
-
model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"
|
| 284 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
| 285 |
label="Model Types",
|
| 286 |
show_label=False,
|
|
@@ -320,7 +331,7 @@ with gr.Blocks(css=custom_css) as app:
|
|
| 320 |
with gr.TabItem("Prior Test Sets"):
|
| 321 |
with gr.Row():
|
| 322 |
search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
| 323 |
-
model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"
|
| 324 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
| 325 |
label="Model Types",
|
| 326 |
show_label=False,
|
|
|
|
| 202 |
# Join the list into a single regex pattern with '|' acting as OR
|
| 203 |
combined_regex = '|'.join(regex_list)
|
| 204 |
|
| 205 |
+
# remove internal ai2 data
|
| 206 |
+
dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
|
| 207 |
+
|
| 208 |
# if filter_button, remove all rows with "ai2" in the model name
|
| 209 |
+
update_scores = False
|
| 210 |
if isinstance(filter_button, list) or isinstance(filter_button, str):
|
| 211 |
+
if "Prior Sets" not in filter_button and 'Prior Sets (0.5 weight)' in dataframe.columns:
|
| 212 |
+
update_scores = True
|
| 213 |
if "Seq. Classifiers" not in filter_button:
|
| 214 |
dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
|
| 215 |
if "DPO" not in filter_button:
|
|
|
|
| 224 |
# replace column '' with count/rank
|
| 225 |
data[''] = np.arange(1, 1 + len(data))
|
| 226 |
|
| 227 |
+
# if update the score to not use prior sets, do so
|
| 228 |
+
if update_scores:
|
| 229 |
+
data["Score"] = (data["Chat"] + data["Chat Hard"] + data["Safety"] + data["Reasoning"]) / 4
|
| 230 |
+
data["Prior Sets (0.5 weight)"] = np.NaN
|
| 231 |
+
# sort array by Score column
|
| 232 |
+
data = data.sort_values(by='Score', ascending=False)
|
| 233 |
+
|
| 234 |
# if Score exists, round to 2 decimals
|
| 235 |
if "Score" in data.columns:
|
| 236 |
data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
|
|
|
|
| 266 |
search_1 = gr.Textbox(label="Model Search (delimit with , )",
|
| 267 |
placeholder="Model Search (delimit with , )",
|
| 268 |
show_label=False)
|
| 269 |
+
model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "Prior Sets"],
|
| 270 |
+
value=["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"],
|
| 271 |
label="Model Types",
|
| 272 |
show_label=False,
|
| 273 |
# info="Which model types to include.",
|
|
|
|
| 281 |
visible=False,
|
| 282 |
)
|
| 283 |
rewardbench_table = gr.Dataframe(
|
| 284 |
+
regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"]).values,
|
| 285 |
datatype=col_types_rewardbench_avg,
|
| 286 |
headers=rewardbench_data_avg.columns.tolist(),
|
| 287 |
elem_id="rewardbench_dataframe_avg",
|
|
|
|
| 291 |
with gr.TabItem("π RewardBench - Detailed"):
|
| 292 |
with gr.Row():
|
| 293 |
search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
| 294 |
+
model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
|
| 295 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
| 296 |
label="Model Types",
|
| 297 |
show_label=False,
|
|
|
|
| 331 |
with gr.TabItem("Prior Test Sets"):
|
| 332 |
with gr.Row():
|
| 333 |
search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
|
| 334 |
+
model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
|
| 335 |
value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
|
| 336 |
label="Model Types",
|
| 337 |
show_label=False,
|