Spaces:

allenai
/

reward-bench

Running

natolambert committed on Feb 8, 2024

Commit

35e2ca1

1 Parent(s): 702ff77

update dataloading

Files changed (3) hide show

README.md CHANGED Viewed

@@ -11,3 +11,8 @@ license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+To develop this app, run it locally with:
+```
+gradio app.py
+```

app.py CHANGED Viewed

@@ -46,13 +46,18 @@ def avg_over_herm(dataframe):
     subsets = ["alpacaeval", "mt-bench", "llmbar", "refusals", "hep"]
     # for each subset, avg the columns that have the subset in the column name, then add a new column with subset name and avg
     for subset in subsets:
-        subset_cols = [col for col in new_df.columns if subset in col]
         new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
     keep_columns = ["model", "average"] + subsets
     new_df = new_df[keep_columns]
     # replace average column with new average
-    new_df["average"] = np.round(np.nanmean(new_df[subsets].values, axis=1), 2)
     return new_df
 def expand_subsets(dataframe):
@@ -83,7 +88,7 @@ def random_sample(r: gr.Request, subset):
         sample_index = np.random.randint(0, len(eval_set_filtered) - 1)
         sample = eval_set_filtered[sample_index]
-    markdown_text = '\n\n'.join([f"**{key}**: {value}" for key, value in sample.items()])
     return markdown_text
 subsets = eval_set.unique("subset")

     subsets = ["alpacaeval", "mt-bench", "llmbar", "refusals", "hep"]
     # for each subset, avg the columns that have the subset in the column name, then add a new column with subset name and avg
     for subset in subsets:
+        if subset == "refusals":
+            subset_cols = ["refusals-dangerous", "refusals-offensive", "donotanswer","xstest"]
+        else:
+            subset_cols = [col for col in new_df.columns if subset in col]
         new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
     keep_columns = ["model", "average"] + subsets
     new_df = new_df[keep_columns]
     # replace average column with new average
+    new_df["average"] = np.round(np.nanmean(new_df[subsets].values, axis=1), 2)
+    # rename column "hep" to "hep (code)"
+    new_df = new_df.rename(columns={"hep": "hep (code)"})
     return new_df
 def expand_subsets(dataframe):
         sample_index = np.random.randint(0, len(eval_set_filtered) - 1)
         sample = eval_set_filtered[sample_index]
+    markdown_text = '\n\n'.join([f"**{key}**:\n{value}" for key, value in sample.items()])
     return markdown_text
 subsets = eval_set.unique("subset")

src/utils.py CHANGED Viewed

@@ -36,6 +36,9 @@ def load_all_data(data_repo, subsubsets=False):    # use HF api to pull the git
     # remove chat_template column
     df = df.drop(columns=["chat_template"])
     # move column "model" to the front
     cols = list(df.columns)
     cols.insert(0, cols.pop(cols.index('model')))

     # remove chat_template column
     df = df.drop(columns=["chat_template"])
+    # sort columns alphabetically
+    df = df.reindex(sorted(df.columns), axis=1)
     # move column "model" to the front
     cols = list(df.columns)
     cols.insert(0, cols.pop(cols.index('model')))