Spaces:
Running
Running
Commit
·
35e2ca1
1
Parent(s):
702ff77
update dataloading
Browse files
- README.md +5 -0
- app.py +8 -3
- src/utils.py +3 -0
README.md
CHANGED
|
@@ -11,3 +11,8 @@ license: apache-2.0
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
+
|
| 15 |
+
To develop this app, it can be run with:
|
| 16 |
+
```
|
| 17 |
+
gradio app.py
|
| 18 |
+
```
|
app.py
CHANGED
|
@@ -46,13 +46,18 @@ def avg_over_herm(dataframe):
|
|
| 46 |
subsets = ["alpacaeval", "mt-bench", "llmbar", "refusals", "hep"]
|
| 47 |
# for each subset, avg the columns that have the subset in the column name, then add a new column with subset name and avg
|
| 48 |
for subset in subsets:
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
| 50 |
new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
| 51 |
|
| 52 |
keep_columns = ["model", "average"] + subsets
|
| 53 |
new_df = new_df[keep_columns]
|
| 54 |
# replace average column with new average
|
| 55 |
-
new_df["average"] = np.round(np.nanmean(new_df[subsets].values, axis=1), 2)
|
|
|
|
|
|
|
| 56 |
return new_df
|
| 57 |
|
| 58 |
def expand_subsets(dataframe):
|
|
@@ -83,7 +88,7 @@ def random_sample(r: gr.Request, subset):
|
|
| 83 |
sample_index = np.random.randint(0, len(eval_set_filtered) - 1)
|
| 84 |
sample = eval_set_filtered[sample_index]
|
| 85 |
|
| 86 |
-
markdown_text = '\n\n'.join([f"**{key}
|
| 87 |
return markdown_text
|
| 88 |
|
| 89 |
subsets = eval_set.unique("subset")
|
|
|
|
| 46 |
subsets = ["alpacaeval", "mt-bench", "llmbar", "refusals", "hep"]
|
| 47 |
# for each subset, avg the columns that have the subset in the column name, then add a new column with subset name and avg
|
| 48 |
for subset in subsets:
|
| 49 |
+
if subset == "refusals":
|
| 50 |
+
subset_cols = ["refusals-dangerous", "refusals-offensive", "donotanswer","xstest"]
|
| 51 |
+
else:
|
| 52 |
+
subset_cols = [col for col in new_df.columns if subset in col]
|
| 53 |
new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
| 54 |
|
| 55 |
keep_columns = ["model", "average"] + subsets
|
| 56 |
new_df = new_df[keep_columns]
|
| 57 |
# replace average column with new average
|
| 58 |
+
new_df["average"] = np.round(np.nanmean(new_df[subsets].values, axis=1), 2)
|
| 59 |
+
# rename column "hep" to "hep (code)"
|
| 60 |
+
new_df = new_df.rename(columns={"hep": "hep (code)"})
|
| 61 |
return new_df
|
| 62 |
|
| 63 |
def expand_subsets(dataframe):
|
|
|
|
| 88 |
sample_index = np.random.randint(0, len(eval_set_filtered) - 1)
|
| 89 |
sample = eval_set_filtered[sample_index]
|
| 90 |
|
| 91 |
+
markdown_text = '\n\n'.join([f"**{key}**:\n{value}" for key, value in sample.items()])
|
| 92 |
return markdown_text
|
| 93 |
|
| 94 |
subsets = eval_set.unique("subset")
|
src/utils.py
CHANGED
|
@@ -36,6 +36,9 @@ def load_all_data(data_repo, subsubsets=False): # use HF api to pull the git
|
|
| 36 |
# remove chat_template column
|
| 37 |
df = df.drop(columns=["chat_template"])
|
| 38 |
|
|
|
|
|
|
|
|
|
|
| 39 |
# move column "model" to the front
|
| 40 |
cols = list(df.columns)
|
| 41 |
cols.insert(0, cols.pop(cols.index('model')))
|
|
|
|
| 36 |
# remove chat_template column
|
| 37 |
df = df.drop(columns=["chat_template"])
|
| 38 |
|
| 39 |
+
# sort columns alphabetically
|
| 40 |
+
df = df.reindex(sorted(df.columns), axis=1)
|
| 41 |
+
|
| 42 |
# move column "model" to the front
|
| 43 |
cols = list(df.columns)
|
| 44 |
cols.insert(0, cols.pop(cols.index('model')))
|