benchmark

Running

App Files Community

cdminix committed on Aug 2, 2024

Commit

4bc7be5

verified ·

1 Parent(s): adc647c

add option to exclude environment from mean

Browse files

Files changed (1) hide show

app.py +15 -1

app.py CHANGED Viewed

@@ -28,6 +28,15 @@ def filter_dfs(tags, lb):
         lb = lb[lb["Tags"].apply(lambda x: any(tag in x for tag in tags))]
     return lb
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
@@ -129,7 +138,8 @@ agg_df = BenchmarkSuite.aggregate_df(results_df)
 agg_df = agg_df.pivot(index="dataset", columns="benchmark_category", values="score")
 agg_df.rename(columns={"OVERALL": "General"}, inplace=True)
 agg_df.columns = [x.capitalize() for x in agg_df.columns]
-agg_df["Mean"] = agg_df.mean(axis=1)
 # make sure mean is the first column
 agg_df = agg_df[["Mean"] + [col for col in agg_df.columns if col != "Mean"]]
 for col in agg_df.columns:
@@ -212,6 +222,9 @@ app = gr.Blocks(css=custom_css, title="TTS Benchmark Leaderboard")
 with app:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 TTSDB Scores", elem_id="llm-benchmark-tab-table", id=0):
             tags = gr.Dropdown(
                 TAGS,
                 value=[],
@@ -221,6 +234,7 @@ with app:
             )
             leaderboard = init_leaderboard(f_a_df)
             tags.change(filter_dfs, [tags, leaderboard], [leaderboard])
         with gr.TabItem("🏅 Individual Benchmarks", elem_id="llm-benchmark-tab-table", id=1):
             tags = gr.Dropdown(
                 TAGS,

         lb = lb[lb["Tags"].apply(lambda x: any(tag in x for tag in tags))]
     return lb
+def change_mean(env, lb):
+    global f_b_df, f_a_df
+    lb = f_a_df.copy()
+    if env:
+        mean_cols = [col for col in lb.columns if str(col) not in ["Mean", "Environment", "Model", "Tags"]]
+    else:
+        mean_cols = [col for col in lb.columns if str(col) not in ["Mean", "Model", "Tags"]]
+    lb["Mean"] = lb[mean_cols].mean(axis=1)
+    return lb
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 agg_df = agg_df.pivot(index="dataset", columns="benchmark_category", values="score")
 agg_df.rename(columns={"OVERALL": "General"}, inplace=True)
 agg_df.columns = [x.capitalize() for x in agg_df.columns]
+mean_cols = [col for col in agg_df.columns if str(col) not in ["Mean", "Environment", "Model", "Tags"]]
+agg_df["Mean"] = agg_df[mean_cols].mean(axis=1)
 # make sure mean is the first column
 agg_df = agg_df[["Mean"] + [col for col in agg_df.columns if col != "Mean"]]
 for col in agg_df.columns:
 with app:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 TTSDB Scores", elem_id="llm-benchmark-tab-table", id=0):
+            with gr.Group():
+                env = gr.Checkbox(value=True, label="Exclude environment from mean.")
+                gr.Markdown("**Environment** measures how well the system can reproduce noise in the training data. This doesn't correlate with human judgements for 'naturalness'")
             tags = gr.Dropdown(
                 TAGS,
                 value=[],
             )
             leaderboard = init_leaderboard(f_a_df)
             tags.change(filter_dfs, [tags, leaderboard], [leaderboard])
+            env.change(change_mean, [env, leaderboard], [leaderboard])
         with gr.TabItem("🏅 Individual Benchmarks", elem_id="llm-benchmark-tab-table", id=1):
             tags = gr.Dropdown(
                 TAGS,