Add award icons for 5-shot and 0-shot models; shorten some table column names for clarity
Browse files
- app.py +46 -12
- src/display/utils.py +1 -1
app.py
CHANGED
|
@@ -24,14 +24,14 @@ def line_chart(dataframe):
|
|
| 24 |
|
| 25 |
# Estrai valori x, y e labels per True e False
|
| 26 |
x_true = df_true['#Params (B)'].tolist()
|
| 27 |
-
y_true = df_true['Avg.
|
| 28 |
labels_true = [
|
| 29 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
| 30 |
for m in df_true['Model'].tolist()
|
| 31 |
]
|
| 32 |
|
| 33 |
x_false = df_false['#Params (B)'].tolist()
|
| 34 |
-
y_false = df_false['Avg.
|
| 35 |
labels_false = [
|
| 36 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
| 37 |
for m in df_false['Model'].tolist()
|
|
@@ -77,6 +77,7 @@ def line_chart(dataframe):
|
|
| 77 |
return fig
|
| 78 |
|
| 79 |
|
|
|
|
| 80 |
# Define task metadata (icons, names, descriptions)
|
| 81 |
TASK_METADATA_MULTIPLECHOICE = {
|
| 82 |
"TE": {"icon": "📊", "name": "Textual Entailment", "tooltip": ""},
|
|
@@ -108,18 +109,51 @@ def init_leaderboard(dataframe, default_selection=None, hidden_columns=None):
|
|
| 108 |
if dataframe is None or dataframe.empty:
|
| 109 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
| 110 |
|
| 111 |
-
sorted_dataframe = dataframe.sort_values(by="Avg.
|
| 112 |
|
| 113 |
sorted_dataframe = sorted_dataframe.reset_index(drop=True)
|
| 114 |
sorted_dataframe["rank"] = sorted_dataframe.index + 1
|
| 115 |
|
| 116 |
-
#
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
field_list = fields(AutoEvalColumn)
|
| 125 |
|
|
@@ -268,8 +302,8 @@ with demo:
|
|
| 268 |
|
| 269 |
leaderboard = init_leaderboard(
|
| 270 |
LEADERBOARD_DF,
|
| 271 |
-
default_selection=['rank', 'FS', 'Model', "Avg.
|
| 272 |
-
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg.
|
| 273 |
)
|
| 274 |
|
| 275 |
with gr.TabItem("📈 Charts"):
|
|
|
|
| 24 |
|
| 25 |
# Estrai valori x, y e labels per True e False
|
| 26 |
x_true = df_true['#Params (B)'].tolist()
|
| 27 |
+
y_true = df_true['Avg. Comb. Perf. ⬆️'].tolist()
|
| 28 |
labels_true = [
|
| 29 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
| 30 |
for m in df_true['Model'].tolist()
|
| 31 |
]
|
| 32 |
|
| 33 |
x_false = df_false['#Params (B)'].tolist()
|
| 34 |
+
y_false = df_false['Avg. Comb. Perf. ⬆️'].tolist()
|
| 35 |
labels_false = [
|
| 36 |
re.search(r'>([^<>/]+/[^<>]+)<', m).group(1).split('/')[-1]
|
| 37 |
for m in df_false['Model'].tolist()
|
|
|
|
| 77 |
return fig
|
| 78 |
|
| 79 |
|
| 80 |
+
|
| 81 |
# Define task metadata (icons, names, descriptions)
|
| 82 |
TASK_METADATA_MULTIPLECHOICE = {
|
| 83 |
"TE": {"icon": "📊", "name": "Textual Entailment", "tooltip": ""},
|
|
|
|
| 109 |
if dataframe is None or dataframe.empty:
|
| 110 |
raise ValueError("Leaderboard DataFrame is empty or None.")
|
| 111 |
|
| 112 |
+
sorted_dataframe = dataframe.sort_values(by="Avg. Comb. Perf. ⬆️", ascending=False)
|
| 113 |
|
| 114 |
sorted_dataframe = sorted_dataframe.reset_index(drop=True)
|
| 115 |
sorted_dataframe["rank"] = sorted_dataframe.index + 1
|
| 116 |
|
| 117 |
+
# Flag per sapere se la medaglia è già stata assegnata per categoria e tipo
|
| 118 |
+
large_medal_fs_assigned = False
|
| 119 |
+
medium_medal_fs_assigned = False
|
| 120 |
+
small_medal_fs_assigned = False
|
| 121 |
+
|
| 122 |
+
large_medal_0shot_assigned = False
|
| 123 |
+
medium_medal_0shot_assigned = False
|
| 124 |
+
small_medal_0shot_assigned = False
|
| 125 |
+
|
| 126 |
+
# Lista temporanea per salvare i nuovi valori della colonna Model
|
| 127 |
+
new_model_column = []
|
| 128 |
+
|
| 129 |
+
for _, row in sorted_dataframe.iterrows():
|
| 130 |
+
if row['IS_FS']: # 5-Few-Shot
|
| 131 |
+
if row["#Params (B)"] > 30 and not large_medal_fs_assigned:
|
| 132 |
+
new_model_column.append(f"{row['Model']} 7️⃣0️⃣🅱️🏆")
|
| 133 |
+
large_medal_fs_assigned = True
|
| 134 |
+
elif 10 < row["#Params (B)"] <= 30 and not medium_medal_fs_assigned:
|
| 135 |
+
new_model_column.append(f"{row['Model']} 3️⃣0️⃣🅱️🏆")
|
| 136 |
+
medium_medal_fs_assigned = True
|
| 137 |
+
elif row["#Params (B)"] <= 10 and not small_medal_fs_assigned:
|
| 138 |
+
new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🏆")
|
| 139 |
+
small_medal_fs_assigned = True
|
| 140 |
+
else:
|
| 141 |
+
new_model_column.append(row["Model"])
|
| 142 |
+
else: # 0-Shot
|
| 143 |
+
if row["#Params (B)"] > 30 and not large_medal_0shot_assigned:
|
| 144 |
+
new_model_column.append(f"{row['Model']} 7️⃣0️⃣🅱️🎖️")
|
| 145 |
+
large_medal_0shot_assigned = True
|
| 146 |
+
elif 10 < row["#Params (B)"] <= 30 and not medium_medal_0shot_assigned:
|
| 147 |
+
new_model_column.append(f"{row['Model']} 3️⃣0️⃣🅱️🎖️")
|
| 148 |
+
medium_medal_0shot_assigned = True
|
| 149 |
+
elif row["#Params (B)"] <= 10 and not small_medal_0shot_assigned:
|
| 150 |
+
new_model_column.append(f"{row['Model']} 1️⃣0️⃣🅱️🎖️")
|
| 151 |
+
small_medal_0shot_assigned = True
|
| 152 |
+
else:
|
| 153 |
+
new_model_column.append(row["Model"])
|
| 154 |
+
|
| 155 |
+
# Aggiorna la colonna Model
|
| 156 |
+
sorted_dataframe["Model"] = new_model_column
|
| 157 |
|
| 158 |
field_list = fields(AutoEvalColumn)
|
| 159 |
|
|
|
|
| 302 |
|
| 303 |
leaderboard = init_leaderboard(
|
| 304 |
LEADERBOARD_DF,
|
| 305 |
+
default_selection=['rank', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"],
|
| 306 |
+
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['rank', 'FS', 'Model', "Avg. Comb. Perf. ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
|
| 307 |
)
|
| 308 |
|
| 309 |
with gr.TabItem("📈 Charts"):
|
src/display/utils.py
CHANGED
|
@@ -34,7 +34,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
| 34 |
#auto_eval_column_dict.append(["fewshot", ColumnContent, ColumnContent("Few-Shot", "str", True)])
|
| 35 |
|
| 36 |
#Scores
|
| 37 |
-
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg.
|
| 38 |
for task in Tasks:
|
| 39 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
| 40 |
|
|
|
|
| 34 |
#auto_eval_column_dict.append(["fewshot", ColumnContent, ColumnContent("Few-Shot", "str", True)])
|
| 35 |
|
| 36 |
#Scores
|
| 37 |
+
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg. Comb. Perf. ⬆️", "number", True)])
|
| 38 |
for task in Tasks:
|
| 39 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
| 40 |
|