Spaces:
Sleeping
Sleeping
lowercase models
Browse files
- app.py +24 -49
- src/display/utils.py +2 -2
app.py
CHANGED
|
@@ -225,6 +225,11 @@ def init_leaderboard(dataframe, visible_columns=None):
|
|
| 225 |
dataframe = pd.DataFrame(columns=columns)
|
| 226 |
logger.warning("Initializing empty leaderboard")
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
# print("\n\n", "dataframe", dataframe, "--------------------------------\n\n")
|
| 229 |
|
| 230 |
# Determine which columns to display
|
|
@@ -588,36 +593,21 @@ def create_performance_plot(
|
|
| 588 |
if df.empty:
|
| 589 |
return go.Figure()
|
| 590 |
|
| 591 |
-
#
|
|
|
|
|
|
|
|
|
|
| 592 |
df = df[df["model_name"].isin(selected_models)]
|
| 593 |
-
|
| 594 |
-
# Get the relevant metric columns
|
| 595 |
metric_cols = [col for col in df.columns if metric in col]
|
| 596 |
-
|
| 597 |
-
# Create figure
|
| 598 |
fig = go.Figure()
|
| 599 |
-
|
| 600 |
-
# Custom colors for different models
|
| 601 |
-
colors = [
|
| 602 |
-
"#8FCCCC",
|
| 603 |
-
"#C2A4B6",
|
| 604 |
-
"#98B4A6",
|
| 605 |
-
"#B68F7C",
|
| 606 |
-
] # Pale Cyan, Pale Pink, Pale Green, Pale Orange
|
| 607 |
-
|
| 608 |
-
# Add traces for each model
|
| 609 |
for idx, model in enumerate(selected_models):
|
| 610 |
model_data = df[df["model_name"] == model]
|
| 611 |
if not model_data.empty:
|
| 612 |
values = model_data[metric_cols].values[0].tolist()
|
| 613 |
-
# Add the first value again at the end to complete the polygon
|
| 614 |
values = values + [values[0]]
|
| 615 |
-
|
| 616 |
-
# Clean up test type names
|
| 617 |
categories = [col.replace(f"_{metric}", "") for col in metric_cols]
|
| 618 |
-
# Add the first category again at the end to complete the polygon
|
| 619 |
categories = categories + [categories[0]]
|
| 620 |
-
|
| 621 |
fig.add_trace(
|
| 622 |
go.Scatterpolar(
|
| 623 |
r=values,
|
|
@@ -627,8 +617,6 @@ def create_performance_plot(
|
|
| 627 |
fill="toself",
|
| 628 |
)
|
| 629 |
)
|
| 630 |
-
|
| 631 |
-
# Update layout with all settings at once
|
| 632 |
fig.update_layout(
|
| 633 |
paper_bgcolor="#000000",
|
| 634 |
plot_bgcolor="#000000",
|
|
@@ -663,7 +651,6 @@ def create_performance_plot(
|
|
| 663 |
font={"color": "#ffffff"},
|
| 664 |
),
|
| 665 |
)
|
| 666 |
-
|
| 667 |
return fig
|
| 668 |
|
| 669 |
|
|
@@ -674,7 +661,7 @@ def update_model_choices(version):
|
|
| 674 |
df = get_leaderboard_df(version=version)
|
| 675 |
if df.empty:
|
| 676 |
return []
|
| 677 |
-
return sorted(df["model_name"].unique().tolist())
|
| 678 |
|
| 679 |
|
| 680 |
def update_visualization(selected_models, selected_category, selected_metric, version):
|
|
@@ -744,7 +731,7 @@ with demo:
|
|
| 744 |
)
|
| 745 |
model_type_filter = gr.Dropdown(
|
| 746 |
choices=[
|
| 747 |
-
t.to_str("
|
| 748 |
],
|
| 749 |
label="Access Type",
|
| 750 |
multiselect=True,
|
|
@@ -981,15 +968,10 @@ with demo:
|
|
| 981 |
df = get_leaderboard_df(version=version)
|
| 982 |
if df.empty:
|
| 983 |
return []
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
[
|
| 987 |
-
|
| 988 |
-
for _, row in df.drop_duplicates(
|
| 989 |
-
subset=["model_name", "mode"]
|
| 990 |
-
).iterrows()
|
| 991 |
-
]
|
| 992 |
-
)
|
| 993 |
|
| 994 |
model_mode_selector = gr.Dropdown(
|
| 995 |
choices=get_model_mode_choices(CURRENT_VERSION),
|
|
@@ -1032,27 +1014,23 @@ with demo:
|
|
| 1032 |
df = (
|
| 1033 |
get_leaderboard_df(version=version)
|
| 1034 |
if selected_category == "All Results"
|
| 1035 |
-
else get_category_leaderboard_df(
|
| 1036 |
-
selected_category, version=version
|
| 1037 |
-
)
|
| 1038 |
)
|
| 1039 |
if df.empty:
|
| 1040 |
return go.Figure()
|
| 1041 |
-
|
|
|
|
| 1042 |
selected_pairs = [s.rsplit(" [", 1) for s in selected_model_modes]
|
| 1043 |
selected_pairs = [
|
| 1044 |
-
(name.strip(), mode.strip("] "))
|
| 1045 |
for name, mode in selected_pairs
|
| 1046 |
]
|
| 1047 |
mask = df.apply(
|
| 1048 |
-
lambda row: (row["model_name"], str(row["mode"]))
|
| 1049 |
-
in selected_pairs,
|
| 1050 |
axis=1,
|
| 1051 |
)
|
| 1052 |
filtered_df = df[mask]
|
| 1053 |
-
metric_cols = [
|
| 1054 |
-
col for col in filtered_df.columns if selected_metric in col
|
| 1055 |
-
]
|
| 1056 |
fig = go.Figure()
|
| 1057 |
colors = ["#8FCCCC", "#C2A4B6", "#98B4A6", "#B68F7C"]
|
| 1058 |
for idx, (model_name, mode) in enumerate(selected_pairs):
|
|
@@ -1063,10 +1041,7 @@ with demo:
|
|
| 1063 |
if not model_data.empty:
|
| 1064 |
values = model_data[metric_cols].values[0].tolist()
|
| 1065 |
values = values + [values[0]]
|
| 1066 |
-
categories = [
|
| 1067 |
-
col.replace(f"_{selected_metric}", "")
|
| 1068 |
-
for col in metric_cols
|
| 1069 |
-
]
|
| 1070 |
categories = categories + [categories[0]]
|
| 1071 |
fig.add_trace(
|
| 1072 |
go.Scatterpolar(
|
|
@@ -1175,7 +1150,7 @@ with demo:
|
|
| 1175 |
)
|
| 1176 |
model_type = gr.Dropdown(
|
| 1177 |
choices=[
|
| 1178 |
-
t.to_str("
|
| 1179 |
for t in ModelType
|
| 1180 |
if t != ModelType.Unknown
|
| 1181 |
],
|
|
|
|
| 225 |
dataframe = pd.DataFrame(columns=columns)
|
| 226 |
logger.warning("Initializing empty leaderboard")
|
| 227 |
|
| 228 |
+
# Lowercase model_name for display
|
| 229 |
+
if "model_name" in dataframe.columns:
|
| 230 |
+
dataframe = dataframe.copy()
|
| 231 |
+
dataframe["model_name"] = dataframe["model_name"].str.lower()
|
| 232 |
+
|
| 233 |
# print("\n\n", "dataframe", dataframe, "--------------------------------\n\n")
|
| 234 |
|
| 235 |
# Determine which columns to display
|
|
|
|
| 593 |
if df.empty:
|
| 594 |
return go.Figure()
|
| 595 |
|
| 596 |
+
# Lowercase model_name in df and selected_models
|
| 597 |
+
df = df.copy()
|
| 598 |
+
df["model_name"] = df["model_name"].str.lower()
|
| 599 |
+
selected_models = [m.lower() for m in selected_models]
|
| 600 |
df = df[df["model_name"].isin(selected_models)]
|
|
|
|
|
|
|
| 601 |
metric_cols = [col for col in df.columns if metric in col]
|
|
|
|
|
|
|
| 602 |
fig = go.Figure()
|
| 603 |
+
colors = ["#8FCCCC", "#C2A4B6", "#98B4A6", "#B68F7C"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 604 |
for idx, model in enumerate(selected_models):
|
| 605 |
model_data = df[df["model_name"] == model]
|
| 606 |
if not model_data.empty:
|
| 607 |
values = model_data[metric_cols].values[0].tolist()
|
|
|
|
| 608 |
values = values + [values[0]]
|
|
|
|
|
|
|
| 609 |
categories = [col.replace(f"_{metric}", "") for col in metric_cols]
|
|
|
|
| 610 |
categories = categories + [categories[0]]
|
|
|
|
| 611 |
fig.add_trace(
|
| 612 |
go.Scatterpolar(
|
| 613 |
r=values,
|
|
|
|
| 617 |
fill="toself",
|
| 618 |
)
|
| 619 |
)
|
|
|
|
|
|
|
| 620 |
fig.update_layout(
|
| 621 |
paper_bgcolor="#000000",
|
| 622 |
plot_bgcolor="#000000",
|
|
|
|
| 651 |
font={"color": "#ffffff"},
|
| 652 |
),
|
| 653 |
)
|
|
|
|
| 654 |
return fig
|
| 655 |
|
| 656 |
|
|
|
|
| 661 |
df = get_leaderboard_df(version=version)
|
| 662 |
if df.empty:
|
| 663 |
return []
|
| 664 |
+
return sorted(df["model_name"].str.lower().unique().tolist())
|
| 665 |
|
| 666 |
|
| 667 |
def update_visualization(selected_models, selected_category, selected_metric, version):
|
|
|
|
| 731 |
)
|
| 732 |
model_type_filter = gr.Dropdown(
|
| 733 |
choices=[
|
| 734 |
+
t.to_str("-") for t in ModelType if t != ModelType.Unknown
|
| 735 |
],
|
| 736 |
label="Access Type",
|
| 737 |
multiselect=True,
|
|
|
|
| 968 |
df = get_leaderboard_df(version=version)
|
| 969 |
if df.empty:
|
| 970 |
return []
|
| 971 |
+
return sorted([
|
| 972 |
+
f"{str(row['model_name']).lower()} [{row['mode']}]"
|
| 973 |
+
for _, row in df.drop_duplicates(subset=["model_name", "mode"]).iterrows()
|
| 974 |
+
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 975 |
|
| 976 |
model_mode_selector = gr.Dropdown(
|
| 977 |
choices=get_model_mode_choices(CURRENT_VERSION),
|
|
|
|
| 1014 |
df = (
|
| 1015 |
get_leaderboard_df(version=version)
|
| 1016 |
if selected_category == "All Results"
|
| 1017 |
+
else get_category_leaderboard_df(selected_category, version=version)
|
|
|
|
|
|
|
| 1018 |
)
|
| 1019 |
if df.empty:
|
| 1020 |
return go.Figure()
|
| 1021 |
+
df = df.copy()
|
| 1022 |
+
df["model_name"] = df["model_name"].str.lower()
|
| 1023 |
selected_pairs = [s.rsplit(" [", 1) for s in selected_model_modes]
|
| 1024 |
selected_pairs = [
|
| 1025 |
+
(name.strip().lower(), mode.strip("] "))
|
| 1026 |
for name, mode in selected_pairs
|
| 1027 |
]
|
| 1028 |
mask = df.apply(
|
| 1029 |
+
lambda row: (row["model_name"], str(row["mode"])) in selected_pairs,
|
|
|
|
| 1030 |
axis=1,
|
| 1031 |
)
|
| 1032 |
filtered_df = df[mask]
|
| 1033 |
+
metric_cols = [col for col in filtered_df.columns if selected_metric in col]
|
|
|
|
|
|
|
| 1034 |
fig = go.Figure()
|
| 1035 |
colors = ["#8FCCCC", "#C2A4B6", "#98B4A6", "#B68F7C"]
|
| 1036 |
for idx, (model_name, mode) in enumerate(selected_pairs):
|
|
|
|
| 1041 |
if not model_data.empty:
|
| 1042 |
values = model_data[metric_cols].values[0].tolist()
|
| 1043 |
values = values + [values[0]]
|
| 1044 |
+
categories = [col.replace(f"_{selected_metric}", "") for col in metric_cols]
|
|
|
|
|
|
|
|
|
|
| 1045 |
categories = categories + [categories[0]]
|
| 1046 |
fig.add_trace(
|
| 1047 |
go.Scatterpolar(
|
|
|
|
| 1150 |
)
|
| 1151 |
model_type = gr.Dropdown(
|
| 1152 |
choices=[
|
| 1153 |
+
t.to_str("-")
|
| 1154 |
for t in ModelType
|
| 1155 |
if t != ModelType.Unknown
|
| 1156 |
],
|
src/display/utils.py
CHANGED
|
@@ -24,7 +24,7 @@ class ModelType(Enum):
|
|
| 24 |
ClosedSource = auto()
|
| 25 |
API = auto()
|
| 26 |
|
| 27 |
-
def to_str(self, separator: str = "
|
| 28 |
"""Convert enum to string with separator."""
|
| 29 |
if self == ModelType.Unknown:
|
| 30 |
return "Unknown"
|
|
@@ -44,7 +44,7 @@ class GuardModelType(str, Enum):
|
|
| 44 |
OPENAI_MODERATION = "openai_moderation"
|
| 45 |
LLM_REGEXP = "llm_regexp"
|
| 46 |
LLM_SO = "llm_so"
|
| 47 |
-
|
| 48 |
|
| 49 |
def __str__(self):
|
| 50 |
"""String representation of the guard model type."""
|
|
|
|
| 24 |
ClosedSource = auto()
|
| 25 |
API = auto()
|
| 26 |
|
| 27 |
+
def to_str(self, separator: str = "-") -> str:
|
| 28 |
"""Convert enum to string with separator."""
|
| 29 |
if self == ModelType.Unknown:
|
| 30 |
return "Unknown"
|
|
|
|
| 44 |
OPENAI_MODERATION = "openai_moderation"
|
| 45 |
LLM_REGEXP = "llm_regexp"
|
| 46 |
LLM_SO = "llm_so"
|
| 47 |
+
WHITECIRCLE_GUARD = "whitecircle_guard"
|
| 48 |
|
| 49 |
def __str__(self):
|
| 50 |
"""String representation of the guard model type."""
|