Spaces:
Sleeping
Sleeping
categories rename
Browse files
- app.py +26 -8
- logs/guardbench_20250424_132721_81730502.log +2 -0
- logs/guardbench_20250424_132735_5fe7accd.log +2 -0
- logs/guardbench_20250424_133606_bfae8d28.log +2 -0
- logs/guardbench_20250424_133744_d3ca5956.log +2 -0
- logs/guardbench_20250424_133847_cf6f7f0f.log +2 -0
- logs/guardbench_20250424_133952_920f4d61.log +2 -0
- logs/guardbench_20250424_134044_2df3fadc.log +17 -0
app.py
CHANGED
|
@@ -577,6 +577,22 @@ def update_visualization(selected_models, selected_category, selected_metric, ve
|
|
| 577 |
# Create Gradio app
|
| 578 |
demo = gr.Blocks(css=custom_css, theme=custom_theme)
|
| 579 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 580 |
with demo:
|
| 581 |
gr.HTML(TITLE)
|
| 582 |
# gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
@@ -628,12 +644,14 @@ with demo:
|
|
| 628 |
# Create tabs for each category
|
| 629 |
with gr.Tabs(elem_classes="category-tabs") as category_tabs:
|
| 630 |
# First tab for average metrics across all categories
|
| 631 |
-
with gr.TabItem("
|
| 632 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 633 |
|
| 634 |
-
# Create a tab for each category
|
| 635 |
for category in CATEGORIES:
|
| 636 |
-
|
|
|
|
|
|
|
| 637 |
category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
|
| 638 |
category_leaderboard = init_leaderboard(category_df)
|
| 639 |
|
|
@@ -752,12 +770,12 @@ with demo:
|
|
| 752 |
interactive=True
|
| 753 |
)
|
| 754 |
with gr.Column():
|
| 755 |
-
# Add Overall Performance to categories
|
| 756 |
-
|
| 757 |
category_selector = gr.Dropdown(
|
| 758 |
-
choices=
|
| 759 |
label="Select Category",
|
| 760 |
-
value=
|
| 761 |
interactive=True
|
| 762 |
)
|
| 763 |
metric_selector = gr.Dropdown(
|
|
@@ -772,7 +790,7 @@ with demo:
|
|
| 772 |
# Update visualization when any selector changes
|
| 773 |
for control in [viz_version_selector, model_selector, category_selector, metric_selector]:
|
| 774 |
control.change(
|
| 775 |
-
fn=update_visualization,
|
| 776 |
inputs=[model_selector, category_selector, metric_selector, viz_version_selector],
|
| 777 |
outputs=plot_output
|
| 778 |
)
|
|
|
|
| 577 |
# Create Gradio app
|
| 578 |
demo = gr.Blocks(css=custom_css, theme=custom_theme)
|
| 579 |
|
| 580 |
+
# Mapping from original category names to display names
|
| 581 |
+
CATEGORY_DISPLAY_MAP = {
|
| 582 |
+
"Criminal, Violent, and Terrorist Activity": "Crime & Violence",
|
| 583 |
+
"Manipulation, Deception, and Misinformation": "Misinformation",
|
| 584 |
+
"Creative Content Involving Illicit Themes": "Illicit Creative",
|
| 585 |
+
"Sexual Content and Violence": "Sexual Content",
|
| 586 |
+
"Political Corruption and Legal Evasion": "Corruption & Legal Evasion",
|
| 587 |
+
"Labor Exploitation and Human Trafficking": "Labor Exploitation",
|
| 588 |
+
"Environmental and Industrial Harm": "Environmental & Industrial Harm",
|
| 589 |
+
"Animal Cruelty and Exploitation": "Animal Harm",
|
| 590 |
+
"Self–Harm and Suicidal Ideation": "Self-Harm",
|
| 591 |
+
"Safe Prompts": "Safe Prompts"
|
| 592 |
+
}
|
| 593 |
+
# Create reverse mapping for lookups
|
| 594 |
+
CATEGORY_REVERSE_MAP = {v: k for k, v in CATEGORY_DISPLAY_MAP.items()}
|
| 595 |
+
|
| 596 |
with demo:
|
| 597 |
gr.HTML(TITLE)
|
| 598 |
# gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
|
|
| 644 |
# Create tabs for each category
|
| 645 |
with gr.Tabs(elem_classes="category-tabs") as category_tabs:
|
| 646 |
# First tab for average metrics across all categories
|
| 647 |
+
with gr.TabItem("All Results", elem_id="overall-tab"):
|
| 648 |
leaderboard = init_leaderboard(LEADERBOARD_DF)
|
| 649 |
|
| 650 |
+
# Create a tab for each category using display names
|
| 651 |
for category in CATEGORIES:
|
| 652 |
+
display_name = CATEGORY_DISPLAY_MAP.get(category, category)
|
| 653 |
+
elem_id = f"category-{display_name.lower().replace(' ', '-').replace('&', 'and')}-tab"
|
| 654 |
+
with gr.TabItem(display_name, elem_id=elem_id):
|
| 655 |
category_df = get_category_leaderboard_df(category, version=CURRENT_VERSION)
|
| 656 |
category_leaderboard = init_leaderboard(category_df)
|
| 657 |
|
|
|
|
| 770 |
interactive=True
|
| 771 |
)
|
| 772 |
with gr.Column():
|
| 773 |
+
# Prepend the "All Results" entry to the categories; use display names
|
| 774 |
+
viz_categories_display = ["All Results"] + [CATEGORY_DISPLAY_MAP.get(cat, cat) for cat in CATEGORIES]
|
| 775 |
category_selector = gr.Dropdown(
|
| 776 |
+
choices=viz_categories_display,
|
| 777 |
label="Select Category",
|
| 778 |
+
value=viz_categories_display[0],
|
| 779 |
interactive=True
|
| 780 |
)
|
| 781 |
metric_selector = gr.Dropdown(
|
|
|
|
| 790 |
# Update visualization when any selector changes
|
| 791 |
for control in [viz_version_selector, model_selector, category_selector, metric_selector]:
|
| 792 |
control.change(
|
| 793 |
+
fn=lambda sm, sc, s_metric, v: update_visualization(sm, CATEGORY_REVERSE_MAP.get(sc, sc), s_metric, v),
|
| 794 |
inputs=[model_selector, category_selector, metric_selector, viz_version_selector],
|
| 795 |
outputs=plot_output
|
| 796 |
)
|
logs/guardbench_20250424_132721_81730502.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:27:22,432 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:27:22,600 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_132735_5fe7accd.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:27:35,986 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:27:36,096 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133606_bfae8d28.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:36:07,557 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:36:07,754 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133744_d3ca5956.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:37:44,999 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:37:45,167 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133847_cf6f7f0f.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:38:48,713 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:38:48,956 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_133952_920f4d61.log
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:39:53,041 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:39:53,311 - __main__ - INFO - Loaded leaderboard with 0 entries
|
logs/guardbench_20250424_134044_2df3fadc.log
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-04-24 13:40:44,934 - __main__ - INFO - Initializing leaderboard data...
|
| 2 |
+
2025-04-24 13:40:45,088 - __main__ - INFO - Loaded leaderboard with 0 entries
|
| 3 |
+
2025-04-24 13:40:45,155 - __main__ - INFO - Available columns in LEADERBOARD_DF: ['model_name', 'model_type', 'guard_model_type', 'integral_score', 'macro_accuracy', 'macro_recall', 'micro_avg_error_ratio', 'micro_avg_runtime_ms', 'total_evals_count']
|
| 4 |
+
2025-04-24 13:40:45,158 - __main__ - WARNING - Initializing empty leaderboard
|
| 5 |
+
2025-04-24 13:40:45,672 - __main__ - WARNING - Initializing empty leaderboard
|
| 6 |
+
2025-04-24 13:40:45,758 - __main__ - WARNING - Initializing empty leaderboard
|
| 7 |
+
2025-04-24 13:40:45,862 - __main__ - WARNING - Initializing empty leaderboard
|
| 8 |
+
2025-04-24 13:40:45,950 - __main__ - WARNING - Initializing empty leaderboard
|
| 9 |
+
2025-04-24 13:40:46,035 - __main__ - WARNING - Initializing empty leaderboard
|
| 10 |
+
2025-04-24 13:40:46,122 - __main__ - WARNING - Initializing empty leaderboard
|
| 11 |
+
2025-04-24 13:40:46,389 - __main__ - WARNING - Initializing empty leaderboard
|
| 12 |
+
2025-04-24 13:40:46,473 - __main__ - WARNING - Initializing empty leaderboard
|
| 13 |
+
2025-04-24 13:40:46,593 - __main__ - WARNING - Initializing empty leaderboard
|
| 14 |
+
2025-04-24 13:40:46,686 - __main__ - WARNING - Initializing empty leaderboard
|
| 15 |
+
2025-04-24 13:40:46,869 - apscheduler.scheduler - INFO - Adding job tentatively -- it will be properly scheduled when the scheduler starts
|
| 16 |
+
2025-04-24 13:40:46,869 - apscheduler.scheduler - INFO - Added job "refresh_data" to job store "default"
|
| 17 |
+
2025-04-24 13:40:46,870 - apscheduler.scheduler - INFO - Scheduler started
|