Commit d4d998a
Parent(s): init

Files changed:
- .env.template +6 -0
- .gitignore +45 -0
- README.md +82 -0
- app.py +281 -0
- requirements.txt +7 -0
- src/about.py +75 -0
- src/display/css_html_js.py +46 -0
- src/display/formatting.py +71 -0
- src/display/utils.py +177 -0
- src/envs.py +27 -0
- src/leaderboard/processor.py +180 -0
- src/populate.py +211 -0
- src/submission/submit.py +105 -0
.env.template
ADDED
@@ -0,0 +1,6 @@
+HF_TOKEN="your_huggingface_write_token"
+OWNER="your_huggingface_username_or_org"
+RESULTS_DATASET_ID="your_username/guardbench-results"
+SUBMITTER_TOKEN="your_secret_submission_token"
+ADMIN_USERNAME="admin"
+ADMIN_PASSWORD="password"  # Change this!
.gitignore
ADDED
@@ -0,0 +1,45 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+.venv/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environment variables
+.env
+
+# Virtual Environment
+venv/
+ENV/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Hugging Face cache
+eval-queue/
+eval-results/
+eval-queue-bk/
+eval-results-bk/
README.md
ADDED
@@ -0,0 +1,82 @@
+# GuardBench Leaderboard
+
+A HuggingFace leaderboard for the GuardBench project that allows users to submit evaluation results and view the performance of different models on safety guardrails.
+
+## Features
+
+- Display model performance across multiple safety categories
+- Accept JSONL submissions with evaluation results
+- Store submissions in a HuggingFace dataset
+- Secure submission process with token authentication
+- Automatic data refresh from HuggingFace
+
+## Setup
+
+1. Clone this repository
+2. Install dependencies:
+```
+pip install -r requirements.txt
+```
+3. Create a `.env` file based on the `.env.template`:
+```
+cp .env.template .env
+```
+4. Edit the `.env` file with your HuggingFace credentials and settings
+5. Run the application:
+```
+python app.py
+```
+
+## Submission Format
+
+Submissions should be in JSONL format, with each line containing a JSON object with the following structure:
+
+```json
+{
+  "model_name": "model-name",
+  "per_category_metrics": {
+    "Category Name": {
+      "default_prompts": {
+        "f1_binary": 0.95,
+        "recall_binary": 0.93,
+        "precision_binary": 1.0,
+        "error_ratio": 0.0,
+        "avg_runtime_ms": 3000
+      },
+      "jailbreaked_prompts": { ... },
+      "default_answers": { ... },
+      "jailbreaked_answers": { ... }
+    },
+    ...
+  },
+  "avg_metrics": {
+    "default_prompts": {
+      "f1_binary": 0.97,
+      "recall_binary": 0.95,
+      "precision_binary": 1.0,
+      "error_ratio": 0.0,
+      "avg_runtime_ms": 3000
+    },
+    "jailbreaked_prompts": { ... },
+    "default_answers": { ... },
+    "jailbreaked_answers": { ... }
+  }
+}
+```
+
+## Environment Variables
+
+- `HF_TOKEN`: Your HuggingFace write token
+- `OWNER`: Your HuggingFace username or organization
+- `RESULTS_DATASET_ID`: The ID of the dataset to store results (e.g., "username/guardbench-results")
+- `SUBMITTER_TOKEN`: A secret token required for submissions
+- `ADMIN_USERNAME`: Username for admin access to the leaderboard
+- `ADMIN_PASSWORD`: Password for admin access to the leaderboard
+
+## Deployment
+
+This application can be deployed as a HuggingFace Space for public access. Follow the HuggingFace Spaces documentation for deployment instructions.
+
+## License
+
+MIT
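For reference, a line of the JSONL submission format documented in this README can be produced with a short script along these lines (an illustrative sketch, not part of this commit; the model name and metric values are placeholders, and real numbers come from a GuardBench evaluation run):

```python
import json

# Placeholder metrics for a single test type.
metrics = {
    "f1_binary": 0.95,
    "recall_binary": 0.93,
    "precision_binary": 1.0,
    "error_ratio": 0.0,
    "avg_runtime_ms": 3000,
}

entry = {
    "model_name": "example-org/example-guard-model",  # placeholder
    "per_category_metrics": {
        "Safe Prompts": {"default_prompts": metrics},
    },
    "avg_metrics": {"default_prompts": metrics},
}

# Each submission line is one JSON object; write one line per model.
with open("results.jsonl", "w") as f:
    f.write(json.dumps(entry) + "\n")
```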
app.py
ADDED
@@ -0,0 +1,281 @@
+"""
+GuardBench Leaderboard Application
+"""
+
+import os
+import json
+import tempfile
+import logging
+import gradio as gr
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+import pandas as pd
+from apscheduler.schedulers.background import BackgroundScheduler
+
+from src.about import (
+    CITATION_BUTTON_LABEL,
+    CITATION_BUTTON_TEXT,
+    EVALUATION_QUEUE_TEXT,
+    INTRODUCTION_TEXT,
+    LLM_BENCHMARKS_TEXT,
+    TITLE,
+)
+from src.display.css_html_js import custom_css
+from src.display.utils import (
+    GUARDBENCH_COLUMN,
+    DISPLAY_COLS,
+    METRIC_COLS,
+    HIDDEN_COLS,
+    NEVER_HIDDEN_COLS,
+    CATEGORIES,
+    TEST_TYPES,
+    ModelType,
+    Precision,
+    WeightType
+)
+from src.display.formatting import styled_message, styled_error, styled_warning
+from src.envs import (
+    ADMIN_USERNAME,
+    ADMIN_PASSWORD,
+    RESULTS_DATASET_ID,
+    SUBMITTER_TOKEN,
+    TOKEN,
+    DATA_PATH
+)
+from src.populate import get_leaderboard_df, download_leaderboard_data, get_category_leaderboard_df
+from src.submission.submit import process_submission
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# Ensure data directory exists
+os.makedirs(DATA_PATH, exist_ok=True)
+
+# Initialize leaderboard data
+try:
+    logger.info("Initializing leaderboard data...")
+    LEADERBOARD_DF = get_leaderboard_df()
+    logger.info(f"Loaded leaderboard with {len(LEADERBOARD_DF)} entries")
+except Exception as e:
+    logger.error(f"Error loading leaderboard data: {e}")
+    LEADERBOARD_DF = pd.DataFrame()
+
+
+def init_leaderboard(dataframe):
+    """
+    Initialize the leaderboard component.
+    """
+    if dataframe is None or dataframe.empty:
+        # Create an empty dataframe with the right columns
+        columns = [getattr(GUARDBENCH_COLUMN, col).name for col in DISPLAY_COLS]
+        dataframe = pd.DataFrame(columns=columns)
+        logger.warning("Initializing empty leaderboard")
+
+    return Leaderboard(
+        value=dataframe,
+        datatype=[getattr(GUARDBENCH_COLUMN, col).type for col in DISPLAY_COLS],
+        select_columns=SelectColumns(
+            default_selection=[getattr(GUARDBENCH_COLUMN, col).name for col in DISPLAY_COLS],
+            cant_deselect=[getattr(GUARDBENCH_COLUMN, col).name for col in NEVER_HIDDEN_COLS],
+            label="Select Columns to Display:",
+        ),
+        search_columns=[GUARDBENCH_COLUMN.model.name],
+        hide_columns=[getattr(GUARDBENCH_COLUMN, col).name for col in HIDDEN_COLS],
+        filter_columns=[
+            ColumnFilter(GUARDBENCH_COLUMN.model_type.name, type="checkboxgroup", label="Model types"),
+        ],
+        interactive=False,
+    )
+
+
+def submit_results(
+    model_name: str,
+    base_model: str,
+    revision: str,
+    precision: str,
+    weight_type: str,
+    model_type: str,
+    submission_file: tempfile._TemporaryFileWrapper
+):
+    """
+    Handle submission of results with model metadata.
+    """
+    if submission_file is None:
+        return styled_error("No submission file provided")
+
+    if not model_name:
+        return styled_error("Model name is required")
+
+    if not model_type:
+        return styled_error("Please select a model type")
+
+    file_path = submission_file.name
+    logger.info(f"Received submission for model {model_name}: {file_path}")
+
+    # Add metadata to the submission
+    metadata = {
+        "model_name": model_name,
+        "base_model": base_model,
+        "revision": revision if revision else "main",
+        "precision": precision,
+        "weight_type": weight_type,
+        "model_type": model_type
+    }
+
+    # Process the submission
+    result = process_submission(file_path, metadata)
+
+    # Refresh the leaderboard data
+    global LEADERBOARD_DF
+    try:
+        logger.info("Refreshing leaderboard data after submission...")
+        LEADERBOARD_DF = get_leaderboard_df()
+        logger.info("Refreshed leaderboard data after submission")
+    except Exception as e:
+        logger.error(f"Error refreshing leaderboard data: {e}")
+
+    return result
+
+
+def refresh_data():
+    """
+    Refresh the leaderboard data from HuggingFace.
+    """
+    global LEADERBOARD_DF
+    try:
+        logger.info("Performing scheduled refresh of leaderboard data...")
+        LEADERBOARD_DF = get_leaderboard_df()
+        logger.info("Scheduled refresh of leaderboard data completed")
+    except Exception as e:
+        logger.error(f"Error in scheduled refresh: {e}")
+
+
+# Create Gradio app
+demo = gr.Blocks(css=custom_css)
+
+with demo:
+    gr.HTML(TITLE)
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        with gr.TabItem("🏅 Leaderboard", elem_id="guardbench-leaderboard-tab", id=0):
+            refresh_button = gr.Button("Refresh Leaderboard")
+
+            # Create tabs for each category
+            with gr.Tabs(elem_classes="category-tabs") as category_tabs:
+                # First tab for average metrics across all categories
+                with gr.TabItem("📊 Overall Performance", elem_id="overall-tab"):
+                    leaderboard = init_leaderboard(LEADERBOARD_DF)
+
+                # Create a tab for each category
+                for category in CATEGORIES:
+                    with gr.TabItem(f"{category}", elem_id=f"category-{category.lower().replace(' ', '-')}-tab"):
+                        category_df = get_category_leaderboard_df(category)
+                        category_leaderboard = init_leaderboard(category_df)
+
+            # Refresh button functionality
+            refresh_button.click(
+                fn=lambda: [
+                    init_leaderboard(get_leaderboard_df()),
+                    *[init_leaderboard(get_category_leaderboard_df(category)) for category in CATEGORIES]
+                ],
+                inputs=[],
+                outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
+            )
+
+        with gr.TabItem("📝 About", elem_id="guardbench-about-tab", id=1):
+            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("🚀 Submit", elem_id="guardbench-submit-tab", id=2):
+            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
+
+            with gr.Row():
+                gr.Markdown("# ✉️✨ Submit your results here!", elem_classes="markdown-text")
+
+            with gr.Row():
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
+                    model_type = gr.Dropdown(
+                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
+                        label="Model type",
+                        multiselect=False,
+                        value=None,
+                        interactive=True,
+                    )
+
+                with gr.Column():
+                    precision = gr.Dropdown(
+                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
+                        label="Precision",
+                        multiselect=False,
+                        value="float16",
+                        interactive=True,
+                    )
+                    weight_type = gr.Dropdown(
+                        choices=[i.value.name for i in WeightType],
+                        label="Weights type",
+                        multiselect=False,
+                        value="Original",
+                        interactive=True,
+                    )
+                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+
+            with gr.Row():
+                file_input = gr.File(
+                    label="Upload JSONL Results File",
+                    file_types=[".jsonl"]
+                )
+
+            submit_button = gr.Button("Submit Results")
+            result_output = gr.Markdown()
+
+            submit_button.click(
+                fn=submit_results,
+                inputs=[
+                    model_name_textbox,
+                    base_model_name_textbox,
+                    revision_name_textbox,
+                    precision,
+                    weight_type,
+                    model_type,
+                    file_input
+                ],
+                outputs=result_output
+            )
+
+    with gr.Row():
+        with gr.Accordion("📙 Citation", open=False):
+            citation_button = gr.Textbox(
+                value=CITATION_BUTTON_TEXT,
+                label=CITATION_BUTTON_LABEL,
+                lines=10,
+                elem_id="citation-button",
+                show_copy_button=True,
+            )
+
+        with gr.Accordion("ℹ️ Dataset Information", open=False):
+            dataset_info = gr.Markdown(f"""
+            ## Dataset Information
+
+            Results are stored in the HuggingFace dataset: [{RESULTS_DATASET_ID}](https://huggingface.co/datasets/{RESULTS_DATASET_ID})
+
+            Last updated: {pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S UTC")}
+            """)
+
+# Set up scheduler to refresh data periodically
+scheduler = BackgroundScheduler()
+scheduler.add_job(refresh_data, 'interval', minutes=30)
+scheduler.start()
+
+# Launch the app
+if __name__ == "__main__":
+    # Set up authentication if credentials are provided
+    if not ADMIN_USERNAME or not ADMIN_PASSWORD:
+        logger.warning("Admin username or password not set. Running without authentication.")
+        auth = None
+    else:
+        auth = (ADMIN_USERNAME, ADMIN_PASSWORD)
+
+    # Launch the app
+    demo.launch(server_name="0.0.0.0", server_port=7860, auth=auth)
requirements.txt
ADDED
@@ -0,0 +1,7 @@
+gradio>=3.50.2
+huggingface_hub>=0.19.0
+datasets>=2.16.0
+pandas>=2.0.0
+python-dotenv>=1.0.0
+apscheduler>=3.10.1
+gradio-leaderboard
src/about.py
ADDED
@@ -0,0 +1,75 @@
+"""
+Text content for the GuardBench Leaderboard.
+"""
+
+TITLE = """
+<div style="text-align: center; margin-bottom: 1rem">
+    <h1>GuardBench Leaderboard</h1>
+</div>
+"""
+
+INTRODUCTION_TEXT = """
+## Introduction
+
+GuardBench is a comprehensive benchmark for evaluating the safety guardrails of large language models (LLMs).
+This leaderboard tracks model performance across various safety categories, including harmful content detection,
+jailbreak resistance, and more.
+
+Models are evaluated on their ability to properly refuse harmful requests and detect problematic content
+across multiple categories and test scenarios.
+"""
+
+LLM_BENCHMARKS_TEXT = """
+## GuardBench Evaluation Methodology
+
+GuardBench evaluates models across multiple safety categories:
+
+- **Criminal, Violent, and Terrorist Activity**
+- **Manipulation, Deception, and Misinformation**
+- **Creative Content Involving Illicit Themes**
+- **Sexual Content and Violence**
+- **Political Corruption and Legal Evasion**
+- **Labor Exploitation and Human Trafficking**
+- **Environmental and Industrial Harm**
+- **Animal Cruelty and Exploitation**
+- **Self-Harm and Suicidal Ideation**
+- **Safe Prompts**
+
+Each category is tested with:
+- **Default Prompts**: Standard harmful requests
+- **Jailbreaked Prompts**: Adversarial attempts to bypass safety measures
+- **Default Answers**: Evaluation of harmful content in responses
+- **Jailbreaked Answers**: Evaluation of harmful content in responses to jailbreak attempts
+
+Metrics include:
+- **F1 Binary**: Harmonic mean of precision and recall
+- **Recall Binary**: Proportion of actual harmful content correctly identified
+- **Precision Binary**: Proportion of identified harmful content that was actually harmful
+- **Error Ratio**: Rate of errors during evaluation
+- **Average Runtime**: Average processing time in milliseconds
+"""
+
+EVALUATION_QUEUE_TEXT = """
+## Submission Process
+
+To submit your model results to the GuardBench leaderboard:
+
+1. Evaluate your model using the [GuardBench framework](https://github.com/huggingface/guard-bench)
+2. Format your results as a JSONL file according to our schema
+3. Submit your results using the submission form with your authorized token
+
+Results will be processed and added to the leaderboard once validated.
+"""
+
+CITATION_BUTTON_LABEL = "Cite GuardBench"
+
+CITATION_BUTTON_TEXT = """
+@misc{guardbench2023,
+    author = {GuardBench Team},
+    title = {GuardBench: Comprehensive Benchmark for LLM Safety Guardrails},
+    year = {2023},
+    publisher = {GitHub},
+    journal = {GitHub repository},
+    howpublished = {\\url{https://github.com/huggingface/guard-bench}}
+}
+"""
src/display/css_html_js.py
ADDED
@@ -0,0 +1,46 @@
+"""
+CSS and styling for the GuardBench Leaderboard.
+"""
+
+custom_css = """
+.markdown-text {
+    font-size: 16px !important;
+    text-align: justify !important;
+}
+
+.tab-buttons button.selected {
+    border-color: #2196F3 !important;
+    background: #E3F2FD !important;
+    color: #2196F3 !important;
+}
+
+#citation-button textarea {
+    font-family: monospace !important;
+}
+
+.leaderboard-container {
+    margin-top: 20px;
+}
+
+.category-header {
+    font-weight: bold;
+    background-color: #f5f5f5;
+    padding: 10px;
+    margin-top: 15px;
+    border-radius: 5px;
+}
+
+.metric-name {
+    font-weight: bold;
+    color: #2196F3;
+}
+
+.model-name {
+    font-weight: bold;
+}
+
+.model-link:hover {
+    text-decoration: underline;
+    color: #1976D2;
+}
+"""
src/display/formatting.py
ADDED
@@ -0,0 +1,71 @@
+"""
+Formatting utilities for the GuardBench Leaderboard.
+"""
+
+import pandas as pd
+import numpy as np
+
+
+def make_clickable_model(model_name: str) -> str:
+    """
+    Create a clickable link for a model name.
+    """
+    return f'<a href="https://huggingface.co/{model_name}" target="_blank">{model_name}</a>'
+
+
+def has_no_nan_values(df: pd.DataFrame, columns: list) -> pd.Series:
+    """
+    Check if a row has no NaN values in the specified columns.
+    """
+    return ~df[columns].isna().any(axis=1)
+
+
+def format_percentage(value: float) -> str:
+    """
+    Format a value as a percentage.
+    """
+    if pd.isna(value):
+        return "N/A"
+    return f"{value * 100:.2f}%"
+
+
+def format_number(value: float, precision: int = 2) -> str:
+    """
+    Format a number with specified precision.
+    """
+    if pd.isna(value):
+        return "N/A"
+    return f"{value:.{precision}f}"
+
+
+def styled_message(message: str) -> str:
+    """
+    Format a success message with styling.
+    """
+    return f"""
+    <div style="padding: 10px; border-radius: 5px; background-color: #e6f7e6; color: #2e7d32; border: 1px solid #2e7d32;">
+        ✅ {message}
+    </div>
+    """
+
+
+def styled_warning(message: str) -> str:
+    """
+    Format a warning message with styling.
+    """
+    return f"""
+    <div style="padding: 10px; border-radius: 5px; background-color: #fff8e1; color: #ff8f00; border: 1px solid #ff8f00;">
+        ⚠️ {message}
+    </div>
+    """
+
+
+def styled_error(message: str) -> str:
+    """
+    Format an error message with styling.
+    """
+    return f"""
+    <div style="padding: 10px; border-radius: 5px; background-color: #ffebee; color: #c62828; border: 1px solid #c62828;">
+        ❌ {message}
+    </div>
+    """
src/display/utils.py
ADDED
@@ -0,0 +1,177 @@
+"""
+Utility classes and functions for the GuardBench Leaderboard display.
+"""
+
+from dataclasses import dataclass, field, fields
+from enum import Enum, auto
+from typing import List, Optional
+
+
+class ModelType(Enum):
+    """Model types for the leaderboard."""
+    Unknown = auto()
+    OpenSource = auto()
+    ClosedSource = auto()
+    API = auto()
+
+    def to_str(self, separator: str = " ") -> str:
+        """Convert enum to string with separator."""
+        if self == ModelType.Unknown:
+            return "Unknown"
+        elif self == ModelType.OpenSource:
+            return f"Open{separator}Source"
+        elif self == ModelType.ClosedSource:
+            return f"Closed{separator}Source"
+        elif self == ModelType.API:
+            return "API"
+        return "Unknown"
+
+
+class Precision(Enum):
+    """Model precision types."""
+    Unknown = auto()
+    float16 = auto()
+    bfloat16 = auto()
+    float32 = auto()
+    int8 = auto()
+    int4 = auto()
+
+
+class WeightType(Enum):
+    """Model weight types."""
+    Original = auto()
+    Delta = auto()
+    Adapter = auto()
+
+
+@dataclass
+class ColumnInfo:
+    """Information about a column in the leaderboard."""
+    name: str
+    display_name: str
+    type: str = "text"
+    hidden: bool = False
+    never_hidden: bool = False
+    displayed_by_default: bool = True
+
+
+@dataclass
+class GuardBenchColumn:
+    """Columns for the GuardBench leaderboard."""
+    model: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="model_name",
+        display_name="Model",
+        never_hidden=True,
+        displayed_by_default=True
+    ))
+
+    model_type: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="model_type",
+        display_name="Type",
+        displayed_by_default=True
+    ))
+
+    # Metrics for all categories
+    default_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="default_prompts_f1",
+        display_name="Default Prompts F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    jailbreaked_prompts_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="jailbreaked_prompts_f1",
+        display_name="Jailbreaked Prompts F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    default_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="default_answers_f1",
+        display_name="Default Answers F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    jailbreaked_answers_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="jailbreaked_answers_f1",
+        display_name="Jailbreaked Answers F1",
+        type="number",
+        displayed_by_default=True
+    ))
+
+    # Average metrics
+    average_f1: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="average_f1",
+        display_name="Average F1",
+        type="number",
+        displayed_by_default=True,
+        never_hidden=True
+    ))
+
+    average_recall: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="average_recall",
+        display_name="Average Recall",
+        type="number",
+        displayed_by_default=False
+    ))
+
+    average_precision: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="average_precision",
+        display_name="Average Precision",
+        type="number",
+        displayed_by_default=False
+    ))
+
+    # Additional metadata
+    submission_date: ColumnInfo = field(default_factory=lambda: ColumnInfo(
+        name="submission_date",
+        display_name="Submission Date",
+        displayed_by_default=False
+    ))
+
+
+# Create instances for easy access
+GUARDBENCH_COLUMN = GuardBenchColumn()
+
+# Extract column lists for different views
+COLS = [f.name for f in fields(GUARDBENCH_COLUMN)]
+DISPLAY_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+                if getattr(GUARDBENCH_COLUMN, f.name).displayed_by_default]
+METRIC_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+               if getattr(GUARDBENCH_COLUMN, f.name).type == "number"]
+HIDDEN_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+               if getattr(GUARDBENCH_COLUMN, f.name).hidden]
+NEVER_HIDDEN_COLS = [getattr(GUARDBENCH_COLUMN, f.name).name for f in fields(GUARDBENCH_COLUMN)
+                     if getattr(GUARDBENCH_COLUMN, f.name).never_hidden]
+
+# Categories in GuardBench
+CATEGORIES = [
+    "Criminal, Violent, and Terrorist Activity",
+    "Manipulation, Deception, and Misinformation",
+    "Creative Content Involving Illicit Themes",
+    "Sexual Content and Violence",
+    "Political Corruption and Legal Evasion",
+    "Labor Exploitation and Human Trafficking",
+    "Environmental and Industrial Harm",
+    "Animal Cruelty and Exploitation",
+    "Self-Harm and Suicidal Ideation",
+    "Safe Prompts"
+]
+
+# Test types in GuardBench
+TEST_TYPES = [
+    "default_prompts",
+    "jailbreaked_prompts",
+    "default_answers",
+    "jailbreaked_answers"
+]
+
+# Metrics in GuardBench
+METRICS = [
+    "f1_binary",
+    "recall_binary",
+    "precision_binary",
+    "error_ratio",
+    "avg_runtime_ms"
+]
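The module-level lists at the end of this file are derived from the `GuardBenchColumn` dataclass fields; a quick interactive check (a usage sketch, not part of the commit) shows what the rest of the leaderboard code receives:

```python
from src.display.utils import COLS, DISPLAY_COLS, METRIC_COLS, NEVER_HIDDEN_COLS

# Field names of GuardBenchColumn, in declaration order.
print(COLS)
# Columns shown by default: model_name, model_type, the per-test-type F1 columns and average_f1.
print(DISPLAY_COLS)
# Numeric columns (type == "number"), used for datatypes and metric formatting.
print(METRIC_COLS)
# Columns that cannot be deselected in the UI: model_name and average_f1.
print(NEVER_HIDDEN_COLS)
```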
src/envs.py
ADDED
@@ -0,0 +1,27 @@
+import os
+from huggingface_hub import HfApi
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Hugging Face configuration
+TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
+OWNER = os.environ.get("OWNER", "guard-bench")  # Change to your org
+SUBMITTER_TOKEN = os.environ.get("SUBMITTER_TOKEN")
+ADMIN_USERNAME = os.environ.get("ADMIN_USERNAME")
+ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD")
+
+# Repository IDs
+REPO_ID = f"{OWNER}/leaderboard"
+RESULTS_DATASET_ID = os.environ.get("RESULTS_DATASET_ID", f"{OWNER}/guardbench-results")
+
+# Cache paths
+CACHE_PATH = os.getenv("HF_HOME", ".")
+DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
+
+# Local data paths
+LEADERBOARD_FILE = os.path.join(DATA_PATH, "leaderboard.json")
+
+# HF API instance
+API = HfApi(token=TOKEN)
src/leaderboard/processor.py
ADDED
@@ -0,0 +1,180 @@
+"""
+Process and transform GuardBench leaderboard data.
+"""
+
+import json
+import os
+import pandas as pd
+from datetime import datetime
+from typing import Dict, List, Any, Tuple
+
+from src.display.utils import CATEGORIES, TEST_TYPES, METRICS
+
+
+def load_leaderboard_data(file_path: str) -> Dict:
+    """
+    Load the leaderboard data from a JSON file.
+    """
+    if not os.path.exists(file_path):
+        return {"entries": [], "last_updated": datetime.now().isoformat()}
+
+    with open(file_path, 'r') as f:
+        data = json.load(f)
+
+    return data
+
+
+def save_leaderboard_data(data: Dict, file_path: str) -> None:
+    """
+    Save the leaderboard data to a JSON file.
+    """
+    # Ensure the directory exists
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    # Update the last_updated timestamp
+    data["last_updated"] = datetime.now().isoformat()
+
+    with open(file_path, 'w') as f:
+        json.dump(data, f, indent=2)
+
+
+def process_submission(submission_data: List[Dict]) -> List[Dict]:
+    """
+    Process submission data and convert it to leaderboard entries.
+    """
+    entries = []
+
+    for item in submission_data:
+        # Create a new entry for the leaderboard
+        entry = {
+            "model_name": item.get("model_name", "Unknown Model"),
+            "per_category_metrics": {},
+            "avg_metrics": {},
+            "submission_date": datetime.now().isoformat()
+        }
+
+        # Process per-category metrics
+        if "per_category_metrics" in item:
+            entry["per_category_metrics"] = item["per_category_metrics"]
+
+        # Process average metrics
+        if "avg_metrics" in item:
+            entry["avg_metrics"] = item["avg_metrics"]
+
+        entries.append(entry)
+
+    return entries
+
+
+def leaderboard_to_dataframe(leaderboard_data: Dict) -> pd.DataFrame:
+    """
+    Convert leaderboard data to a pandas DataFrame for display.
+    """
+    rows = []
+
+    for entry in leaderboard_data.get("entries", []):
+        model_name = entry.get("model_name", "Unknown Model")
+
+        # Extract average metrics for main display
+        row = {
+            "model_name": model_name,
+            "model_type": entry.get("model_type", "Unknown"),
+            "submission_date": entry.get("submission_date", "")
+        }
+
+        # Add average metrics
+        avg_metrics = entry.get("avg_metrics", {})
+        for test_type in TEST_TYPES:
+            if test_type in avg_metrics:
+                for metric in METRICS:
+                    if metric in avg_metrics[test_type]:
+                        col_name = f"{test_type}_{metric}"
+                        row[col_name] = avg_metrics[test_type][metric]
+
+        # Calculate overall averages for key metrics
+        f1_values = []
+        recall_values = []
+        precision_values = []
+
+        for test_type in TEST_TYPES:
+            if test_type in avg_metrics and "f1_binary" in avg_metrics[test_type]:
+                f1_values.append(avg_metrics[test_type]["f1_binary"])
+            if test_type in avg_metrics and "recall_binary" in avg_metrics[test_type]:
+                recall_values.append(avg_metrics[test_type]["recall_binary"])
+            if test_type in avg_metrics and "precision_binary" in avg_metrics[test_type]:
+                precision_values.append(avg_metrics[test_type]["precision_binary"])
+
+        # Add overall averages
+        if f1_values:
+            row["average_f1"] = sum(f1_values) / len(f1_values)
+        if recall_values:
+            row["average_recall"] = sum(recall_values) / len(recall_values)
+        if precision_values:
+            row["average_precision"] = sum(precision_values) / len(precision_values)
+
+        # Add specific test type F1 scores for display
+        if "default_prompts" in avg_metrics and "f1_binary" in avg_metrics["default_prompts"]:
+            row["default_prompts_f1"] = avg_metrics["default_prompts"]["f1_binary"]
+        if "jailbreaked_prompts" in avg_metrics and "f1_binary" in avg_metrics["jailbreaked_prompts"]:
+            row["jailbreaked_prompts_f1"] = avg_metrics["jailbreaked_prompts"]["f1_binary"]
+        if "default_answers" in avg_metrics and "f1_binary" in avg_metrics["default_answers"]:
+            row["default_answers_f1"] = avg_metrics["default_answers"]["f1_binary"]
+        if "jailbreaked_answers" in avg_metrics and "f1_binary" in avg_metrics["jailbreaked_answers"]:
+            row["jailbreaked_answers_f1"] = avg_metrics["jailbreaked_answers"]["f1_binary"]
+
+        rows.append(row)
+
+    # Create DataFrame and sort by average F1 score
+    df = pd.DataFrame(rows)
+    if not df.empty and "average_f1" in df.columns:
+        df = df.sort_values(by="average_f1", ascending=False)
+
+    return df
+
+
+def add_entries_to_leaderboard(leaderboard_data: Dict, new_entries: List[Dict]) -> Dict:
+    """
+    Add new entries to the leaderboard, replacing any with the same model name.
+    """
+    # Create a mapping of existing entries by model name
+    existing_entries = {entry["model_name"]: i for i, entry in enumerate(leaderboard_data.get("entries", []))}
+
+    # Process each new entry
+    for new_entry in new_entries:
+        model_name = new_entry.get("model_name")
+
+        if model_name in existing_entries:
+            # Replace existing entry
+            leaderboard_data["entries"][existing_entries[model_name]] = new_entry
+        else:
+            # Add new entry
+            if "entries" not in leaderboard_data:
+                leaderboard_data["entries"] = []
+            leaderboard_data["entries"].append(new_entry)
+
+    # Update the last_updated timestamp
+    leaderboard_data["last_updated"] = datetime.now().isoformat()
+
+    return leaderboard_data
+
+
+def process_jsonl_submission(file_path: str) -> Tuple[List[Dict], str]:
+    """
+    Process a JSONL submission file and extract entries.
+    """
+    entries = []
+    try:
+        with open(file_path, 'r') as f:
+            for line in f:
+                try:
+                    entry = json.loads(line)
+                    entries.append(entry)
+                except json.JSONDecodeError as e:
+                    return [], f"Invalid JSON in submission file: {e}"
+
+        if not entries:
+            return [], "Submission file is empty"
+
+        return entries, "Successfully processed submission"
+    except Exception as e:
+        return [], f"Error processing submission file: {e}"
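The processor module above turns the stored leaderboard JSON ({"entries": [...], "last_updated": ...}) into the flat DataFrame the UI displays; a minimal local usage sketch (assuming a data/leaderboard.json file already exists in that format; not part of the commit) looks like this:

```python
from src.leaderboard.processor import load_leaderboard_data, leaderboard_to_dataframe

# Load the cached leaderboard JSON and flatten it into one row per model,
# sorted by average_f1 when that column is present.
data = load_leaderboard_data("data/leaderboard.json")
df = leaderboard_to_dataframe(data)
print(df.head())
```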
src/populate.py
ADDED
@@ -0,0 +1,211 @@
+"""
+Populate the GuardBench leaderboard from HuggingFace datasets.
+"""
+
+import json
+import os
+import pandas as pd
+import tempfile
+from typing import Dict, Tuple, List
+from glob import glob
+
+from huggingface_hub import snapshot_download, hf_hub_download, HfApi
+from datasets import load_dataset
+
+from src.display.utils import GUARDBENCH_COLUMN, DISPLAY_COLS, CATEGORIES
+from src.envs import RESULTS_DATASET_ID, TOKEN, LEADERBOARD_FILE, CACHE_PATH
+from src.leaderboard.processor import leaderboard_to_dataframe, load_leaderboard_data, save_leaderboard_data, process_jsonl_submission, add_entries_to_leaderboard
+
+
+def download_leaderboard_data() -> bool:
+    """
+    Download the latest leaderboard data from HuggingFace.
+    """
+    try:
+        # Create a temporary directory to download the submissions
+        temp_dir = os.path.join(CACHE_PATH, "temp_submissions")
+        os.makedirs(temp_dir, exist_ok=True)
+
+        # Download the entire repository
+        try:
+            snapshot_path = snapshot_download(
+                repo_id=RESULTS_DATASET_ID,
+                repo_type="dataset",
+                local_dir=temp_dir,
+                token=TOKEN,
+                ignore_patterns=["*.md", ".*"],
+                etag_timeout=30
+            )
+
+            # Process all submission files
+            all_entries = []
+            submission_files = []
+
+            # Look for submission files in the submissions directory
+            submissions_dir = os.path.join(snapshot_path, "submissions")
+            if os.path.exists(submissions_dir):
+                submission_files.extend(glob(os.path.join(submissions_dir, "*.jsonl")))
+
+            # Also look for any JSONL files in the root
+            submission_files.extend(glob(os.path.join(snapshot_path, "*.jsonl")))
+
+            # Process each submission file
+            for file_path in submission_files:
+                entries, _ = process_jsonl_submission(file_path)
+                all_entries.extend(entries)
+
+            # Create leaderboard data structure
+            leaderboard_data = {
+                "entries": all_entries,
+                "last_updated": pd.Timestamp.now().isoformat()
+            }
+
+            # Save to local file
+            save_leaderboard_data(leaderboard_data, LEADERBOARD_FILE)
+
+            return True
+        except Exception as e:
+            print(f"Error downloading repository: {e}")
+
+            # If we can't download the repository, try to download individual files
+            try:
+                api = HfApi(token=TOKEN)
+                files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
+
+                submission_files = [f for f in files if f.endswith('.jsonl')]
+                all_entries = []
+
+                for file_path in submission_files:
+                    try:
+                        local_path = hf_hub_download(
+                            repo_id=RESULTS_DATASET_ID,
+                            filename=file_path,
+                            repo_type="dataset",
+                            token=TOKEN
+                        )
+                        entries, _ = process_jsonl_submission(local_path)
+                        all_entries.extend(entries)
+                    except Exception as file_error:
+                        print(f"Error downloading file {file_path}: {file_error}")
+
+                # Create leaderboard data structure
+                leaderboard_data = {
+                    "entries": all_entries,
+                    "last_updated": pd.Timestamp.now().isoformat()
+                }
+
+                # Save to local file
+                save_leaderboard_data(leaderboard_data, LEADERBOARD_FILE)
+
+                return True
+            except Exception as list_error:
+                print(f"Error listing repository files: {list_error}")
+
+                # If we can't download anything, create an empty leaderboard
+                if not os.path.exists(LEADERBOARD_FILE):
+                    empty_data = {"entries": [], "last_updated": pd.Timestamp.now().isoformat()}
+                    save_leaderboard_data(empty_data, LEADERBOARD_FILE)
+
+                return False
+    except Exception as e:
+        print(f"Error downloading leaderboard data: {e}")
+
+        # Ensure we have at least an empty leaderboard file
+        if not os.path.exists(LEADERBOARD_FILE):
+            empty_data = {"entries": [], "last_updated": pd.Timestamp.now().isoformat()}
+            save_leaderboard_data(empty_data, LEADERBOARD_FILE)
+
+        return False
+
+
+def get_leaderboard_df() -> pd.DataFrame:
+    """
+    Get the leaderboard data as a DataFrame.
+    """
+    # Try to download the latest data
+    download_leaderboard_data()
+
+    # Load from local file
+    leaderboard_data = load_leaderboard_data(LEADERBOARD_FILE)
+
+    # Convert to DataFrame
+    df = leaderboard_to_dataframe(leaderboard_data)
+
+    return df
+
+
+def get_category_leaderboard_df(category: str) -> pd.DataFrame:
+    """
+    Get the leaderboard data filtered by a specific category.
+
+    Args:
+        category: The category to filter by (e.g., "Criminal, Violent, and Terrorist Activity")
+
+    Returns:
+        DataFrame with metrics for the specified category
+    """
+    # Load the leaderboard data
+    leaderboard_data = load_leaderboard_data(LEADERBOARD_FILE)
+
+    # Filter entries to only include those with data for the specified category
+    filtered_entries = []
+
+    for entry in leaderboard_data.get("entries", []):
+        # Check if the entry has data for this category
+        if "per_category_metrics" in entry and category in entry["per_category_metrics"]:
+            # Create a new entry with just the overall info and this category's metrics
+            filtered_entry = {
+                "model_name": entry.get("model_name", "Unknown Model"),
+                "model_type": entry.get("model_type", "Unknown"),
+                "submission_date": entry.get("submission_date", ""),
+            }
+
+            # Extract metrics for this category
+            category_metrics = entry["per_category_metrics"][category]
+
+            # Add metrics for each test type
+            for test_type in category_metrics:
+                if test_type and isinstance(category_metrics[test_type], dict):
+                    for metric, value in category_metrics[test_type].items():
+                        col_name = f"{test_type}_{metric}"
+                        filtered_entry[col_name] = value
+
+            # Calculate average F1 for this category
+            f1_values = []
+            for test_type in category_metrics:
+                if test_type and isinstance(category_metrics[test_type], dict) and "f1_binary" in category_metrics[test_type]:
+                    f1_values.append(category_metrics[test_type]["f1_binary"])
+
+            if f1_values:
+                filtered_entry["average_f1"] = sum(f1_values) / len(f1_values)
+
+            # Add specific test type F1 scores for display
+            for test_type in ["default_prompts", "jailbreaked_prompts", "default_answers", "jailbreaked_answers"]:
+                if test_type in category_metrics and "f1_binary" in category_metrics[test_type]:
+                    filtered_entry[f"{test_type}_f1"] = category_metrics[test_type]["f1_binary"]
+
+            filtered_entries.append(filtered_entry)
+
+    # Create a new leaderboard data structure with the filtered entries
+    filtered_leaderboard = {
+        "entries": filtered_entries,
+        "last_updated": leaderboard_data.get("last_updated", pd.Timestamp.now().isoformat())
+    }
+
+    # Convert to DataFrame
+    df = leaderboard_to_dataframe(filtered_leaderboard)
+
+    return df
+
+
+def get_detailed_model_data(model_name: str) -> Dict:
+    """
+    Get detailed data for a specific model.
+    """
+    leaderboard_data = load_leaderboard_data(LEADERBOARD_FILE)
+
+    for entry in leaderboard_data.get("entries", []):
+        if entry.get("model_name") == model_name:
+            return entry
+
+    return {}
src/submission/submit.py
ADDED
@@ -0,0 +1,105 @@
+"""
+Handle submissions to the GuardBench leaderboard.
+"""
+
+import json
+import os
+import tempfile
+import uuid
+from datetime import datetime
+from typing import Dict, List, Tuple
+
+from huggingface_hub import HfApi
+from datasets import load_dataset, Dataset
+
+from src.display.formatting import styled_error, styled_message, styled_warning
+from src.envs import API, RESULTS_DATASET_ID, TOKEN
+from src.leaderboard.processor import process_jsonl_submission, add_entries_to_leaderboard, load_leaderboard_data
+
+
+def validate_submission(file_path: str) -> Tuple[bool, str]:
+    """
+    Validate a submission file.
+    """
+    try:
+        entries, message = process_jsonl_submission(file_path)
+        if not entries:
+            return False, message
+
+        # Additional validation could be added here
+
+        return True, "Submission is valid"
+    except Exception as e:
+        return False, f"Error validating submission: {e}"
+
+
+def submit_to_hub(file_path: str, metadata: Dict, dataset_id: str, token: str) -> Tuple[bool, str]:
+    """
+    Submit results to a HuggingFace dataset repository as individual files.
+    """
+    try:
+        # Process the submission file to validate
+        entries, message = process_jsonl_submission(file_path)
+        if not entries:
+            return False, message
+
+        # Generate a unique submission ID
+        model_name = metadata.get("model_name", "unknown")
+        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        submission_id = f"{model_name_safe}_{timestamp}"
+
+        # Create an API instance
+        api = HfApi(token=token)
+
+        # Create a temporary file with metadata added
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as temp_file:
+            # Add metadata to each entry
+            for entry in entries:
+                # If the entry already has a model_name, don't override it
+                if "model_name" not in entry:
+                    entry["model_name"] = metadata.get("model_name")
+
+                # Add other metadata if not present
+                for key, value in metadata.items():
+                    if key != "model_name" and key not in entry:
+                        entry[key] = value
+
+                # Write to temp file
+                temp_file.write(json.dumps(entry) + "\n")
+
+            temp_path = temp_file.name
+
+        # Upload the file directly to the repository
+        submission_path = f"submissions/{submission_id}.jsonl"
+        api.upload_file(
+            path_or_fileobj=temp_path,
+            path_in_repo=submission_path,
+            repo_id=dataset_id,
+            repo_type="dataset",
+            commit_message=f"Add submission for {model_name}"
+        )
+
+        # Clean up the temporary file
+        os.unlink(temp_path)
+
+        return True, f"Successfully uploaded submission for {model_name} to {dataset_id}"
+    except Exception as e:
+        return False, f"Error submitting to dataset: {e}"
+
+
+def process_submission(file_path: str, metadata: Dict) -> str:
+    """
+    Process a submission to the GuardBench leaderboard.
+    """
+    # Validate submission file
+    is_valid, validation_message = validate_submission(file_path)
+    if not is_valid:
+        return styled_error(validation_message)
+
+    # Submit to HuggingFace dataset repository
+    success, message = submit_to_hub(file_path, metadata, RESULTS_DATASET_ID, TOKEN)
+    if not success:
+        return styled_error(message)
+
+    return styled_message(f"Submission successful! {message}")
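A minimal way to exercise the submission path above from a local script (a sketch, not part of this commit; it assumes the environment variables from .env.template are set and that results.jsonl follows the README schema, and the model name is a placeholder):

```python
from src.submission.submit import process_submission

# Metadata mirrors the fields collected by the Gradio submit form in app.py.
metadata = {
    "model_name": "example-org/example-guard-model",  # placeholder
    "base_model": "",
    "revision": "main",
    "precision": "float16",
    "weight_type": "Original",
    "model_type": "Open : Source",  # value produced by ModelType.to_str(" : ") in the form
}

# Validates the JSONL file and uploads it to the results dataset repository,
# returning a styled HTML success or error message.
print(process_submission("results.jsonl", metadata))
```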