style theme
- app.py +4 -2
- src/about.py +5 -25
app.py CHANGED
@@ -731,6 +731,8 @@ with demo:
                 inputs=[column_selector],
                 outputs=[leaderboard] + [category_tabs.children[i].children[0] for i in range(1, len(CATEGORIES) + 1)]
             )
+            with gr.Row():
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("Visualize", elem_id="guardbench-viz-tab", id=1):
             with gr.Row():
@@ -781,8 +783,8 @@ with demo:
                 outputs=[model_selector]
             )
 
-        with gr.TabItem("About", elem_id="guardbench-about-tab", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+        # with gr.TabItem("About", elem_id="guardbench-about-tab", id=2):
+        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("Submit", elem_id="guardbench-submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
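For context, the two hunks above move the About content into a Row at the bottom of the main leaderboard tab and comment out the dedicated About TabItem. Below is a minimal runnable sketch of the resulting Gradio layout, not the actual app: the "Leaderboard" tab label and elem_id, the placeholder text values, and the launch call are assumptions, since only the Visualize and Submit tabs are visible in the hunk context.

```python
# Minimal sketch of the tab layout after this change (assumptions noted above).
import gradio as gr

# Placeholder strings; in the app these come from src/about.py.
LLM_BENCHMARKS_TEXT = "## GuardBench evaluation methodology\n..."
EVALUATION_QUEUE_TEXT = "## Evaluation queue\n..."

with gr.Blocks() as demo:
    with gr.Tabs():
        # Assumed label/elem_id for the main tab; not shown in the diff.
        with gr.TabItem("Leaderboard", elem_id="guardbench-leaderboard-tab", id=0):
            # The About text is now rendered here, at the bottom of the
            # leaderboard tab, instead of in its own TabItem.
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("Visualize", elem_id="guardbench-viz-tab", id=1):
            with gr.Row():
                gr.Markdown("Plots go here.")

        # The About tab (id=2) is commented out by this commit.

        with gr.TabItem("Submit", elem_id="guardbench-submit-tab", id=3):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()
```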
src/about.py CHANGED
@@ -12,7 +12,7 @@ INTRODUCTION_TEXT = """
 ## Introduction
 
 GuardBench is a comprehensive benchmark for evaluating the safety guardrails of large language models (LLMs).
-This leaderboard tracks model performance across various safety categories, including harmful content detection,
+This leaderboard tracks model performance across various safety categories, including harmful content detection,
 jailbreak resistance, and more.
 
 Models are evaluated on their ability to properly refuse harmful requests and detect problematic content
@@ -20,33 +20,13 @@ across multiple categories and test scenarios.
 """
 
 LLM_BENCHMARKS_TEXT = """
-## GuardBench
+## GuardBench evaluation methodology
 
-GuardBench
+GuardBench checks how well models handle safety challenges — from misinformation and self-harm to sexual content and corruption.
 
-
-- **Manipulation, Deception, and Misinformation**
-- **Creative Content Involving Illicit Themes**
-- **Sexual Content and Violence**
-- **Political Corruption and Legal Evasion**
-- **Labor Exploitation and Human Trafficking**
-- **Environmental and Industrial Harm**
-- **Animal Cruelty and Exploitation**
-- **Self-Harm and Suicidal Ideation**
-- **Safe Prompts**
+Models are tested with regular and adversarial prompts to see if they can avoid saying harmful things.
 
-
-- **Default Prompts**: Standard harmful requests
-- **Jailbreaked Prompts**: Adversarial attempts to bypass safety measures
-- **Default Answers**: Evaluation of harmful content in responses
-- **Jailbreaked Answers**: Evaluation of harmful content in responses to jailbreak attempts
-
-Metrics include:
-- **F1 Binary**: Harmonic mean of precision and recall
-- **Recall Binary**: Proportion of actual harmful content correctly identified
-- **Precision Binary**: Proportion of identified harmful content that was actually harmful
-- **Error Ratio**: Rate of errors during evaluation
-- **Average Runtime**: Average processing time in milliseconds
+We track how accurate they are, how often they make mistakes, and how fast they respond.
 """
 
 EVALUATION_QUEUE_TEXT = """