Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Harheem Kim
committed on
Commit
·
c6e563a
1
Parent(s):
a9d36f6
colors, fonts, dropdown problems
Browse files- banner_background_capture.png +3 -0
- components/leaderboard_components.py +13 -10
- styles/leaderboard_styles.py +75 -30
- tabs/leaderboard_v1_en.py +127 -120
- tabs/leaderboard_v1_kr.py +114 -107
- utils.py +13 -10
banner_background_capture.png
ADDED
|
Git LFS Details
|
components/leaderboard_components.py
CHANGED
|
@@ -5,8 +5,8 @@ These are stable components that don't change frequently
|
|
| 5 |
|
| 6 |
def get_chart_colors():
|
| 7 |
return {
|
| 8 |
-
"Private": "#
|
| 9 |
-
"Open source": "#
|
| 10 |
"performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
|
| 11 |
"text": "white",
|
| 12 |
"background": "#01091A",
|
|
@@ -16,10 +16,12 @@ def get_chart_colors():
|
|
| 16 |
|
| 17 |
def get_rank_badge(rank):
|
| 18 |
"""Generate HTML for rank badge with appropriate styling"""
|
|
|
|
|
|
|
| 19 |
badge_styles = {
|
| 20 |
-
1: ("1st",
|
| 21 |
-
2: ("2nd",
|
| 22 |
-
3: ("3rd",
|
| 23 |
}
|
| 24 |
|
| 25 |
if rank in badge_styles:
|
|
@@ -59,24 +61,25 @@ def get_type_badge(model_type):
|
|
| 59 |
"""Generate HTML for model type badge"""
|
| 60 |
colors = get_chart_colors()
|
| 61 |
color_map = {
|
| 62 |
-
"Open source": colors.get("Open source", "#
|
| 63 |
-
"Proprietary": colors.get("Private", "#
|
| 64 |
-
"Private": colors.get("Private", "#
|
| 65 |
}
|
| 66 |
label_map = {
|
| 67 |
"Open source": "OSS",
|
| 68 |
"Proprietary": "API",
|
| 69 |
"Private": "API",
|
| 70 |
}
|
| 71 |
-
bg_color = color_map.get(model_type, "#
|
| 72 |
display_label = label_map.get(model_type, model_type)
|
|
|
|
| 73 |
return f"""
|
| 74 |
<div style="
|
| 75 |
display: inline-flex;
|
| 76 |
align-items: center;
|
| 77 |
padding: 4px 8px;
|
| 78 |
background: {bg_color};
|
| 79 |
-
color:
|
| 80 |
border-radius: 4px;
|
| 81 |
font-size: 0.85em;
|
| 82 |
font-weight: 500;
|
|
|
|
| 5 |
|
| 6 |
def get_chart_colors():
|
| 7 |
return {
|
| 8 |
+
"Private": "#593B1D", # Rich brown for API
|
| 9 |
+
"Open source": "#FACC15", # Warm amber for OSS
|
| 10 |
"performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
|
| 11 |
"text": "white",
|
| 12 |
"background": "#01091A",
|
|
|
|
| 16 |
|
| 17 |
def get_rank_badge(rank):
|
| 18 |
"""Generate HTML for rank badge with appropriate styling"""
|
| 19 |
+
tag_background = "#593B1D"
|
| 20 |
+
tag_text_color = "#FFFFFF"
|
| 21 |
badge_styles = {
|
| 22 |
+
1: ("1st", tag_background, tag_text_color),
|
| 23 |
+
2: ("2nd", tag_background, tag_text_color),
|
| 24 |
+
3: ("3rd", tag_background, tag_text_color),
|
| 25 |
}
|
| 26 |
|
| 27 |
if rank in badge_styles:
|
|
|
|
| 61 |
"""Generate HTML for model type badge"""
|
| 62 |
colors = get_chart_colors()
|
| 63 |
color_map = {
|
| 64 |
+
"Open source": colors.get("Open source", "#FACC15"),
|
| 65 |
+
"Proprietary": colors.get("Private", "#593B1D"),
|
| 66 |
+
"Private": colors.get("Private", "#593B1D"),
|
| 67 |
}
|
| 68 |
label_map = {
|
| 69 |
"Open source": "OSS",
|
| 70 |
"Proprietary": "API",
|
| 71 |
"Private": "API",
|
| 72 |
}
|
| 73 |
+
bg_color = color_map.get(model_type, "#593B1D")
|
| 74 |
display_label = label_map.get(model_type, model_type)
|
| 75 |
+
text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
|
| 76 |
return f"""
|
| 77 |
<div style="
|
| 78 |
display: inline-flex;
|
| 79 |
align-items: center;
|
| 80 |
padding: 4px 8px;
|
| 81 |
background: {bg_color};
|
| 82 |
+
color: {text_color};
|
| 83 |
border-radius: 4px;
|
| 84 |
font-size: 0.85em;
|
| 85 |
font-weight: 500;
|
styles/leaderboard_styles.py
CHANGED
|
@@ -34,9 +34,9 @@ def get_leaderboard_css():
|
|
| 34 |
--border-subtle: rgba(245, 246, 247, 0.08);
|
| 35 |
--border-default: rgba(245, 246, 247, 0.12);
|
| 36 |
--border-strong: rgba(245, 246, 247, 0.2);
|
| 37 |
-
--text-primary: #
|
| 38 |
-
--text-secondary: #
|
| 39 |
-
--text-muted: #
|
| 40 |
--accent-primary: #ffd21e;
|
| 41 |
--accent-secondary: #1098F7;
|
| 42 |
--accent-tertiary: #F5F6F7;
|
|
@@ -44,12 +44,38 @@ def get_leaderboard_css():
|
|
| 44 |
--glow-secondary: rgba(16, 152, 247, 0.4);
|
| 45 |
--glow-tertiary: rgba(245, 246, 247, 0.3);
|
| 46 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
/* Global font and background */
|
| 49 |
-
.gradio-container {
|
| 50 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
|
| 51 |
background: var(--bg-primary) !important;
|
| 52 |
-
color: var(--text-primary) !important;
|
| 53 |
}
|
| 54 |
|
| 55 |
/* Headers and text */
|
|
@@ -60,18 +86,15 @@ def get_leaderboard_css():
|
|
| 60 |
}
|
| 61 |
|
| 62 |
p, span, div, li, ul li {
|
| 63 |
-
color: white !important;
|
| 64 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 65 |
}
|
| 66 |
|
| 67 |
/* Labels and info text */
|
| 68 |
label {
|
| 69 |
-
color: white !important;
|
| 70 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 71 |
}
|
| 72 |
|
| 73 |
.gr-box label {
|
| 74 |
-
color: white !important;
|
| 75 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 76 |
}
|
| 77 |
|
|
@@ -158,7 +181,7 @@ def get_leaderboard_css():
|
|
| 158 |
|
| 159 |
/* Radio button labels */
|
| 160 |
input[type="radio"] + label {
|
| 161 |
-
color:
|
| 162 |
}
|
| 163 |
|
| 164 |
input[type="radio"]:checked {
|
|
@@ -171,26 +194,22 @@ def get_leaderboard_css():
|
|
| 171 |
.dropdown {
|
| 172 |
border-color: var(--border-default) !important;
|
| 173 |
background: var(--bg-card) !important;
|
| 174 |
-
color: white !important;
|
| 175 |
transition: all 0.2s ease !important;
|
| 176 |
}
|
| 177 |
|
| 178 |
/* Dropdown option styling */
|
| 179 |
.dropdown option {
|
| 180 |
background: var(--bg-card) !important;
|
| 181 |
-
color: white !important;
|
| 182 |
}
|
| 183 |
|
| 184 |
/* Gradio dropdown specific styling */
|
| 185 |
.gradio-dropdown select,
|
| 186 |
.gradio-dropdown [role="combobox"],
|
| 187 |
.gradio-dropdown input {
|
| 188 |
-
color: white !important;
|
| 189 |
background: var(--bg-card) !important;
|
| 190 |
}
|
| 191 |
|
| 192 |
.gradio-dropdown option {
|
| 193 |
-
color: white !important;
|
| 194 |
background: var(--bg-card) !important;
|
| 195 |
}
|
| 196 |
|
|
@@ -210,19 +229,16 @@ def get_leaderboard_css():
|
|
| 210 |
overflow-y: auto !important;
|
| 211 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 212 |
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
|
| 213 |
-
color: white !important;
|
| 214 |
}
|
| 215 |
|
| 216 |
/* Table cells and headers */
|
| 217 |
.dataframe td,
|
| 218 |
.dataframe th {
|
| 219 |
-
color: white !important;
|
| 220 |
}
|
| 221 |
|
| 222 |
/* Button styling */
|
| 223 |
button {
|
| 224 |
background: var(--bg-card) !important;
|
| 225 |
-
color: white !important;
|
| 226 |
border: 1px solid var(--border-default) !important;
|
| 227 |
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
| 228 |
}
|
|
@@ -363,7 +379,7 @@ def get_leaderboard_css():
|
|
| 363 |
display: inline-block !important;
|
| 364 |
padding: 14px 28px !important;
|
| 365 |
background: #ffd21e !important;
|
| 366 |
-
color:
|
| 367 |
text-decoration: none !important;
|
| 368 |
border-radius: 16px !important;
|
| 369 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
|
@@ -382,7 +398,7 @@ def get_leaderboard_css():
|
|
| 382 |
transform: translateY(-3px) !important;
|
| 383 |
box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
|
| 384 |
background: #ffd21e !important;
|
| 385 |
-
color:
|
| 386 |
text-decoration: none !important;
|
| 387 |
text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
|
| 388 |
}
|
|
@@ -424,24 +440,46 @@ def get_leaderboard_css():
|
|
| 424 |
border-color: #ffd21e !important;
|
| 425 |
box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
|
| 426 |
text-decoration: none !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
color: #FFFFFF !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
}
|
| 429 |
|
| 430 |
/* Numeric content styling */
|
| 431 |
.numeric-cell, .metric-value, .rank-value,
|
| 432 |
.level-tile-score, .core-metric-card .metric-value {
|
| 433 |
-
color:
|
| 434 |
font-family: 'Geist Mono', monospace !important;
|
| 435 |
}
|
| 436 |
|
| 437 |
/* Table content */
|
| 438 |
td, th, table * {
|
| 439 |
-
color:
|
| 440 |
}
|
| 441 |
|
| 442 |
/* All numeric and data elements */
|
| 443 |
.performance-card *, .v2-styled-table *, .dataframe * {
|
| 444 |
-
color:
|
| 445 |
}
|
| 446 |
|
| 447 |
/* Enhanced dropdown styling - more specific selectors
|
|
@@ -454,20 +492,18 @@ def get_leaderboard_css():
|
|
| 454 |
.model-dropdown [role="combobox"],
|
| 455 |
.model-dropdown button {
|
| 456 |
background: rgba(1, 9, 26, 0.95) !important;
|
| 457 |
-
color: white !important;
|
| 458 |
border: 1px solid var(--border-default) !important;
|
| 459 |
border-radius: 8px !important;
|
| 460 |
}
|
| 461 |
-
|
| 462 |
.gradio-dropdown option,
|
| 463 |
.model-dropdown option {
|
| 464 |
background: rgba(1, 9, 26, 0.95) !important;
|
| 465 |
-
color: white !important;
|
| 466 |
}
|
| 467 |
|
| 468 |
/* Force dropdown text color */
|
| 469 |
/* .gradio-dropdown *, .model-dropdown * {
|
| 470 |
-
color:
|
| 471 |
} */
|
| 472 |
|
| 473 |
/* Gradio 5.x compatible dropdown styling */
|
|
@@ -475,22 +511,31 @@ def get_leaderboard_css():
|
|
| 475 |
.gradio-container [data-testid="dropdown"],
|
| 476 |
.gradio-container select {
|
| 477 |
background-color: rgba(1, 9, 26, 0.95) !important;
|
| 478 |
-
color: white !important;
|
| 479 |
border: 1px solid rgba(245, 246, 247, 0.12) !important;
|
| 480 |
}
|
| 481 |
-
|
| 482 |
.gradio-container .gradio-dropdown option,
|
| 483 |
.gradio-container select option {
|
| 484 |
background-color: rgba(1, 9, 26, 0.95) !important;
|
| 485 |
-
color: white !important;
|
| 486 |
}
|
| 487 |
-
|
| 488 |
/* Target the actual visible text in dropdown */
|
| 489 |
.gradio-container [role="combobox"],
|
| 490 |
.gradio-container .gradio-dropdown .wrap > div {
|
| 491 |
-
color: white !important;
|
| 492 |
background-color: rgba(1, 9, 26, 0.95) !important;
|
| 493 |
}
|
| 494 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
</style>
|
| 496 |
"""
|
|
|
|
| 34 |
--border-subtle: rgba(245, 246, 247, 0.08);
|
| 35 |
--border-default: rgba(245, 246, 247, 0.12);
|
| 36 |
--border-strong: rgba(245, 246, 247, 0.2);
|
| 37 |
+
--text-primary: #FFFFFF;
|
| 38 |
+
--text-secondary: #E2E8F0;
|
| 39 |
+
--text-muted: #94A3B8;
|
| 40 |
--accent-primary: #ffd21e;
|
| 41 |
--accent-secondary: #1098F7;
|
| 42 |
--accent-tertiary: #F5F6F7;
|
|
|
|
| 44 |
--glow-secondary: rgba(16, 152, 247, 0.4);
|
| 45 |
--glow-tertiary: rgba(245, 246, 247, 0.3);
|
| 46 |
}
|
| 47 |
+
|
| 48 |
+
html.light,
|
| 49 |
+
html.light body,
|
| 50 |
+
html.light .gradio-container {
|
| 51 |
+
--bg-primary: #F8FAFC;
|
| 52 |
+
--bg-secondary: rgba(15, 23, 42, 0.06);
|
| 53 |
+
--bg-card: rgba(255, 255, 255, 0.92);
|
| 54 |
+
--border-subtle: rgba(15, 23, 42, 0.08);
|
| 55 |
+
--border-default: rgba(15, 23, 42, 0.12);
|
| 56 |
+
--border-strong: rgba(15, 23, 42, 0.18);
|
| 57 |
+
--text-primary: #0B1120;
|
| 58 |
+
--text-secondary: #1E293B;
|
| 59 |
+
--text-muted: #475569;
|
| 60 |
+
--accent-primary: #F59E0B;
|
| 61 |
+
--accent-secondary: #2563EB;
|
| 62 |
+
--accent-tertiary: #111827;
|
| 63 |
+
--glow-primary: rgba(245, 158, 11, 0.25);
|
| 64 |
+
--glow-secondary: rgba(37, 99, 235, 0.2);
|
| 65 |
+
--glow-tertiary: rgba(15, 23, 42, 0.18);
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
html.light [style*="color: white"],
|
| 69 |
+
html.light [style*="color:white"],
|
| 70 |
+
html.light [style*="#FFFFFF"],
|
| 71 |
+
html.light [style*="#ffffff"] {
|
| 72 |
+
color: var(--text-primary) !important;
|
| 73 |
+
}
|
| 74 |
|
| 75 |
/* Global font and background */
|
| 76 |
+
html, body, .gradio-container {
|
| 77 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
|
| 78 |
background: var(--bg-primary) !important;
|
|
|
|
| 79 |
}
|
| 80 |
|
| 81 |
/* Headers and text */
|
|
|
|
| 86 |
}
|
| 87 |
|
| 88 |
p, span, div, li, ul li {
|
|
|
|
| 89 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 90 |
}
|
| 91 |
|
| 92 |
/* Labels and info text */
|
| 93 |
label {
|
|
|
|
| 94 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 95 |
}
|
| 96 |
|
| 97 |
.gr-box label {
|
|
|
|
| 98 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 99 |
}
|
| 100 |
|
|
|
|
| 181 |
|
| 182 |
/* Radio button labels */
|
| 183 |
input[type="radio"] + label {
|
| 184 |
+
color: var(--text-primary) !important;
|
| 185 |
}
|
| 186 |
|
| 187 |
input[type="radio"]:checked {
|
|
|
|
| 194 |
.dropdown {
|
| 195 |
border-color: var(--border-default) !important;
|
| 196 |
background: var(--bg-card) !important;
|
|
|
|
| 197 |
transition: all 0.2s ease !important;
|
| 198 |
}
|
| 199 |
|
| 200 |
/* Dropdown option styling */
|
| 201 |
.dropdown option {
|
| 202 |
background: var(--bg-card) !important;
|
|
|
|
| 203 |
}
|
| 204 |
|
| 205 |
/* Gradio dropdown specific styling */
|
| 206 |
.gradio-dropdown select,
|
| 207 |
.gradio-dropdown [role="combobox"],
|
| 208 |
.gradio-dropdown input {
|
|
|
|
| 209 |
background: var(--bg-card) !important;
|
| 210 |
}
|
| 211 |
|
| 212 |
.gradio-dropdown option {
|
|
|
|
| 213 |
background: var(--bg-card) !important;
|
| 214 |
}
|
| 215 |
|
|
|
|
| 229 |
overflow-y: auto !important;
|
| 230 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 231 |
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
|
|
|
|
| 232 |
}
|
| 233 |
|
| 234 |
/* Table cells and headers */
|
| 235 |
.dataframe td,
|
| 236 |
.dataframe th {
|
|
|
|
| 237 |
}
|
| 238 |
|
| 239 |
/* Button styling */
|
| 240 |
button {
|
| 241 |
background: var(--bg-card) !important;
|
|
|
|
| 242 |
border: 1px solid var(--border-default) !important;
|
| 243 |
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
|
| 244 |
}
|
|
|
|
| 379 |
display: inline-block !important;
|
| 380 |
padding: 14px 28px !important;
|
| 381 |
background: #ffd21e !important;
|
| 382 |
+
color: var(--text-primary) !important;
|
| 383 |
text-decoration: none !important;
|
| 384 |
border-radius: 16px !important;
|
| 385 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
|
|
|
| 398 |
transform: translateY(-3px) !important;
|
| 399 |
box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
|
| 400 |
background: #ffd21e !important;
|
| 401 |
+
color: var(--text-primary) !important;
|
| 402 |
text-decoration: none !important;
|
| 403 |
text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
|
| 404 |
}
|
|
|
|
| 440 |
border-color: #ffd21e !important;
|
| 441 |
box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
|
| 442 |
text-decoration: none !important;
|
| 443 |
+
color: var(--text-primary) !important;
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
/* Ensure key hero/body text stays bright */
|
| 447 |
+
.hero-subtitle,
|
| 448 |
+
.section-lead,
|
| 449 |
+
.section-subtitle,
|
| 450 |
+
.criteria-card li,
|
| 451 |
+
.scenario-body,
|
| 452 |
+
.hero-action-button,
|
| 453 |
+
.hero-action-button span {
|
| 454 |
+
color: #FFFFFF !important;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
/* Language toggle button */
|
| 458 |
+
#lang-toggle-btn button,
|
| 459 |
+
#lang-toggle-btn {
|
| 460 |
color: #FFFFFF !important;
|
| 461 |
+
border-color: #ffd21e !important;
|
| 462 |
+
}
|
| 463 |
+
|
| 464 |
+
.hero-action-button {
|
| 465 |
+
border-color: #ffd21e !important;
|
| 466 |
}
|
| 467 |
|
| 468 |
/* Numeric content styling */
|
| 469 |
.numeric-cell, .metric-value, .rank-value,
|
| 470 |
.level-tile-score, .core-metric-card .metric-value {
|
| 471 |
+
color: var(--text-primary) !important;
|
| 472 |
font-family: 'Geist Mono', monospace !important;
|
| 473 |
}
|
| 474 |
|
| 475 |
/* Table content */
|
| 476 |
td, th, table * {
|
| 477 |
+
color: var(--text-primary) !important;
|
| 478 |
}
|
| 479 |
|
| 480 |
/* All numeric and data elements */
|
| 481 |
.performance-card *, .v2-styled-table *, .dataframe * {
|
| 482 |
+
color: var(--text-primary) !important;
|
| 483 |
}
|
| 484 |
|
| 485 |
/* Enhanced dropdown styling - more specific selectors
|
|
|
|
| 492 |
.model-dropdown [role="combobox"],
|
| 493 |
.model-dropdown button {
|
| 494 |
background: rgba(1, 9, 26, 0.95) !important;
|
|
|
|
| 495 |
border: 1px solid var(--border-default) !important;
|
| 496 |
border-radius: 8px !important;
|
| 497 |
}
|
| 498 |
+
|
| 499 |
.gradio-dropdown option,
|
| 500 |
.model-dropdown option {
|
| 501 |
background: rgba(1, 9, 26, 0.95) !important;
|
|
|
|
| 502 |
}
|
| 503 |
|
| 504 |
/* Force dropdown text color */
|
| 505 |
/* .gradio-dropdown *, .model-dropdown * {
|
| 506 |
+
color: var(--text-primary) !important;
|
| 507 |
} */
|
| 508 |
|
| 509 |
/* Gradio 5.x compatible dropdown styling */
|
|
|
|
| 511 |
.gradio-container [data-testid="dropdown"],
|
| 512 |
.gradio-container select {
|
| 513 |
background-color: rgba(1, 9, 26, 0.95) !important;
|
|
|
|
| 514 |
border: 1px solid rgba(245, 246, 247, 0.12) !important;
|
| 515 |
}
|
| 516 |
+
|
| 517 |
.gradio-container .gradio-dropdown option,
|
| 518 |
.gradio-container select option {
|
| 519 |
background-color: rgba(1, 9, 26, 0.95) !important;
|
|
|
|
| 520 |
}
|
| 521 |
+
|
| 522 |
/* Target the actual visible text in dropdown */
|
| 523 |
.gradio-container [role="combobox"],
|
| 524 |
.gradio-container .gradio-dropdown .wrap > div {
|
|
|
|
| 525 |
background-color: rgba(1, 9, 26, 0.95) !important;
|
| 526 |
}
|
| 527 |
|
| 528 |
+
html.light .model-dropdown .gradio-dropdown,
|
| 529 |
+
html.light .model-dropdown [role="combobox"],
|
| 530 |
+
html.light .model-dropdown button,
|
| 531 |
+
html.light .gradio-container [data-testid="dropdown"],
|
| 532 |
+
html.light .gradio-container select,
|
| 533 |
+
html.light .gradio-container [role="combobox"],
|
| 534 |
+
html.light .gradio-container .gradio-dropdown .wrap > div {
|
| 535 |
+
background-color: rgba(255, 255, 255, 0.95) !important;
|
| 536 |
+
border-color: rgba(15, 23, 42, 0.12) !important;
|
| 537 |
+
box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08) !important;
|
| 538 |
+
}
|
| 539 |
+
|
| 540 |
</style>
|
| 541 |
"""
|
tabs/leaderboard_v1_en.py
CHANGED
|
@@ -224,36 +224,36 @@ def create_leaderboard_v2_tab():
|
|
| 224 |
# Level metadata for the 7-stage task framework
|
| 225 |
level_details = {
|
| 226 |
"ALL": {
|
| 227 |
-
"title": "<span style='font-family: \"
|
| 228 |
-
"description": "<span style='font-family: \"Nanum Gothic\", sans-serif !important;'>
|
| 229 |
},
|
| 230 |
"L1": {
|
| 231 |
-
"title": "<span style='color:
|
| 232 |
-
"description": "<span style='color:
|
| 233 |
},
|
| 234 |
"L2": {
|
| 235 |
-
"title": "<span style='color:
|
| 236 |
-
"description": "<span style='color:
|
| 237 |
},
|
| 238 |
"L3": {
|
| 239 |
-
"title": "<span style='color:
|
| 240 |
-
"description": "<span style='color:
|
| 241 |
},
|
| 242 |
"L4": {
|
| 243 |
-
"title": "<span style='color:
|
| 244 |
-
"description": "<span style='color:
|
| 245 |
},
|
| 246 |
"L5": {
|
| 247 |
-
"title": "<span style='color:
|
| 248 |
-
"description": "<span style='color:
|
| 249 |
},
|
| 250 |
"L6": {
|
| 251 |
-
"title": "<span style='color:
|
| 252 |
-
"description": "<span style='color:
|
| 253 |
},
|
| 254 |
"L7": {
|
| 255 |
-
"title": "<span style='color:
|
| 256 |
-
"description": "<span style='color:
|
| 257 |
}
|
| 258 |
}
|
| 259 |
default_level = "ALL"
|
|
@@ -291,7 +291,7 @@ def create_leaderboard_v2_tab():
|
|
| 291 |
border-collapse: collapse;
|
| 292 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 293 |
background: var(--bg-card);
|
| 294 |
-
color:
|
| 295 |
}
|
| 296 |
|
| 297 |
.v2-styled-table thead {
|
|
@@ -305,7 +305,7 @@ def create_leaderboard_v2_tab():
|
|
| 305 |
padding: 14px 12px;
|
| 306 |
text-align: left;
|
| 307 |
font-weight: 600;
|
| 308 |
-
color:
|
| 309 |
border-bottom: 2px solid var(--accent-primary);
|
| 310 |
font-size: 13px;
|
| 311 |
text-transform: uppercase;
|
|
@@ -319,7 +319,7 @@ def create_leaderboard_v2_tab():
|
|
| 319 |
.v2-styled-table td {
|
| 320 |
padding: 12px;
|
| 321 |
border-bottom: 1px solid var(--border-subtle);
|
| 322 |
-
color:
|
| 323 |
transition: all 0.2s ease;
|
| 324 |
}
|
| 325 |
|
|
@@ -339,30 +339,30 @@ def create_leaderboard_v2_tab():
|
|
| 339 |
|
| 340 |
.model-name {
|
| 341 |
font-weight: 500;
|
| 342 |
-
color:
|
| 343 |
transition: color 0.2s ease;
|
| 344 |
}
|
| 345 |
|
| 346 |
/* Keep model name color consistent on hover to emphasize row highlight */
|
| 347 |
.v2-styled-table tr:hover .model-name {
|
| 348 |
-
color:
|
| 349 |
}
|
| 350 |
|
| 351 |
.numeric-cell {
|
| 352 |
font-family: 'Geist Mono', monospace;
|
| 353 |
font-size: 13px;
|
| 354 |
text-align: center;
|
| 355 |
-
color:
|
| 356 |
}
|
| 357 |
|
| 358 |
.highlight-header {
|
| 359 |
background: rgba(255, 210, 30, 0.14);
|
| 360 |
-
color:
|
| 361 |
}
|
| 362 |
|
| 363 |
.highlight-cell {
|
| 364 |
background: rgba(255, 210, 30, 0.08);
|
| 365 |
-
color:
|
| 366 |
font-weight: 600;
|
| 367 |
}
|
| 368 |
</style>
|
|
@@ -460,8 +460,8 @@ def create_leaderboard_v2_tab():
|
|
| 460 |
return f"""
|
| 461 |
<div class="domain-selector-container leaderboard-intro">
|
| 462 |
<div class="domain-header">
|
| 463 |
-
<h2 class="domain-title"
|
| 464 |
-
<p class="domain-subtitle"
|
| 465 |
</div>
|
| 466 |
<div class="dataframe-container">
|
| 467 |
"""
|
|
@@ -511,6 +511,14 @@ def create_leaderboard_v2_tab():
|
|
| 511 |
# Load initial data
|
| 512 |
initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
|
| 513 |
initial_df = load_leaderboard_data() # Load raw data for model selector
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
|
| 515 |
initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
|
| 516 |
initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
|
|
@@ -601,7 +609,7 @@ def create_leaderboard_v2_tab():
|
|
| 601 |
border-collapse: collapse;
|
| 602 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 603 |
background: var(--bg-card);
|
| 604 |
-
color:
|
| 605 |
}
|
| 606 |
|
| 607 |
.v2-styled-table thead {
|
|
@@ -615,7 +623,7 @@ def create_leaderboard_v2_tab():
|
|
| 615 |
padding: 14px 12px;
|
| 616 |
text-align: left;
|
| 617 |
font-weight: 600;
|
| 618 |
-
color:
|
| 619 |
border-bottom: 2px solid var(--accent-primary);
|
| 620 |
font-size: 14px;
|
| 621 |
text-transform: uppercase;
|
|
@@ -626,7 +634,7 @@ def create_leaderboard_v2_tab():
|
|
| 626 |
.v2-styled-table td {
|
| 627 |
padding: 12px;
|
| 628 |
border-bottom: 1px solid var(--border-subtle);
|
| 629 |
-
color:
|
| 630 |
font-size: 14px;
|
| 631 |
transition: all 0.2s ease;
|
| 632 |
}
|
|
@@ -792,25 +800,35 @@ def create_leaderboard_v2_tab():
|
|
| 792 |
filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
|
| 793 |
}
|
| 794 |
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
|
|
|
| 798 |
border-radius: 0 !important;
|
| 799 |
overflow: hidden !important;
|
| 800 |
box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
|
| 801 |
-
position: relative !important;
|
| 802 |
-
left: 50% !important;
|
| 803 |
-
right: 50% !important;
|
| 804 |
-
margin-left: -50vw !important;
|
| 805 |
-
margin-right: -50vw !important;
|
| 806 |
-
max-width: none !important;
|
| 807 |
}
|
| 808 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
#hero-banner img {
|
| 810 |
-
width: 100
|
| 811 |
-
height: auto;
|
| 812 |
-
display: block;
|
| 813 |
-
object-fit: cover;
|
| 814 |
}
|
| 815 |
|
| 816 |
.hero-title {
|
|
@@ -821,13 +839,13 @@ def create_leaderboard_v2_tab():
|
|
| 821 |
-webkit-background-clip: text;
|
| 822 |
-webkit-text-fill-color: transparent;
|
| 823 |
margin-bottom: 1rem;
|
| 824 |
-
font-family: '
|
| 825 |
}
|
| 826 |
|
| 827 |
.hero-subtitle {
|
| 828 |
color: var(--text-secondary);
|
| 829 |
font-size: 3rem;
|
| 830 |
-
font-family: '
|
| 831 |
margin-top: 0;
|
| 832 |
}
|
| 833 |
|
|
@@ -849,7 +867,7 @@ def create_leaderboard_v2_tab():
|
|
| 849 |
background: rgba(245, 246, 247, 0.06) !important;
|
| 850 |
border: 1px solid var(--border-subtle) !important;
|
| 851 |
border-radius: 999px !important;
|
| 852 |
-
color:
|
| 853 |
text-decoration: none !important;
|
| 854 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 855 |
font-weight: 600 !important;
|
|
@@ -908,10 +926,10 @@ def create_leaderboard_v2_tab():
|
|
| 908 |
.section-title {
|
| 909 |
font-size: 3.75rem;
|
| 910 |
font-weight: 700;
|
| 911 |
-
color:
|
| 912 |
margin-bottom: 12px;
|
| 913 |
text-align: center !important;
|
| 914 |
-
font-family: '
|
| 915 |
}
|
| 916 |
|
| 917 |
.section-lead, .section-subtitle {
|
|
@@ -943,7 +961,7 @@ def create_leaderboard_v2_tab():
|
|
| 943 |
|
| 944 |
.phase-card h3 {
|
| 945 |
font-size: 1.44rem !important;
|
| 946 |
-
color:
|
| 947 |
margin-bottom: 20px;
|
| 948 |
font-weight: 700;
|
| 949 |
font-family: 'Nanum Gothic', sans-serif !important;
|
|
@@ -976,7 +994,7 @@ def create_leaderboard_v2_tab():
|
|
| 976 |
position: relative;
|
| 977 |
font-size: 1.2rem !important;
|
| 978 |
font-weight: 700;
|
| 979 |
-
color:
|
| 980 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 981 |
}
|
| 982 |
|
|
@@ -1054,7 +1072,7 @@ def create_leaderboard_v2_tab():
|
|
| 1054 |
.criteria-card h3 {
|
| 1055 |
font-size: 1.25rem;
|
| 1056 |
font-weight: 700;
|
| 1057 |
-
color:
|
| 1058 |
margin: 0;
|
| 1059 |
}
|
| 1060 |
|
|
@@ -1110,6 +1128,7 @@ def create_leaderboard_v2_tab():
|
|
| 1110 |
</style>
|
| 1111 |
""")
|
| 1112 |
|
|
|
|
| 1113 |
gr.Image(
|
| 1114 |
value="banner_wide.png",
|
| 1115 |
show_label=False,
|
|
@@ -1117,6 +1136,7 @@ def create_leaderboard_v2_tab():
|
|
| 1117 |
type="filepath",
|
| 1118 |
elem_id="hero-banner"
|
| 1119 |
)
|
|
|
|
| 1120 |
|
| 1121 |
gr.HTML("""
|
| 1122 |
<div style="text-align: center; padding: 20px 0;">
|
|
@@ -1129,21 +1149,21 @@ def create_leaderboard_v2_tab():
|
|
| 1129 |
gr.HTML("""
|
| 1130 |
<div class="hero-actions">
|
| 1131 |
<a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1132 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1133 |
<path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
|
| 1134 |
<line x1="8" y1="12" x2="16" y2="12"/>
|
| 1135 |
</svg>
|
| 1136 |
<span>Blog</span>
|
| 1137 |
</a>
|
| 1138 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1139 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1140 |
<path d="M9 19c-5 1.5-5-2.5-7-3"/>
|
| 1141 |
<path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
|
| 1142 |
</svg>
|
| 1143 |
<span>GitHub</span>
|
| 1144 |
</a>
|
| 1145 |
<a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1146 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1147 |
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
| 1148 |
<polyline points="7 10 12 15 17 10"/>
|
| 1149 |
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
@@ -1151,7 +1171,7 @@ def create_leaderboard_v2_tab():
|
|
| 1151 |
<span>Dataset</span>
|
| 1152 |
</a>
|
| 1153 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1154 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1155 |
<path d="M3 3v18h18"/>
|
| 1156 |
<path d="M7 17v-6"/>
|
| 1157 |
<path d="M12 17V7"/>
|
|
@@ -1166,7 +1186,7 @@ def create_leaderboard_v2_tab():
|
|
| 1166 |
gr.HTML("""
|
| 1167 |
<div class="dashboard-section">
|
| 1168 |
<div class="section-header">
|
| 1169 |
-
<h2 class="section-title" style="font-family: '
|
| 1170 |
</div>
|
| 1171 |
<p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">We analyzed agent capabilities across seven stages—from simple tool calls to long-context retention and robustness.</p>
|
| 1172 |
<div class="phase-grid">
|
|
@@ -1176,11 +1196,11 @@ def create_leaderboard_v2_tab():
|
|
| 1176 |
<span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
|
| 1177 |
</div>
|
| 1178 |
<ul class="phase-list">
|
| 1179 |
-
<li style="color:
|
| 1180 |
-
<li style="color:
|
| 1181 |
-
<li style="color:
|
| 1182 |
-
<li style="color:
|
| 1183 |
-
<li style="color:
|
| 1184 |
</ul>
|
| 1185 |
</div>
|
| 1186 |
<div class="phase-card">
|
|
@@ -1189,8 +1209,8 @@ def create_leaderboard_v2_tab():
|
|
| 1189 |
<span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
|
| 1190 |
</div>
|
| 1191 |
<ul class="phase-list">
|
| 1192 |
-
<li style="color:
|
| 1193 |
-
<li style="color:
|
| 1194 |
</ul>
|
| 1195 |
</div>
|
| 1196 |
</div>
|
|
@@ -1204,7 +1224,7 @@ def create_leaderboard_v2_tab():
|
|
| 1204 |
<h2 class="section-title" style="font-size: 2.0rem;">High-quality scenario design tailored to 18 Korea-specific APIs and real-world use cases.</h2>
|
| 1205 |
</div>
|
| 1206 |
<div class="scenario-body">
|
| 1207 |
-
<p>We built realistic scenarios—such as appointment booking and blog review search—by integrating APIs widely used in Korea including Naver Maps, Kakao services, and local websites.</p>
|
| 1208 |
</div>
|
| 1209 |
|
| 1210 |
</div>
|
|
@@ -1357,7 +1377,7 @@ def create_leaderboard_v2_tab():
|
|
| 1357 |
filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
|
| 1358 |
letter-spacing: 0.02em;
|
| 1359 |
animation: title-shimmer 1.25s ease-in-out infinite;
|
| 1360 |
-
font-family: '
|
| 1361 |
}
|
| 1362 |
|
| 1363 |
@keyframes title-shimmer {
|
|
@@ -1497,7 +1517,7 @@ def create_leaderboard_v2_tab():
|
|
| 1497 |
.filter-group .gr-input-label {
|
| 1498 |
font-size: 1rem !important;
|
| 1499 |
font-weight: 600 !important;
|
| 1500 |
-
color:
|
| 1501 |
text-align: center !important;
|
| 1502 |
margin-bottom: 12px !important;
|
| 1503 |
}
|
|
@@ -1505,7 +1525,7 @@ def create_leaderboard_v2_tab():
|
|
| 1505 |
.filter-group-label {
|
| 1506 |
font-size: 1rem !important;
|
| 1507 |
font-weight: 600 !important;
|
| 1508 |
-
color:
|
| 1509 |
text-align: left !important;
|
| 1510 |
margin: 0 !important;
|
| 1511 |
font-family: 'Geist', sans-serif !important;
|
|
@@ -1536,7 +1556,7 @@ def create_leaderboard_v2_tab():
|
|
| 1536 |
text-align: center !important;
|
| 1537 |
position: relative !important;
|
| 1538 |
overflow: hidden !important;
|
| 1539 |
-
color:
|
| 1540 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1541 |
font-weight: 600 !important;
|
| 1542 |
font-size: 0.95rem !important;
|
|
@@ -1649,7 +1669,7 @@ def create_leaderboard_v2_tab():
|
|
| 1649 |
border: 1px solid #333333 !important;
|
| 1650 |
border-radius: 999px !important;
|
| 1651 |
padding: 12px 24px !important;
|
| 1652 |
-
color: #
|
| 1653 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1654 |
font-weight: 600 !important;
|
| 1655 |
font-size: 1rem !important;
|
|
@@ -1680,7 +1700,7 @@ def create_leaderboard_v2_tab():
|
|
| 1680 |
background: #000000 !important;
|
| 1681 |
border: 1px solid #333333 !important;
|
| 1682 |
border-radius: 999px !important;
|
| 1683 |
-
color: #
|
| 1684 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1685 |
font-weight: 600 !important;
|
| 1686 |
font-size: 0.95rem !important;
|
|
@@ -1707,7 +1727,7 @@ def create_leaderboard_v2_tab():
|
|
| 1707 |
.model-dropdown .tag {
|
| 1708 |
background: rgba(255, 210, 30, 0.18) !important;
|
| 1709 |
border: 1px solid rgba(255, 210, 30, 0.35) !important;
|
| 1710 |
-
color:
|
| 1711 |
border-radius: 999px !important;
|
| 1712 |
padding: 4px 10px !important;
|
| 1713 |
font-size: 0.85rem !important;
|
|
@@ -1782,7 +1802,7 @@ def create_leaderboard_v2_tab():
|
|
| 1782 |
font-size: 1.5rem;
|
| 1783 |
margin-bottom: 4px;
|
| 1784 |
display: block;
|
| 1785 |
-
filter: drop-shadow(0 0 10px
|
| 1786 |
}
|
| 1787 |
|
| 1788 |
.domain-name {
|
|
@@ -1797,7 +1817,7 @@ def create_leaderboard_v2_tab():
|
|
| 1797 |
top: 8px;
|
| 1798 |
right: 8px;
|
| 1799 |
background: var(--accent-primary);
|
| 1800 |
-
color:
|
| 1801 |
font-size: 0.75rem;
|
| 1802 |
padding: 2px 8px;
|
| 1803 |
border-radius: 12px;
|
|
@@ -1888,7 +1908,7 @@ def create_leaderboard_v2_tab():
|
|
| 1888 |
font-size: 0.85rem !important;
|
| 1889 |
margin-bottom: 8px !important;
|
| 1890 |
font-weight: 600 !important;
|
| 1891 |
-
color:
|
| 1892 |
display: block !important;
|
| 1893 |
}
|
| 1894 |
|
|
@@ -1921,7 +1941,7 @@ def create_leaderboard_v2_tab():
|
|
| 1921 |
.compact-radio .wrap > label:has(input[type="radio"]:checked) {
|
| 1922 |
background: transparent !important;
|
| 1923 |
border-color: var(--accent-primary) !important;
|
| 1924 |
-
color:
|
| 1925 |
font-weight: 600 !important;
|
| 1926 |
}
|
| 1927 |
|
|
@@ -1942,7 +1962,7 @@ def create_leaderboard_v2_tab():
|
|
| 1942 |
.domain-radio label[aria-checked="true"] {
|
| 1943 |
background: transparent !important;
|
| 1944 |
border-color: var(--accent-primary) !important;
|
| 1945 |
-
color:
|
| 1946 |
font-weight: 600 !important;
|
| 1947 |
}
|
| 1948 |
|
|
@@ -2031,7 +2051,7 @@ def create_leaderboard_v2_tab():
|
|
| 2031 |
border: 1px solid var(--border-subtle) !important;
|
| 2032 |
border-radius: 20px !important;
|
| 2033 |
font-size: 0.85rem !important;
|
| 2034 |
-
color:
|
| 2035 |
transition: all 0.2s ease !important;
|
| 2036 |
cursor: pointer !important;
|
| 2037 |
}
|
|
@@ -2045,7 +2065,7 @@ def create_leaderboard_v2_tab():
|
|
| 2045 |
.inline-radio label[aria-checked="true"] {
|
| 2046 |
background: rgba(255, 210, 30, 0.2) !important;
|
| 2047 |
border-color: var(--accent-primary) !important;
|
| 2048 |
-
color:
|
| 2049 |
font-weight: 600 !important;
|
| 2050 |
}
|
| 2051 |
</style>
|
|
@@ -2058,7 +2078,7 @@ def create_leaderboard_v2_tab():
|
|
| 2058 |
leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
|
| 2059 |
|
| 2060 |
# Integrated controls within leaderboard section - stacked vertically
|
| 2061 |
-
gr.HTML("<p style='color:
|
| 2062 |
domain_filter = gr.Radio(
|
| 2063 |
choices=level_options,
|
| 2064 |
value=default_level,
|
|
@@ -2068,10 +2088,10 @@ def create_leaderboard_v2_tab():
|
|
| 2068 |
elem_classes=["domain-radio", "inline-radio"]
|
| 2069 |
)
|
| 2070 |
|
| 2071 |
-
gr.HTML("<p style='color:
|
| 2072 |
with gr.Row():
|
| 2073 |
with gr.Column(scale=1):
|
| 2074 |
-
gr.HTML("<span style='color:
|
| 2075 |
model_type_filter = gr.Radio(
|
| 2076 |
choices=["All", "OSS", "API"],
|
| 2077 |
value="All",
|
|
@@ -2080,7 +2100,7 @@ def create_leaderboard_v2_tab():
|
|
| 2080 |
container=False
|
| 2081 |
)
|
| 2082 |
with gr.Column(scale=1):
|
| 2083 |
-
gr.HTML("<span style='color:
|
| 2084 |
sort_order = gr.Radio(
|
| 2085 |
choices=["Descending", "Ascending"],
|
| 2086 |
value="Descending",
|
|
@@ -2095,12 +2115,12 @@ def create_leaderboard_v2_tab():
|
|
| 2095 |
gr.HTML("""
|
| 2096 |
<div class="domain-selector-container domain-performance-container">
|
| 2097 |
<div class="domain-header">
|
| 2098 |
-
<h2 class="domain-title"
|
| 2099 |
-
<p class="domain-subtitle" style="color:
|
| 2100 |
</div>
|
| 2101 |
""")
|
| 2102 |
|
| 2103 |
-
gr.HTML("<p
|
| 2104 |
# gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>You can select up to five models.</p>")
|
| 2105 |
model_selector = gr.Dropdown(
|
| 2106 |
choices=initial_df['Model'].tolist()[:10],
|
|
@@ -2278,8 +2298,8 @@ def create_leaderboard_v2_tab():
|
|
| 2278 |
gr.HTML("""
|
| 2279 |
<div class="domain-selector-container performance-card-container">
|
| 2280 |
<div class="domain-header">
|
| 2281 |
-
<h2 class="domain-title"
|
| 2282 |
-
<p class="domain-subtitle" style="color:
|
| 2283 |
Explore detailed performance cards that visualize six core metrics plus overall SR across L1–L7 levels.
|
| 2284 |
</p>
|
| 2285 |
<p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
|
|
@@ -2292,7 +2312,7 @@ def create_leaderboard_v2_tab():
|
|
| 2292 |
|
| 2293 |
with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
|
| 2294 |
gr.HTML("""
|
| 2295 |
-
<p class="domain-subtitle" style="color:
|
| 2296 |
|
| 2297 |
""")
|
| 2298 |
card_model_selector = gr.Dropdown(
|
|
@@ -2329,15 +2349,11 @@ def create_leaderboard_v2_tab():
|
|
| 2329 |
gr.HTML("""
|
| 2330 |
<div class="domain-selector-container domain-performance-container level-metrics-wrapper">
|
| 2331 |
<div class="domain-header">
|
| 2332 |
-
<h2 class="domain-title"
|
| 2333 |
-
<p class="domain-subtitle" style="color:
|
| 2334 |
</div>
|
| 2335 |
""")
|
| 2336 |
|
| 2337 |
-
gr.HTML("""
|
| 2338 |
-
<p style="color: white; text-align: center; margin: 0 0 20px 0; font-size: 1.2rem; font-family: \'Nanum Gothic\', sans-serif;">Select a level and up to five models to explore detailed metrics.</p>
|
| 2339 |
-
""")
|
| 2340 |
-
|
| 2341 |
with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
|
| 2342 |
level_metric_selector = gr.Dropdown(
|
| 2343 |
choices=level_ids,
|
|
@@ -2373,8 +2389,8 @@ def create_leaderboard_v2_tab():
|
|
| 2373 |
# gr.HTML("""
|
| 2374 |
# <div class="domain-selector-container domain-performance-container heatmap-wrapper">
|
| 2375 |
# <div class="domain-header">
|
| 2376 |
-
# <h2 class="domain-title"
|
| 2377 |
-
# <p class="domain-subtitle"
|
| 2378 |
# </div>
|
| 2379 |
# <div class="chart-container heatmap-chart-container">
|
| 2380 |
# """)
|
|
@@ -2665,7 +2681,7 @@ def create_leaderboard_v2_tab():
|
|
| 2665 |
font-size: 1.9rem;
|
| 2666 |
font-weight: 800;
|
| 2667 |
letter-spacing: 0.01em;
|
| 2668 |
-
color:
|
| 2669 |
}
|
| 2670 |
|
| 2671 |
.meta-line {
|
|
@@ -2677,7 +2693,7 @@ def create_leaderboard_v2_tab():
|
|
| 2677 |
}
|
| 2678 |
|
| 2679 |
.meta-line span {
|
| 2680 |
-
color:
|
| 2681 |
font-weight: 600;
|
| 2682 |
}
|
| 2683 |
|
|
@@ -2717,7 +2733,7 @@ def create_leaderboard_v2_tab():
|
|
| 2717 |
.rank-value {
|
| 2718 |
font-size: 2.4rem;
|
| 2719 |
font-weight: 800;
|
| 2720 |
-
color:
|
| 2721 |
letter-spacing: 0.04em;
|
| 2722 |
}
|
| 2723 |
|
|
@@ -2822,7 +2838,7 @@ def create_leaderboard_v2_tab():
|
|
| 2822 |
border: 1px solid #333333 !important;
|
| 2823 |
border-radius: 999px !important;
|
| 2824 |
padding: 12px 20px !important;
|
| 2825 |
-
color: #
|
| 2826 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 2827 |
font-weight: 600 !important;
|
| 2828 |
font-size: 0.95rem !important;
|
|
@@ -2851,7 +2867,7 @@ def create_leaderboard_v2_tab():
|
|
| 2851 |
.level-model-dropdown button {
|
| 2852 |
background: #000000 !important;
|
| 2853 |
border: 1px solid #333333 !important;
|
| 2854 |
-
color: #
|
| 2855 |
}
|
| 2856 |
|
| 2857 |
.radar-placeholder {
|
|
@@ -2912,7 +2928,7 @@ def create_leaderboard_v2_tab():
|
|
| 2912 |
.core-metric-card .metric-value {
|
| 2913 |
font-size: 1.8rem;
|
| 2914 |
font-weight: 700;
|
| 2915 |
-
color:
|
| 2916 |
font-family: 'Geist Mono', monospace;
|
| 2917 |
}
|
| 2918 |
|
|
@@ -2945,7 +2961,7 @@ def create_leaderboard_v2_tab():
|
|
| 2945 |
.level-tile-score {
|
| 2946 |
font-size: 1.25rem;
|
| 2947 |
font-weight: 700;
|
| 2948 |
-
color:
|
| 2949 |
font-family: 'Geist Mono', monospace;
|
| 2950 |
}
|
| 2951 |
@media (max-width: 980px) {
|
|
@@ -3029,20 +3045,20 @@ def create_leaderboard_v2_tab():
|
|
| 3029 |
h2.section-title,
|
| 3030 |
.dashboard-section .section-title,
|
| 3031 |
.section-header .section-title {
|
| 3032 |
-
font-family: "
|
| 3033 |
}
|
| 3034 |
|
| 3035 |
.domain-title,
|
| 3036 |
h2.domain-title,
|
| 3037 |
.domain-header .domain-title {
|
| 3038 |
-
font-family: "
|
| 3039 |
}
|
| 3040 |
|
| 3041 |
.hero-title,
|
| 3042 |
.hero-subtitle,
|
| 3043 |
h1.hero-title,
|
| 3044 |
p.hero-subtitle {
|
| 3045 |
-
font-family: "
|
| 3046 |
font-size: 2rem; !important;
|
| 3047 |
}
|
| 3048 |
|
|
@@ -3236,8 +3252,8 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
|
|
| 3236 |
palette = [
|
| 3237 |
{'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
|
| 3238 |
{'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
|
| 3239 |
-
{'fill': 'rgba(
|
| 3240 |
-
{'fill': 'rgba(
|
| 3241 |
{'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
|
| 3242 |
]
|
| 3243 |
|
|
@@ -3362,16 +3378,7 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
|
|
| 3362 |
width=900,
|
| 3363 |
margin=dict(t=30, b=50, l=10, r=10),
|
| 3364 |
autosize=True,
|
| 3365 |
-
annotations=[
|
| 3366 |
-
dict(
|
| 3367 |
-
text="Galileo Agent Leaderboard",
|
| 3368 |
-
xref="paper", yref="paper",
|
| 3369 |
-
x=0.98, y=0.02,
|
| 3370 |
-
xanchor='right', yanchor='bottom',
|
| 3371 |
-
font=dict(size=10, color='#64748B'),
|
| 3372 |
-
showarrow=False
|
| 3373 |
-
)
|
| 3374 |
-
]
|
| 3375 |
)
|
| 3376 |
|
| 3377 |
return fig
|
|
@@ -3630,8 +3637,8 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
|
|
| 3630 |
model_palette = [
|
| 3631 |
'#ffd21e',
|
| 3632 |
'#FF8A3C',
|
| 3633 |
-
'#
|
| 3634 |
-
'#
|
| 3635 |
'#F8FAFC',
|
| 3636 |
'#38BDF8',
|
| 3637 |
]
|
|
|
|
| 224 |
# Level metadata for the 7-stage task framework
|
| 225 |
level_details = {
|
| 226 |
"ALL": {
|
| 227 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>ALL · All Tasks</span>",
|
| 228 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>First, observe the overall average performance across all seven tasks. This average should then be utilized as a baseline to conduct a more detailed per-level comparison.</span>"
|
| 229 |
},
|
| 230 |
"L1": {
|
| 231 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L1 · Single Tool Call</span>",
|
| 232 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Evaluates single tool invocation capability and basic command execution accuracy.</span>"
|
| 233 |
},
|
| 234 |
"L2": {
|
| 235 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L2 · Tool Selection</span>",
|
| 236 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Measures the ability to choose the right tool and invoke it with appropriate parameters.</span>"
|
| 237 |
},
|
| 238 |
"L3": {
|
| 239 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L3 · Sequential Tool Reasoning</span>",
|
| 240 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Validates multi-step sequential reasoning for solving tasks.</span>"
|
| 241 |
},
|
| 242 |
"L4": {
|
| 243 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L4 · Parallel Tool Reasoning</span>",
|
| 244 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Evaluates the ability to integrate and summarize information from multiple sources in parallel.</span>"
|
| 245 |
},
|
| 246 |
"L5": {
|
| 247 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L5 · Error Handling & Robustness</span>",
|
| 248 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Checks awareness of unexpected failures and the strategies used to recover.</span>"
|
| 249 |
},
|
| 250 |
"L6": {
|
| 251 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L6 · Efficient Tool Utilization</span>",
|
| 252 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Examines operational efficiency in achieving goals with minimal calls and cost.</span>"
|
| 253 |
},
|
| 254 |
"L7": {
|
| 255 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L7 · Long-Context Memory</span>",
|
| 256 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Analyzes the ability to retain and leverage long conversational context.</span>"
|
| 257 |
}
|
| 258 |
}
|
| 259 |
default_level = "ALL"
|
|
|
|
| 291 |
border-collapse: collapse;
|
| 292 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 293 |
background: var(--bg-card);
|
| 294 |
+
color: #FFFFFF;
|
| 295 |
}
|
| 296 |
|
| 297 |
.v2-styled-table thead {
|
|
|
|
| 305 |
padding: 14px 12px;
|
| 306 |
text-align: left;
|
| 307 |
font-weight: 600;
|
| 308 |
+
color: #FFFFFF;
|
| 309 |
border-bottom: 2px solid var(--accent-primary);
|
| 310 |
font-size: 13px;
|
| 311 |
text-transform: uppercase;
|
|
|
|
| 319 |
.v2-styled-table td {
|
| 320 |
padding: 12px;
|
| 321 |
border-bottom: 1px solid var(--border-subtle);
|
| 322 |
+
color: #FFFFFF;
|
| 323 |
transition: all 0.2s ease;
|
| 324 |
}
|
| 325 |
|
|
|
|
| 339 |
|
| 340 |
.model-name {
|
| 341 |
font-weight: 500;
|
| 342 |
+
color: #FFFFFF;
|
| 343 |
transition: color 0.2s ease;
|
| 344 |
}
|
| 345 |
|
| 346 |
/* Keep model name color consistent on hover to emphasize row highlight */
|
| 347 |
.v2-styled-table tr:hover .model-name {
|
| 348 |
+
color: #FFFFFF;
|
| 349 |
}
|
| 350 |
|
| 351 |
.numeric-cell {
|
| 352 |
font-family: 'Geist Mono', monospace;
|
| 353 |
font-size: 13px;
|
| 354 |
text-align: center;
|
| 355 |
+
color: #FFFFFF;
|
| 356 |
}
|
| 357 |
|
| 358 |
.highlight-header {
|
| 359 |
background: rgba(255, 210, 30, 0.14);
|
| 360 |
+
color: #FFFFFF;
|
| 361 |
}
|
| 362 |
|
| 363 |
.highlight-cell {
|
| 364 |
background: rgba(255, 210, 30, 0.08);
|
| 365 |
+
color: #FFFFFF;
|
| 366 |
font-weight: 600;
|
| 367 |
}
|
| 368 |
</style>
|
|
|
|
| 460 |
return f"""
|
| 461 |
<div class="domain-selector-container leaderboard-intro">
|
| 462 |
<div class="domain-header">
|
| 463 |
+
<h2 class="domain-title" >Agent Leaderboard · {level_title}</h2>
|
| 464 |
+
<p class="domain-subtitle" >{level_description}</p>
|
| 465 |
</div>
|
| 466 |
<div class="dataframe-container">
|
| 467 |
"""
|
|
|
|
| 511 |
# Load initial data
|
| 512 |
initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
|
| 513 |
initial_df = load_leaderboard_data() # Load raw data for model selector
|
| 514 |
+
if not initial_df.empty:
|
| 515 |
+
overall_success_numeric = pd.to_numeric(initial_df.get('Overall Success'), errors='coerce')
|
| 516 |
+
if overall_success_numeric.notna().any():
|
| 517 |
+
initial_df = initial_df.assign(**{'Overall Success': overall_success_numeric}).sort_values(
|
| 518 |
+
'Overall Success', ascending=False, na_position='last'
|
| 519 |
+
)
|
| 520 |
+
else:
|
| 521 |
+
initial_df = initial_df.sort_values('Model')
|
| 522 |
initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
|
| 523 |
initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
|
| 524 |
initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
|
|
|
|
| 609 |
border-collapse: collapse;
|
| 610 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 611 |
background: var(--bg-card);
|
| 612 |
+
color: #FFFFFF;
|
| 613 |
}
|
| 614 |
|
| 615 |
.v2-styled-table thead {
|
|
|
|
| 623 |
padding: 14px 12px;
|
| 624 |
text-align: left;
|
| 625 |
font-weight: 600;
|
| 626 |
+
color: #FFFFFF;
|
| 627 |
border-bottom: 2px solid var(--accent-primary);
|
| 628 |
font-size: 14px;
|
| 629 |
text-transform: uppercase;
|
|
|
|
| 634 |
.v2-styled-table td {
|
| 635 |
padding: 12px;
|
| 636 |
border-bottom: 1px solid var(--border-subtle);
|
| 637 |
+
color: #FFFFFF;
|
| 638 |
font-size: 14px;
|
| 639 |
transition: all 0.2s ease;
|
| 640 |
}
|
|
|
|
| 800 |
filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
|
| 801 |
}
|
| 802 |
|
| 803 |
+
.hero-banner-wrapper {
|
| 804 |
+
position: relative;
|
| 805 |
+
width: 100vw;
|
| 806 |
+
margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%);
|
| 807 |
border-radius: 0 !important;
|
| 808 |
overflow: hidden !important;
|
| 809 |
box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 810 |
}
|
| 811 |
+
|
| 812 |
+
.hero-banner-wrapper::before {
|
| 813 |
+
content: "";
|
| 814 |
+
position: absolute;
|
| 815 |
+
inset: 0;
|
| 816 |
+
background: #01091A;
|
| 817 |
+
z-index: 0;
|
| 818 |
+
}
|
| 819 |
+
|
| 820 |
+
#hero-banner {
|
| 821 |
+
position: relative;
|
| 822 |
+
width: 100% !important;
|
| 823 |
+
height: auto !important;
|
| 824 |
+
z-index: 1;
|
| 825 |
+
}
|
| 826 |
+
|
| 827 |
#hero-banner img {
|
| 828 |
+
width: 100% !important;
|
| 829 |
+
height: auto !important;
|
| 830 |
+
display: block !important;
|
| 831 |
+
object-fit: cover !important;
|
| 832 |
}
|
| 833 |
|
| 834 |
.hero-title {
|
|
|
|
| 839 |
-webkit-background-clip: text;
|
| 840 |
-webkit-text-fill-color: transparent;
|
| 841 |
margin-bottom: 1rem;
|
| 842 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 843 |
}
|
| 844 |
|
| 845 |
.hero-subtitle {
|
| 846 |
color: var(--text-secondary);
|
| 847 |
font-size: 3rem;
|
| 848 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 849 |
margin-top: 0;
|
| 850 |
}
|
| 851 |
|
|
|
|
| 867 |
background: rgba(245, 246, 247, 0.06) !important;
|
| 868 |
border: 1px solid var(--border-subtle) !important;
|
| 869 |
border-radius: 999px !important;
|
| 870 |
+
color: #FFFFFF !important;
|
| 871 |
text-decoration: none !important;
|
| 872 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 873 |
font-weight: 600 !important;
|
|
|
|
| 926 |
.section-title {
|
| 927 |
font-size: 3.75rem;
|
| 928 |
font-weight: 700;
|
| 929 |
+
color: #FFFFFF;
|
| 930 |
margin-bottom: 12px;
|
| 931 |
text-align: center !important;
|
| 932 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 933 |
}
|
| 934 |
|
| 935 |
.section-lead, .section-subtitle {
|
|
|
|
| 961 |
|
| 962 |
.phase-card h3 {
|
| 963 |
font-size: 1.44rem !important;
|
| 964 |
+
color: #FFFFFF;
|
| 965 |
margin-bottom: 20px;
|
| 966 |
font-weight: 700;
|
| 967 |
font-family: 'Nanum Gothic', sans-serif !important;
|
|
|
|
| 994 |
position: relative;
|
| 995 |
font-size: 1.2rem !important;
|
| 996 |
font-weight: 700;
|
| 997 |
+
color: #FFFFFF !important;
|
| 998 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 999 |
}
|
| 1000 |
|
|
|
|
| 1072 |
.criteria-card h3 {
|
| 1073 |
font-size: 1.25rem;
|
| 1074 |
font-weight: 700;
|
| 1075 |
+
color: #FFFFFF;
|
| 1076 |
margin: 0;
|
| 1077 |
}
|
| 1078 |
|
|
|
|
| 1128 |
</style>
|
| 1129 |
""")
|
| 1130 |
|
| 1131 |
+
gr.HTML("<div class='hero-banner-wrapper'>")
|
| 1132 |
gr.Image(
|
| 1133 |
value="banner_wide.png",
|
| 1134 |
show_label=False,
|
|
|
|
| 1136 |
type="filepath",
|
| 1137 |
elem_id="hero-banner"
|
| 1138 |
)
|
| 1139 |
+
gr.HTML("</div>")
|
| 1140 |
|
| 1141 |
gr.HTML("""
|
| 1142 |
<div style="text-align: center; padding: 20px 0;">
|
|
|
|
| 1149 |
gr.HTML("""
|
| 1150 |
<div class="hero-actions">
|
| 1151 |
<a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1152 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1153 |
<path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
|
| 1154 |
<line x1="8" y1="12" x2="16" y2="12"/>
|
| 1155 |
</svg>
|
| 1156 |
<span>Blog</span>
|
| 1157 |
</a>
|
| 1158 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1159 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1160 |
<path d="M9 19c-5 1.5-5-2.5-7-3"/>
|
| 1161 |
<path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
|
| 1162 |
</svg>
|
| 1163 |
<span>GitHub</span>
|
| 1164 |
</a>
|
| 1165 |
<a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1166 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1167 |
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
| 1168 |
<polyline points="7 10 12 15 17 10"/>
|
| 1169 |
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
|
|
| 1171 |
<span>Dataset</span>
|
| 1172 |
</a>
|
| 1173 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1174 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1175 |
<path d="M3 3v18h18"/>
|
| 1176 |
<path d="M7 17v-6"/>
|
| 1177 |
<path d="M12 17V7"/>
|
|
|
|
| 1186 |
gr.HTML("""
|
| 1187 |
<div class="dashboard-section">
|
| 1188 |
<div class="section-header">
|
| 1189 |
+
<h2 class="section-title" style="font-family: 'Nanum Gothic', sans-serif; font-size: 2.5rem;">7-Level Task Design</h2>
|
| 1190 |
</div>
|
| 1191 |
<p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">We analyzed agent capabilities across seven stages—from simple tool calls to long-context retention and robustness.</p>
|
| 1192 |
<div class="phase-grid">
|
|
|
|
| 1196 |
<span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
|
| 1197 |
</div>
|
| 1198 |
<ul class="phase-list">
|
| 1199 |
+
<li style="color: #FFFFFF;">L1: Single Tool Call</li>
|
| 1200 |
+
<li style="color: #FFFFFF;">L2: Tool Selection</li>
|
| 1201 |
+
<li style="color: #FFFFFF;">L3: Sequential Tool Reasoning</li>
|
| 1202 |
+
<li style="color: #FFFFFF;">L4: Parallel Tool Reasoning</li>
|
| 1203 |
+
<li style="color: #FFFFFF;">L5: Error Handling & Robustness</li>
|
| 1204 |
</ul>
|
| 1205 |
</div>
|
| 1206 |
<div class="phase-card">
|
|
|
|
| 1209 |
<span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
|
| 1210 |
</div>
|
| 1211 |
<ul class="phase-list">
|
| 1212 |
+
<li style="color: #FFFFFF;">L6: Efficient Tool Utilization</li>
|
| 1213 |
+
<li style="color: #FFFFFF;">L7: Long-Context Memory</li>
|
| 1214 |
</ul>
|
| 1215 |
</div>
|
| 1216 |
</div>
|
|
|
|
| 1224 |
<h2 class="section-title" style="font-size: 2.0rem;">High-quality scenario design tailored to 18 Korea-specific APIs and real-world use cases.</h2>
|
| 1225 |
</div>
|
| 1226 |
<div class="scenario-body">
|
| 1227 |
+
<p style="color: var(--text-primary);">We built realistic scenarios—such as appointment booking and blog review search—by integrating APIs widely used in Korea including Naver Maps, Kakao services, and local websites.</p>
|
| 1228 |
</div>
|
| 1229 |
|
| 1230 |
</div>
|
|
|
|
| 1377 |
filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
|
| 1378 |
letter-spacing: 0.02em;
|
| 1379 |
animation: title-shimmer 1.25s ease-in-out infinite;
|
| 1380 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 1381 |
}
|
| 1382 |
|
| 1383 |
@keyframes title-shimmer {
|
|
|
|
| 1517 |
.filter-group .gr-input-label {
|
| 1518 |
font-size: 1rem !important;
|
| 1519 |
font-weight: 600 !important;
|
| 1520 |
+
color: #FFFFFF !important;
|
| 1521 |
text-align: center !important;
|
| 1522 |
margin-bottom: 12px !important;
|
| 1523 |
}
|
|
|
|
| 1525 |
.filter-group-label {
|
| 1526 |
font-size: 1rem !important;
|
| 1527 |
font-weight: 600 !important;
|
| 1528 |
+
color: #FFFFFF !important;
|
| 1529 |
text-align: left !important;
|
| 1530 |
margin: 0 !important;
|
| 1531 |
font-family: 'Geist', sans-serif !important;
|
|
|
|
| 1556 |
text-align: center !important;
|
| 1557 |
position: relative !important;
|
| 1558 |
overflow: hidden !important;
|
| 1559 |
+
color: #FFFFFF !important;
|
| 1560 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1561 |
font-weight: 600 !important;
|
| 1562 |
font-size: 0.95rem !important;
|
|
|
|
| 1669 |
border: 1px solid #333333 !important;
|
| 1670 |
border-radius: 999px !important;
|
| 1671 |
padding: 12px 24px !important;
|
| 1672 |
+
color: #FFFFFF !important;
|
| 1673 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1674 |
font-weight: 600 !important;
|
| 1675 |
font-size: 1rem !important;
|
|
|
|
| 1700 |
background: #000000 !important;
|
| 1701 |
border: 1px solid #333333 !important;
|
| 1702 |
border-radius: 999px !important;
|
| 1703 |
+
color: #FFFFFF !important;
|
| 1704 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1705 |
font-weight: 600 !important;
|
| 1706 |
font-size: 0.95rem !important;
|
|
|
|
| 1727 |
.model-dropdown .tag {
|
| 1728 |
background: rgba(255, 210, 30, 0.18) !important;
|
| 1729 |
border: 1px solid rgba(255, 210, 30, 0.35) !important;
|
| 1730 |
+
color: #FFFFFF !important;
|
| 1731 |
border-radius: 999px !important;
|
| 1732 |
padding: 4px 10px !important;
|
| 1733 |
font-size: 0.85rem !important;
|
|
|
|
| 1802 |
font-size: 1.5rem;
|
| 1803 |
margin-bottom: 4px;
|
| 1804 |
display: block;
|
| 1805 |
+
filter: drop-shadow(0 0 10px white);
|
| 1806 |
}
|
| 1807 |
|
| 1808 |
.domain-name {
|
|
|
|
| 1817 |
top: 8px;
|
| 1818 |
right: 8px;
|
| 1819 |
background: var(--accent-primary);
|
| 1820 |
+
color: #FFFFFF;
|
| 1821 |
font-size: 0.75rem;
|
| 1822 |
padding: 2px 8px;
|
| 1823 |
border-radius: 12px;
|
|
|
|
| 1908 |
font-size: 0.85rem !important;
|
| 1909 |
margin-bottom: 8px !important;
|
| 1910 |
font-weight: 600 !important;
|
| 1911 |
+
color: #FFFFFF !important;
|
| 1912 |
display: block !important;
|
| 1913 |
}
|
| 1914 |
|
|
|
|
| 1941 |
.compact-radio .wrap > label:has(input[type="radio"]:checked) {
|
| 1942 |
background: transparent !important;
|
| 1943 |
border-color: var(--accent-primary) !important;
|
| 1944 |
+
color: #FFFFFF !important;
|
| 1945 |
font-weight: 600 !important;
|
| 1946 |
}
|
| 1947 |
|
|
|
|
| 1962 |
.domain-radio label[aria-checked="true"] {
|
| 1963 |
background: transparent !important;
|
| 1964 |
border-color: var(--accent-primary) !important;
|
| 1965 |
+
color: #FFFFFF !important;
|
| 1966 |
font-weight: 600 !important;
|
| 1967 |
}
|
| 1968 |
|
|
|
|
| 2051 |
border: 1px solid var(--border-subtle) !important;
|
| 2052 |
border-radius: 20px !important;
|
| 2053 |
font-size: 0.85rem !important;
|
| 2054 |
+
color: #FFFFFF !important;
|
| 2055 |
transition: all 0.2s ease !important;
|
| 2056 |
cursor: pointer !important;
|
| 2057 |
}
|
|
|
|
| 2065 |
.inline-radio label[aria-checked="true"] {
|
| 2066 |
background: rgba(255, 210, 30, 0.2) !important;
|
| 2067 |
border-color: var(--accent-primary) !important;
|
| 2068 |
+
color: #FFFFFF !important;
|
| 2069 |
font-weight: 600 !important;
|
| 2070 |
}
|
| 2071 |
</style>
|
|
|
|
| 2078 |
leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
|
| 2079 |
|
| 2080 |
# Integrated controls within leaderboard section - stacked vertically
|
| 2081 |
+
gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 5px 0; font-size: 1.2rem;'>Select Task Level</p>")
|
| 2082 |
domain_filter = gr.Radio(
|
| 2083 |
choices=level_options,
|
| 2084 |
value=default_level,
|
|
|
|
| 2088 |
elem_classes=["domain-radio", "inline-radio"]
|
| 2089 |
)
|
| 2090 |
|
| 2091 |
+
gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 0px 0; font-size: 1.2rem;'>๐ Filters & Sorting</p>")
|
| 2092 |
with gr.Row():
|
| 2093 |
with gr.Column(scale=1):
|
| 2094 |
+
gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>Model Access</span>")
|
| 2095 |
model_type_filter = gr.Radio(
|
| 2096 |
choices=["All", "OSS", "API"],
|
| 2097 |
value="All",
|
|
|
|
| 2100 |
container=False
|
| 2101 |
)
|
| 2102 |
with gr.Column(scale=1):
|
| 2103 |
+
gr.HTML("<span style='color: var(--text-primary);>Sort Order</span>")
|
| 2104 |
sort_order = gr.Radio(
|
| 2105 |
choices=["Descending", "Ascending"],
|
| 2106 |
value="Descending",
|
|
|
|
| 2115 |
gr.HTML("""
|
| 2116 |
<div class="domain-selector-container domain-performance-container">
|
| 2117 |
<div class="domain-header">
|
| 2118 |
+
<h2 class="domain-title" >Core Capability Radar</h2>
|
| 2119 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">Track six essential axes: <br>success, execution, reasoning, robustness, efficiency, and call validity.</p>
|
| 2120 |
</div>
|
| 2121 |
""")
|
| 2122 |
|
| 2123 |
+
gr.HTML("<p >Select models to compare (up to 5).</p>")
|
| 2124 |
# gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>You can select up to five models.</p>")
|
| 2125 |
model_selector = gr.Dropdown(
|
| 2126 |
choices=initial_df['Model'].tolist()[:10],
|
|
|
|
| 2298 |
gr.HTML("""
|
| 2299 |
<div class="domain-selector-container performance-card-container">
|
| 2300 |
<div class="domain-header">
|
| 2301 |
+
<h2 class="domain-title" >Model Performance Card</h2>
|
| 2302 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">
|
| 2303 |
Explore detailed performance cards that visualize six core metrics plus overall SR across L1–L7 levels.
|
| 2304 |
</p>
|
| 2305 |
<p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
|
|
|
|
| 2312 |
|
| 2313 |
with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
|
| 2314 |
gr.HTML("""
|
| 2315 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">Choose a model to generate its analysis card.</p>
|
| 2316 |
|
| 2317 |
""")
|
| 2318 |
card_model_selector = gr.Dropdown(
|
|
|
|
| 2349 |
gr.HTML("""
|
| 2350 |
<div class="domain-selector-container domain-performance-container level-metrics-wrapper">
|
| 2351 |
<div class="domain-header">
|
| 2352 |
+
<h2 class="domain-title" >Level-specific Metrics</h2>
|
| 2353 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">Compare model scores with each Ko-AgentBench level's dedicated metrics for deeper insights.</p>
|
| 2354 |
</div>
|
| 2355 |
""")
|
| 2356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2357 |
with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
|
| 2358 |
level_metric_selector = gr.Dropdown(
|
| 2359 |
choices=level_ids,
|
|
|
|
| 2389 |
# gr.HTML("""
|
| 2390 |
# <div class="domain-selector-container domain-performance-container heatmap-wrapper">
|
| 2391 |
# <div class="domain-header">
|
| 2392 |
+
# <h2 class="domain-title" >Comprehensive Performance Heatmap</h2>
|
| 2393 |
+
# <p class="domain-subtitle" >See each model's L1–L7 SR scores at a glance.</p>
|
| 2394 |
# </div>
|
| 2395 |
# <div class="chart-container heatmap-chart-container">
|
| 2396 |
# """)
|
|
|
|
| 2681 |
font-size: 1.9rem;
|
| 2682 |
font-weight: 800;
|
| 2683 |
letter-spacing: 0.01em;
|
| 2684 |
+
color: #FFFFFF;
|
| 2685 |
}
|
| 2686 |
|
| 2687 |
.meta-line {
|
|
|
|
| 2693 |
}
|
| 2694 |
|
| 2695 |
.meta-line span {
|
| 2696 |
+
color: #FFFFFF;
|
| 2697 |
font-weight: 600;
|
| 2698 |
}
|
| 2699 |
|
|
|
|
| 2733 |
.rank-value {
|
| 2734 |
font-size: 2.4rem;
|
| 2735 |
font-weight: 800;
|
| 2736 |
+
color: #FFFFFF;
|
| 2737 |
letter-spacing: 0.04em;
|
| 2738 |
}
|
| 2739 |
|
|
|
|
| 2838 |
border: 1px solid #333333 !important;
|
| 2839 |
border-radius: 999px !important;
|
| 2840 |
padding: 12px 20px !important;
|
| 2841 |
+
color: #FFFFFF !important;
|
| 2842 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 2843 |
font-weight: 600 !important;
|
| 2844 |
font-size: 0.95rem !important;
|
|
|
|
| 2867 |
.level-model-dropdown button {
|
| 2868 |
background: #000000 !important;
|
| 2869 |
border: 1px solid #333333 !important;
|
| 2870 |
+
color: #FFFFFF !important;
|
| 2871 |
}
|
| 2872 |
|
| 2873 |
.radar-placeholder {
|
|
|
|
| 2928 |
.core-metric-card .metric-value {
|
| 2929 |
font-size: 1.8rem;
|
| 2930 |
font-weight: 700;
|
| 2931 |
+
color: #FFFFFF;
|
| 2932 |
font-family: 'Geist Mono', monospace;
|
| 2933 |
}
|
| 2934 |
|
|
|
|
| 2961 |
.level-tile-score {
|
| 2962 |
font-size: 1.25rem;
|
| 2963 |
font-weight: 700;
|
| 2964 |
+
color: #FFFFFF;
|
| 2965 |
font-family: 'Geist Mono', monospace;
|
| 2966 |
}
|
| 2967 |
@media (max-width: 980px) {
|
|
|
|
| 3045 |
h2.section-title,
|
| 3046 |
.dashboard-section .section-title,
|
| 3047 |
.section-header .section-title {
|
| 3048 |
+
font-family: "Nanum Gothic", sans-serif !important;
|
| 3049 |
}
|
| 3050 |
|
| 3051 |
.domain-title,
|
| 3052 |
h2.domain-title,
|
| 3053 |
.domain-header .domain-title {
|
| 3054 |
+
font-family: "Nanum Gothic", sans-serif !important;
|
| 3055 |
}
|
| 3056 |
|
| 3057 |
.hero-title,
|
| 3058 |
.hero-subtitle,
|
| 3059 |
h1.hero-title,
|
| 3060 |
p.hero-subtitle {
|
| 3061 |
+
font-family: "Nanum Gothic", sans-serif !important;
|
| 3062 |
font-size: 2rem; !important;
|
| 3063 |
}
|
| 3064 |
|
|
|
|
| 3252 |
palette = [
|
| 3253 |
{'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
|
| 3254 |
{'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
|
| 3255 |
+
{'fill': 'rgba(161, 98, 7, 0.22)', 'line': '#A16207'},
|
| 3256 |
+
{'fill': 'rgba(220, 38, 38, 0.20)', 'line': '#DC2626'},
|
| 3257 |
{'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
|
| 3258 |
]
|
| 3259 |
|
|
|
|
| 3378 |
width=900,
|
| 3379 |
margin=dict(t=30, b=50, l=10, r=10),
|
| 3380 |
autosize=True,
|
| 3381 |
+
annotations=[]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3382 |
)
|
| 3383 |
|
| 3384 |
return fig
|
|
|
|
| 3637 |
model_palette = [
|
| 3638 |
'#ffd21e',
|
| 3639 |
'#FF8A3C',
|
| 3640 |
+
'#A16207',
|
| 3641 |
+
'#DC2626',
|
| 3642 |
'#F8FAFC',
|
| 3643 |
'#38BDF8',
|
| 3644 |
]
|
tabs/leaderboard_v1_kr.py
CHANGED
|
@@ -224,36 +224,36 @@ def create_leaderboard_v2_tab():
|
|
| 224 |
# Level metadata for the 7-stage task framework
|
| 225 |
level_details = {
|
| 226 |
"ALL": {
|
| 227 |
-
"title": "<span style='font-family: \"
|
| 228 |
-
"description": "<span style='font-family: \"Nanum Gothic\", sans-serif !important;'>7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다.</span>"
|
| 229 |
},
|
| 230 |
"L1": {
|
| 231 |
-
"title": "<span style='color:
|
| 232 |
-
"description": "<span style='color:
|
| 233 |
},
|
| 234 |
"L2": {
|
| 235 |
-
"title": "<span style='color:
|
| 236 |
-
"description": "<span style='color:
|
| 237 |
},
|
| 238 |
"L3": {
|
| 239 |
-
"title": "<span style='color:
|
| 240 |
-
"description": "<span style='color:
|
| 241 |
},
|
| 242 |
"L4": {
|
| 243 |
-
"title": "<span style='color:
|
| 244 |
-
"description": "<span style='color:
|
| 245 |
},
|
| 246 |
"L5": {
|
| 247 |
-
"title": "<span style='color:
|
| 248 |
-
"description": "<span style='color:
|
| 249 |
},
|
| 250 |
"L6": {
|
| 251 |
-
"title": "<span style='color:
|
| 252 |
-
"description": "<span style='color:
|
| 253 |
},
|
| 254 |
"L7": {
|
| 255 |
-
"title": "<span style='color:
|
| 256 |
-
"description": "<span style='color:
|
| 257 |
}
|
| 258 |
}
|
| 259 |
default_level = "ALL"
|
|
@@ -291,7 +291,7 @@ def create_leaderboard_v2_tab():
|
|
| 291 |
border-collapse: collapse;
|
| 292 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 293 |
background: var(--bg-card);
|
| 294 |
-
color:
|
| 295 |
}
|
| 296 |
|
| 297 |
.v2-styled-table thead {
|
|
@@ -305,7 +305,7 @@ def create_leaderboard_v2_tab():
|
|
| 305 |
padding: 14px 12px;
|
| 306 |
text-align: left;
|
| 307 |
font-weight: 600;
|
| 308 |
-
color:
|
| 309 |
border-bottom: 2px solid var(--accent-primary);
|
| 310 |
font-size: 13px;
|
| 311 |
text-transform: uppercase;
|
|
@@ -319,7 +319,7 @@ def create_leaderboard_v2_tab():
|
|
| 319 |
.v2-styled-table td {
|
| 320 |
padding: 12px;
|
| 321 |
border-bottom: 1px solid var(--border-subtle);
|
| 322 |
-
color:
|
| 323 |
transition: all 0.2s ease;
|
| 324 |
}
|
| 325 |
|
|
@@ -339,30 +339,30 @@ def create_leaderboard_v2_tab():
|
|
| 339 |
|
| 340 |
.model-name {
|
| 341 |
font-weight: 500;
|
| 342 |
-
color:
|
| 343 |
transition: color 0.2s ease;
|
| 344 |
}
|
| 345 |
|
| 346 |
/* Keep model name color consistent on hover to emphasize row highlight */
|
| 347 |
.v2-styled-table tr:hover .model-name {
|
| 348 |
-
color:
|
| 349 |
}
|
| 350 |
|
| 351 |
.numeric-cell {
|
| 352 |
font-family: 'Geist Mono', monospace;
|
| 353 |
font-size: 13px;
|
| 354 |
text-align: center;
|
| 355 |
-
color:
|
| 356 |
}
|
| 357 |
|
| 358 |
.highlight-header {
|
| 359 |
background: rgba(255, 210, 30, 0.14);
|
| 360 |
-
color:
|
| 361 |
}
|
| 362 |
|
| 363 |
.highlight-cell {
|
| 364 |
background: rgba(255, 210, 30, 0.08);
|
| 365 |
-
color:
|
| 366 |
font-weight: 600;
|
| 367 |
}
|
| 368 |
</style>
|
|
@@ -460,8 +460,8 @@ def create_leaderboard_v2_tab():
|
|
| 460 |
return f"""
|
| 461 |
<div class="domain-selector-container leaderboard-intro">
|
| 462 |
<div class="domain-header">
|
| 463 |
-
<h2 class="domain-title" style="color:
|
| 464 |
-
<p class="domain-subtitle" style="color:
|
| 465 |
</div>
|
| 466 |
<div class="dataframe-container">
|
| 467 |
"""
|
|
@@ -511,6 +511,14 @@ def create_leaderboard_v2_tab():
|
|
| 511 |
# Load initial data
|
| 512 |
initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
|
| 513 |
initial_df = load_leaderboard_data() # Load raw data for model selector
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
|
| 515 |
initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
|
| 516 |
initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
|
|
@@ -743,7 +751,7 @@ def create_leaderboard_v2_tab():
|
|
| 743 |
display: inline-block !important;
|
| 744 |
padding: 14px 28px !important;
|
| 745 |
background: #ffd21e !important;
|
| 746 |
-
color:
|
| 747 |
text-decoration: none !important;
|
| 748 |
border-radius: 16px !important;
|
| 749 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
|
@@ -777,7 +785,7 @@ def create_leaderboard_v2_tab():
|
|
| 777 |
transform: translateY(-3px) !important;
|
| 778 |
box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
|
| 779 |
background: #ffd21e !important;
|
| 780 |
-
color:
|
| 781 |
text-decoration: none !important;
|
| 782 |
text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
|
| 783 |
}
|
|
@@ -792,25 +800,35 @@ def create_leaderboard_v2_tab():
|
|
| 792 |
filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
|
| 793 |
}
|
| 794 |
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
|
|
|
| 798 |
border-radius: 0 !important;
|
| 799 |
overflow: hidden !important;
|
| 800 |
box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
|
| 801 |
-
position: relative !important;
|
| 802 |
-
left: 50% !important;
|
| 803 |
-
right: 50% !important;
|
| 804 |
-
margin-left: -50vw !important;
|
| 805 |
-
margin-right: -50vw !important;
|
| 806 |
-
max-width: none !important;
|
| 807 |
}
|
| 808 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
#hero-banner img {
|
| 810 |
-
width: 100
|
| 811 |
-
height: auto;
|
| 812 |
-
display: block;
|
| 813 |
-
object-fit: cover;
|
| 814 |
}
|
| 815 |
|
| 816 |
.hero-title {
|
|
@@ -821,13 +839,13 @@ def create_leaderboard_v2_tab():
|
|
| 821 |
-webkit-background-clip: text;
|
| 822 |
-webkit-text-fill-color: transparent;
|
| 823 |
margin-bottom: 1rem;
|
| 824 |
-
font-family: '
|
| 825 |
}
|
| 826 |
|
| 827 |
.hero-subtitle {
|
| 828 |
color: var(--text-secondary);
|
| 829 |
font-size: 3rem;
|
| 830 |
-
font-family: '
|
| 831 |
margin-top: 0;
|
| 832 |
}
|
| 833 |
|
|
@@ -911,7 +929,7 @@ def create_leaderboard_v2_tab():
|
|
| 911 |
color: var(--text-primary);
|
| 912 |
margin-bottom: 12px;
|
| 913 |
text-align: center !important;
|
| 914 |
-
font-family: '
|
| 915 |
}
|
| 916 |
|
| 917 |
.section-lead, .section-subtitle {
|
|
@@ -976,19 +994,19 @@ def create_leaderboard_v2_tab():
|
|
| 976 |
position: relative;
|
| 977 |
font-size: 1.2rem !important;
|
| 978 |
font-weight: 700;
|
| 979 |
-
color:
|
| 980 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 981 |
}
|
| 982 |
|
| 983 |
/* 추가적인 구체적 선택자 */
|
| 984 |
.phase-card .phase-chart span {
|
| 985 |
-
color:
|
| 986 |
text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8) !important;
|
| 987 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 988 |
}
|
| 989 |
|
| 990 |
.phase-grid .phase-chart span {
|
| 991 |
-
color:
|
| 992 |
z-index: 10 !important;
|
| 993 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 994 |
}
|
|
@@ -1110,6 +1128,7 @@ def create_leaderboard_v2_tab():
|
|
| 1110 |
</style>
|
| 1111 |
""")
|
| 1112 |
|
|
|
|
| 1113 |
gr.Image(
|
| 1114 |
value="banner_wide.png",
|
| 1115 |
show_label=False,
|
|
@@ -1117,6 +1136,7 @@ def create_leaderboard_v2_tab():
|
|
| 1117 |
type="filepath",
|
| 1118 |
elem_id="hero-banner"
|
| 1119 |
)
|
|
|
|
| 1120 |
|
| 1121 |
gr.HTML("""
|
| 1122 |
<div style="text-align: center; padding: 20px 0;">
|
|
@@ -1129,21 +1149,21 @@ def create_leaderboard_v2_tab():
|
|
| 1129 |
gr.HTML("""
|
| 1130 |
<div class="hero-actions">
|
| 1131 |
<a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1132 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1133 |
<path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
|
| 1134 |
<line x1="8" y1="12" x2="16" y2="12"/>
|
| 1135 |
</svg>
|
| 1136 |
<span>블로그</span>
|
| 1137 |
</a>
|
| 1138 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1139 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1140 |
<path d="M9 19c-5 1.5-5-2.5-7-3"/>
|
| 1141 |
<path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
|
| 1142 |
</svg>
|
| 1143 |
<span>GitHub</span>
|
| 1144 |
</a>
|
| 1145 |
<a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1146 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1147 |
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
| 1148 |
<polyline points="7 10 12 15 17 10"/>
|
| 1149 |
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
@@ -1151,7 +1171,7 @@ def create_leaderboard_v2_tab():
|
|
| 1151 |
<span>데이터셋</span>
|
| 1152 |
</a>
|
| 1153 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1154 |
-
<svg viewBox="0 0 24 24" fill="none" stroke="
|
| 1155 |
<path d="M3 3v18h18"/>
|
| 1156 |
<path d="M7 17v-6"/>
|
| 1157 |
<path d="M12 17V7"/>
|
|
@@ -1166,31 +1186,31 @@ def create_leaderboard_v2_tab():
|
|
| 1166 |
gr.HTML("""
|
| 1167 |
<div class="dashboard-section">
|
| 1168 |
<div class="section-header">
|
| 1169 |
-
<h2 class="section-title" style="font-family: '
|
| 1170 |
</div>
|
| 1171 |
<p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
|
| 1172 |
<div class="phase-grid">
|
| 1173 |
<div class="phase-card">
|
| 1174 |
<h3>단일 턴</h3>
|
| 1175 |
<div class="phase-chart" style="--progress:80%;">
|
| 1176 |
-
<span style="color:
|
| 1177 |
</div>
|
| 1178 |
<ul class="phase-list">
|
| 1179 |
-
<li style="color:
|
| 1180 |
-
<li style="color:
|
| 1181 |
-
<li style="color:
|
| 1182 |
-
<li style="color:
|
| 1183 |
-
<li style="color:
|
| 1184 |
</ul>
|
| 1185 |
</div>
|
| 1186 |
<div class="phase-card">
|
| 1187 |
<h3>다중 턴</h3>
|
| 1188 |
<div class="phase-chart" style="--progress:20%;">
|
| 1189 |
-
<span style="color:
|
| 1190 |
</div>
|
| 1191 |
<ul class="phase-list">
|
| 1192 |
-
<li style="color:
|
| 1193 |
-
<li style="color:
|
| 1194 |
</ul>
|
| 1195 |
</div>
|
| 1196 |
</div>
|
|
@@ -1204,7 +1224,7 @@ def create_leaderboard_v2_tab():
|
|
| 1204 |
<h2 class="section-title" style="font-size: 2.0rem;">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
|
| 1205 |
</div>
|
| 1206 |
<div class="scenario-body">
|
| 1207 |
-
<p
|
| 1208 |
</div>
|
| 1209 |
|
| 1210 |
</div>
|
|
@@ -1357,7 +1377,7 @@ def create_leaderboard_v2_tab():
|
|
| 1357 |
filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
|
| 1358 |
letter-spacing: 0.02em;
|
| 1359 |
animation: title-shimmer 1.25s ease-in-out infinite;
|
| 1360 |
-
font-family: '
|
| 1361 |
}
|
| 1362 |
|
| 1363 |
@keyframes title-shimmer {
|
|
@@ -1649,7 +1669,7 @@ def create_leaderboard_v2_tab():
|
|
| 1649 |
border: 1px solid #333333 !important;
|
| 1650 |
border-radius: 999px !important;
|
| 1651 |
padding: 12px 24px !important;
|
| 1652 |
-
color:
|
| 1653 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1654 |
font-weight: 600 !important;
|
| 1655 |
font-size: 1rem !important;
|
|
@@ -1680,7 +1700,7 @@ def create_leaderboard_v2_tab():
|
|
| 1680 |
background: #000000 !important;
|
| 1681 |
border: 1px solid #333333 !important;
|
| 1682 |
border-radius: 999px !important;
|
| 1683 |
-
color:
|
| 1684 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1685 |
font-weight: 600 !important;
|
| 1686 |
font-size: 0.95rem !important;
|
|
@@ -1735,7 +1755,7 @@ def create_leaderboard_v2_tab():
|
|
| 1735 |
background: #ffd21e !important;
|
| 1736 |
border: 1px solid rgba(255, 210, 30, 0.6) !important;
|
| 1737 |
border-radius: 999px !important;
|
| 1738 |
-
color:
|
| 1739 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1740 |
font-weight: 600 !important;
|
| 1741 |
font-size: 0.95rem !important;
|
|
@@ -1782,7 +1802,7 @@ def create_leaderboard_v2_tab():
|
|
| 1782 |
font-size: 1.5rem;
|
| 1783 |
margin-bottom: 4px;
|
| 1784 |
display: block;
|
| 1785 |
-
filter: drop-shadow(0 0 10px
|
| 1786 |
}
|
| 1787 |
|
| 1788 |
.domain-name {
|
|
@@ -1797,7 +1817,7 @@ def create_leaderboard_v2_tab():
|
|
| 1797 |
top: 8px;
|
| 1798 |
right: 8px;
|
| 1799 |
background: var(--accent-primary);
|
| 1800 |
-
color:
|
| 1801 |
font-size: 0.75rem;
|
| 1802 |
padding: 2px 8px;
|
| 1803 |
border-radius: 12px;
|
|
@@ -2045,7 +2065,7 @@ def create_leaderboard_v2_tab():
|
|
| 2045 |
.inline-radio label[aria-checked="true"] {
|
| 2046 |
background: rgba(255, 210, 30, 0.2) !important;
|
| 2047 |
border-color: var(--accent-primary) !important;
|
| 2048 |
-
color:
|
| 2049 |
font-weight: 600 !important;
|
| 2050 |
}
|
| 2051 |
</style>
|
|
@@ -2058,7 +2078,7 @@ def create_leaderboard_v2_tab():
|
|
| 2058 |
leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
|
| 2059 |
|
| 2060 |
# Integrated controls within leaderboard section - stacked vertically
|
| 2061 |
-
gr.HTML("<p style='color:
|
| 2062 |
domain_filter = gr.Radio(
|
| 2063 |
choices=level_options,
|
| 2064 |
value=default_level,
|
|
@@ -2068,10 +2088,10 @@ def create_leaderboard_v2_tab():
|
|
| 2068 |
elem_classes=["domain-radio", "inline-radio"]
|
| 2069 |
)
|
| 2070 |
|
| 2071 |
-
gr.HTML("<p style='color:
|
| 2072 |
with gr.Row():
|
| 2073 |
with gr.Column(scale=1):
|
| 2074 |
-
gr.HTML("<span style='color:
|
| 2075 |
model_type_filter = gr.Radio(
|
| 2076 |
choices=["All", "OSS", "API"],
|
| 2077 |
value="All",
|
|
@@ -2080,7 +2100,7 @@ def create_leaderboard_v2_tab():
|
|
| 2080 |
container=False
|
| 2081 |
)
|
| 2082 |
with gr.Column(scale=1):
|
| 2083 |
-
gr.HTML("<span style='color:
|
| 2084 |
sort_order = gr.Radio(
|
| 2085 |
choices=["Descending", "Ascending"],
|
| 2086 |
value="Descending",
|
|
@@ -2095,12 +2115,12 @@ def create_leaderboard_v2_tab():
|
|
| 2095 |
gr.HTML("""
|
| 2096 |
<div class="domain-selector-container domain-performance-container">
|
| 2097 |
<div class="domain-header">
|
| 2098 |
-
<h2 class="domain-title" style="color:
|
| 2099 |
-
<p class="domain-subtitle" style="color:
|
| 2100 |
</div>
|
| 2101 |
""")
|
| 2102 |
|
| 2103 |
-
gr.HTML("<p style='color:
|
| 2104 |
# gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>๋ชจ๋ธ์ ์ต๋ 5๊ฐ๊น์ง ์ ํ ๊ฐ๋ฅ ํฉ๋๋ค.</p>")
|
| 2105 |
model_selector = gr.Dropdown(
|
| 2106 |
choices=initial_df['Model'].tolist()[:10],
|
|
@@ -2278,8 +2298,8 @@ def create_leaderboard_v2_tab():
|
|
| 2278 |
gr.HTML("""
|
| 2279 |
<div class="domain-selector-container performance-card-container">
|
| 2280 |
<div class="domain-header">
|
| 2281 |
-
<h2 class="domain-title" style="color:
|
| 2282 |
-
<p class="domain-subtitle" style="color:
|
| 2283 |
๋ชจ๋ธ์ ์ฑ๋ฅ ์คํํธ๋ผ์ 6๋ ํต์ฌ ์งํ์ L1~L7 ๋จ๊ณ๋ณ ์ข
ํฉ ์ฑ๊ณต๋ฅ (SR)๋ก ์๊ฐํํ ์ ๋ฐ ๋ถ์ ์นด๋๋ฅผ ํ์ธํด๋ณด์ธ์.
|
| 2284 |
</p>
|
| 2285 |
<p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
|
|
@@ -2292,7 +2312,7 @@ def create_leaderboard_v2_tab():
|
|
| 2292 |
|
| 2293 |
with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
|
| 2294 |
gr.HTML("""
|
| 2295 |
-
<p class="domain-subtitle" style="color:
|
| 2296 |
|
| 2297 |
""")
|
| 2298 |
card_model_selector = gr.Dropdown(
|
|
@@ -2329,15 +2349,11 @@ def create_leaderboard_v2_tab():
|
|
| 2329 |
gr.HTML("""
|
| 2330 |
<div class="domain-selector-container domain-performance-container level-metrics-wrapper">
|
| 2331 |
<div class="domain-header">
|
| 2332 |
-
<h2 class="domain-title" style="color:
|
| 2333 |
-
<p class="domain-subtitle" style="color:
|
| 2334 |
</div>
|
| 2335 |
""")
|
| 2336 |
|
| 2337 |
-
gr.HTML("""
|
| 2338 |
-
<p style="color: white; text-align: center; margin: 0 0 20px 0; font-size: 1.2rem; font-family: \'Nanum Gothic\', sans-serif;">ํ์คํฌ ๋ ๋ฒจ๊ณผ ๋ชจ๋ธ(์ต๋ 5๊ฐ)์ ์ ํํ์ฌ ์์ธ ์งํ๋ฅผ ํ์ํ์ธ์.</p>
|
| 2339 |
-
""")
|
| 2340 |
-
|
| 2341 |
with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
|
| 2342 |
level_metric_selector = gr.Dropdown(
|
| 2343 |
choices=level_ids,
|
|
@@ -2373,8 +2389,8 @@ def create_leaderboard_v2_tab():
|
|
| 2373 |
# gr.HTML("""
|
| 2374 |
# <div class="domain-selector-container domain-performance-container heatmap-wrapper">
|
| 2375 |
# <div class="domain-header">
|
| 2376 |
-
# <h2 class="domain-title" style="color:
|
| 2377 |
-
# <p class="domain-subtitle" style="color:
|
| 2378 |
# </div>
|
| 2379 |
# <div class="chart-container heatmap-chart-container">
|
| 2380 |
# """)
|
|
@@ -2822,7 +2838,7 @@ def create_leaderboard_v2_tab():
|
|
| 2822 |
border: 1px solid #333333 !important;
|
| 2823 |
border-radius: 999px !important;
|
| 2824 |
padding: 12px 20px !important;
|
| 2825 |
-
color:
|
| 2826 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 2827 |
font-weight: 600 !important;
|
| 2828 |
font-size: 0.95rem !important;
|
|
@@ -2851,7 +2867,7 @@ def create_leaderboard_v2_tab():
|
|
| 2851 |
.level-model-dropdown button {
|
| 2852 |
background: #000000 !important;
|
| 2853 |
border: 1px solid #333333 !important;
|
| 2854 |
-
color:
|
| 2855 |
}
|
| 2856 |
|
| 2857 |
.radar-placeholder {
|
|
@@ -3029,20 +3045,20 @@ def create_leaderboard_v2_tab():
|
|
| 3029 |
h2.section-title,
|
| 3030 |
.dashboard-section .section-title,
|
| 3031 |
.section-header .section-title {
|
| 3032 |
-
font-family: "
|
| 3033 |
}
|
| 3034 |
|
| 3035 |
.domain-title,
|
| 3036 |
h2.domain-title,
|
| 3037 |
.domain-header .domain-title {
|
| 3038 |
-
font-family: "
|
| 3039 |
}
|
| 3040 |
|
| 3041 |
.hero-title,
|
| 3042 |
.hero-subtitle,
|
| 3043 |
h1.hero-title,
|
| 3044 |
p.hero-subtitle {
|
| 3045 |
-
font-family: "
|
| 3046 |
font-size: 2rem; !important;
|
| 3047 |
}
|
| 3048 |
|
|
@@ -3236,8 +3252,8 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
|
|
| 3236 |
palette = [
|
| 3237 |
{'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
|
| 3238 |
{'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
|
| 3239 |
-
{'fill': 'rgba(
|
| 3240 |
-
{'fill': 'rgba(
|
| 3241 |
{'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
|
| 3242 |
]
|
| 3243 |
|
|
@@ -3362,16 +3378,7 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
|
|
| 3362 |
width=900,
|
| 3363 |
margin=dict(t=30, b=50, l=10, r=10),
|
| 3364 |
autosize=True,
|
| 3365 |
-
annotations=[
|
| 3366 |
-
dict(
|
| 3367 |
-
text="Galileo Agent Leaderboard",
|
| 3368 |
-
xref="paper", yref="paper",
|
| 3369 |
-
x=0.98, y=0.02,
|
| 3370 |
-
xanchor='right', yanchor='bottom',
|
| 3371 |
-
font=dict(size=10, color='#64748B'),
|
| 3372 |
-
showarrow=False
|
| 3373 |
-
)
|
| 3374 |
-
]
|
| 3375 |
)
|
| 3376 |
|
| 3377 |
return fig
|
|
@@ -3630,8 +3637,8 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
|
|
| 3630 |
model_palette = [
|
| 3631 |
'#ffd21e',
|
| 3632 |
'#FF8A3C',
|
| 3633 |
-
'#
|
| 3634 |
-
'#
|
| 3635 |
'#F8FAFC',
|
| 3636 |
'#38BDF8',
|
| 3637 |
]
|
|
|
|
| 224 |
# Level metadata for the 7-stage task framework
|
| 225 |
level_details = {
|
| 226 |
"ALL": {
|
| 227 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>ALL · 전체 태스크</span>",
|
| 228 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다.</span>"
|
| 229 |
},
|
| 230 |
"L1": {
|
| 231 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L1 · 단일 도구 호출</span>",
|
| 232 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>단일 도구 호출 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
|
| 233 |
},
|
| 234 |
"L2": {
|
| 235 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L2 · 도구 선택</span>",
|
| 236 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
|
| 237 |
},
|
| 238 |
"L3": {
|
| 239 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L3 · 도구 순차 추론</span>",
|
| 240 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>๋ณต์ ๋จ๊ณ์ ์์ฐจ์ reasoning์ ํตํด ๋ฌธ์ ๋ฅผ ํด๊ฒฐํ๋ ๊ณผ์ ์ ๊ฒ์ฆํฉ๋๋ค.</span>"
|
| 241 |
},
|
| 242 |
"L4": {
|
| 243 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L4 · 도구 병렬 추론</span>",
|
| 244 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
|
| 245 |
},
|
| 246 |
"L5": {
|
| 247 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L5 · 오류 처리와 강건성</span>",
|
| 248 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
|
| 249 |
},
|
| 250 |
"L6": {
|
| 251 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L6 · 효율적인 도구 활용</span>",
|
| 252 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
|
| 253 |
},
|
| 254 |
"L7": {
|
| 255 |
+
"title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L7 · 장기 컨텍스트 기억</span>",
|
| 256 |
+
"description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
|
| 257 |
}
|
| 258 |
}
|
| 259 |
default_level = "ALL"
|
|
|
|
| 291 |
border-collapse: collapse;
|
| 292 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 293 |
background: var(--bg-card);
|
| 294 |
+
color: var(--text-primary);
|
| 295 |
}
|
| 296 |
|
| 297 |
.v2-styled-table thead {
|
|
|
|
| 305 |
padding: 14px 12px;
|
| 306 |
text-align: left;
|
| 307 |
font-weight: 600;
|
| 308 |
+
color: var(--text-primary);
|
| 309 |
border-bottom: 2px solid var(--accent-primary);
|
| 310 |
font-size: 13px;
|
| 311 |
text-transform: uppercase;
|
|
|
|
| 319 |
.v2-styled-table td {
|
| 320 |
padding: 12px;
|
| 321 |
border-bottom: 1px solid var(--border-subtle);
|
| 322 |
+
color: var(--text-primary);
|
| 323 |
transition: all 0.2s ease;
|
| 324 |
}
|
| 325 |
|
|
|
|
| 339 |
|
| 340 |
.model-name {
|
| 341 |
font-weight: 500;
|
| 342 |
+
color: var(--text-primary);
|
| 343 |
transition: color 0.2s ease;
|
| 344 |
}
|
| 345 |
|
| 346 |
/* Keep model name color consistent on hover to emphasize row highlight */
|
| 347 |
.v2-styled-table tr:hover .model-name {
|
| 348 |
+
color: var(--text-primary);
|
| 349 |
}
|
| 350 |
|
| 351 |
.numeric-cell {
|
| 352 |
font-family: 'Geist Mono', monospace;
|
| 353 |
font-size: 13px;
|
| 354 |
text-align: center;
|
| 355 |
+
color: var(--text-primary);
|
| 356 |
}
|
| 357 |
|
| 358 |
.highlight-header {
|
| 359 |
background: rgba(255, 210, 30, 0.14);
|
| 360 |
+
color: var(--text-primary);
|
| 361 |
}
|
| 362 |
|
| 363 |
.highlight-cell {
|
| 364 |
background: rgba(255, 210, 30, 0.08);
|
| 365 |
+
color: var(--text-primary);
|
| 366 |
font-weight: 600;
|
| 367 |
}
|
| 368 |
</style>
|
|
|
|
| 460 |
return f"""
|
| 461 |
<div class="domain-selector-container leaderboard-intro">
|
| 462 |
<div class="domain-header">
|
| 463 |
+
<h2 class="domain-title" style="color: var(--text-primary);">Agent Leaderboard · {level_title}</h2>
|
| 464 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">{level_description}</p>
|
| 465 |
</div>
|
| 466 |
<div class="dataframe-container">
|
| 467 |
"""
|
|
|
|
| 511 |
# Load initial data
|
| 512 |
initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
|
| 513 |
initial_df = load_leaderboard_data() # Load raw data for model selector
|
| 514 |
+
if not initial_df.empty:
|
| 515 |
+
overall_success_numeric = pd.to_numeric(initial_df.get('Overall Success'), errors='coerce')
|
| 516 |
+
if overall_success_numeric.notna().any():
|
| 517 |
+
initial_df = initial_df.assign(**{'Overall Success': overall_success_numeric}).sort_values(
|
| 518 |
+
'Overall Success', ascending=False, na_position='last'
|
| 519 |
+
)
|
| 520 |
+
else:
|
| 521 |
+
initial_df = initial_df.sort_values('Model')
|
| 522 |
initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
|
| 523 |
initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
|
| 524 |
initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
|
|
|
|
| 751 |
display: inline-block !important;
|
| 752 |
padding: 14px 28px !important;
|
| 753 |
background: #ffd21e !important;
|
| 754 |
+
color: var(--text-primary) !important;
|
| 755 |
text-decoration: none !important;
|
| 756 |
border-radius: 16px !important;
|
| 757 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
|
|
|
| 785 |
transform: translateY(-3px) !important;
|
| 786 |
box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
|
| 787 |
background: #ffd21e !important;
|
| 788 |
+
color: var(--text-primary) !important;
|
| 789 |
text-decoration: none !important;
|
| 790 |
text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
|
| 791 |
}
|
|
|
|
| 800 |
filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
|
| 801 |
}
|
| 802 |
|
| 803 |
+
.hero-banner-wrapper {
|
| 804 |
+
position: relative;
|
| 805 |
+
width: 100vw;
|
| 806 |
+
margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%);
|
| 807 |
border-radius: 0 !important;
|
| 808 |
overflow: hidden !important;
|
| 809 |
box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 810 |
}
|
| 811 |
+
|
| 812 |
+
.hero-banner-wrapper::before {
|
| 813 |
+
content: "";
|
| 814 |
+
position: absolute;
|
| 815 |
+
inset: 0;
|
| 816 |
+
background: #01091A;
|
| 817 |
+
z-index: 0;
|
| 818 |
+
}
|
| 819 |
+
|
| 820 |
+
#hero-banner {
|
| 821 |
+
position: relative;
|
| 822 |
+
width: 100% !important;
|
| 823 |
+
height: auto !important;
|
| 824 |
+
z-index: 1;
|
| 825 |
+
}
|
| 826 |
+
|
| 827 |
#hero-banner img {
|
| 828 |
+
width: 100% !important;
|
| 829 |
+
height: auto !important;
|
| 830 |
+
display: block !important;
|
| 831 |
+
object-fit: cover !important;
|
| 832 |
}
|
| 833 |
|
| 834 |
.hero-title {
|
|
|
|
| 839 |
-webkit-background-clip: text;
|
| 840 |
-webkit-text-fill-color: transparent;
|
| 841 |
margin-bottom: 1rem;
|
| 842 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 843 |
}
|
| 844 |
|
| 845 |
.hero-subtitle {
|
| 846 |
color: var(--text-secondary);
|
| 847 |
font-size: 3rem;
|
| 848 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 849 |
margin-top: 0;
|
| 850 |
}
|
| 851 |
|
|
|
|
| 929 |
color: var(--text-primary);
|
| 930 |
margin-bottom: 12px;
|
| 931 |
text-align: center !important;
|
| 932 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 933 |
}
|
| 934 |
|
| 935 |
.section-lead, .section-subtitle {
|
|
|
|
| 994 |
position: relative;
|
| 995 |
font-size: 1.2rem !important;
|
| 996 |
font-weight: 700;
|
| 997 |
+
color: var(--text-primary) !important;
|
| 998 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 999 |
}
|
| 1000 |
|
| 1001 |
/* 추가적인 구체적 선택자 */
|
| 1002 |
.phase-card .phase-chart span {
|
| 1003 |
+
color: var(--text-primary) !important;
|
| 1004 |
text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8) !important;
|
| 1005 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 1006 |
}
|
| 1007 |
|
| 1008 |
.phase-grid .phase-chart span {
|
| 1009 |
+
color: var(--text-primary) !important;
|
| 1010 |
z-index: 10 !important;
|
| 1011 |
font-family: 'Nanum Gothic', sans-serif !important;
|
| 1012 |
}
|
|
|
|
| 1128 |
</style>
|
| 1129 |
""")
|
| 1130 |
|
| 1131 |
+
gr.HTML("<div class='hero-banner-wrapper'>")
|
| 1132 |
gr.Image(
|
| 1133 |
value="banner_wide.png",
|
| 1134 |
show_label=False,
|
|
|
|
| 1136 |
type="filepath",
|
| 1137 |
elem_id="hero-banner"
|
| 1138 |
)
|
| 1139 |
+
gr.HTML("</div>")
|
| 1140 |
|
| 1141 |
gr.HTML("""
|
| 1142 |
<div style="text-align: center; padding: 20px 0;">
|
|
|
|
| 1149 |
gr.HTML("""
|
| 1150 |
<div class="hero-actions">
|
| 1151 |
<a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1152 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1153 |
<path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
|
| 1154 |
<line x1="8" y1="12" x2="16" y2="12"/>
|
| 1155 |
</svg>
|
| 1156 |
<span>블로그</span>
|
| 1157 |
</a>
|
| 1158 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1159 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1160 |
<path d="M9 19c-5 1.5-5-2.5-7-3"/>
|
| 1161 |
<path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
|
| 1162 |
</svg>
|
| 1163 |
<span>GitHub</span>
|
| 1164 |
</a>
|
| 1165 |
<a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1166 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1167 |
<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
|
| 1168 |
<polyline points="7 10 12 15 17 10"/>
|
| 1169 |
<line x1="12" y1="15" x2="12" y2="3"/>
|
|
|
|
| 1171 |
<span>데이터셋</span>
|
| 1172 |
</a>
|
| 1173 |
<a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
|
| 1174 |
+
<svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1175 |
<path d="M3 3v18h18"/>
|
| 1176 |
<path d="M7 17v-6"/>
|
| 1177 |
<path d="M12 17V7"/>
|
|
|
|
| 1186 |
gr.HTML("""
|
| 1187 |
<div class="dashboard-section">
|
| 1188 |
<div class="section-header">
|
| 1189 |
+
<h2 class="section-title" style="font-family: 'Nanum Gothic', sans-serif; font-size: 2.5rem;">단계별 태스크 설계</h2>
|
| 1190 |
</div>
|
| 1191 |
<p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">단순 도구 호출부터 장기적 맥락 능력, 강건성 처리 능력까지 에이전트의 능력을 7단계로 입체적으로 분석하였습니다.</p>
|
| 1192 |
<div class="phase-grid">
|
| 1193 |
<div class="phase-card">
|
| 1194 |
<h3>단일 턴</h3>
|
| 1195 |
<div class="phase-chart" style="--progress:80%;">
|
| 1196 |
+
<span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
|
| 1197 |
</div>
|
| 1198 |
<ul class="phase-list">
|
| 1199 |
+
<li style="color: var(--text-primary);">L1: 단일 도구 호출</li>
|
| 1200 |
+
<li style="color: var(--text-primary);">L2: 도구 선택</li>
|
| 1201 |
+
<li style="color: var(--text-primary);">L3: 도구 순차 추론</li>
|
| 1202 |
+
<li style="color: var(--text-primary);">L4: 도구 병렬 추론</li>
|
| 1203 |
+
<li style="color: var(--text-primary);">L5: 오류 처리와 강건성</li>
|
| 1204 |
</ul>
|
| 1205 |
</div>
|
| 1206 |
<div class="phase-card">
|
| 1207 |
<h3>다중 턴</h3>
|
| 1208 |
<div class="phase-chart" style="--progress:20%;">
|
| 1209 |
+
<span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
|
| 1210 |
</div>
|
| 1211 |
<ul class="phase-list">
|
| 1212 |
+
<li style="color: var(--text-primary);">L6: 효율적인 도구 활용</li>
|
| 1213 |
+
<li style="color: var(--text-primary);">L7: 장기 컨텍스트 기억</li>
|
| 1214 |
</ul>
|
| 1215 |
</div>
|
| 1216 |
</div>
|
|
|
|
| 1224 |
<h2 class="section-title" style="font-size: 2.0rem;">18가지 한국형 API 사용 및 실생활 환경에 특화된 고품질 시나리오 구성</h2>
|
| 1225 |
</div>
|
| 1226 |
<div class="scenario-body">
|
| 1227 |
+
<p style="color: var(--text-primary);">๋ค์ด๋ฒ, ์นด์นด์ค ๋ฑ ๊ตญ๋ด ์ค์ฌ์ฉ API๋ฅผ ๊ธฐ๋ฐ์ผ๋ก, '์ฝ์ ์์ฝ', '๋ธ๋ก๊ทธ ํ๊ธฐ ๊ฒ์'์ฒ๋ผ ์ผ์์ ์ ์ฉํ ํ์ค์ ์ธ ๋ฌธ์ ํด๊ฒฐ ์๋๋ฆฌ์ค๋ฅผ ๊ตฌํํ์ต๋๋ค.</p>
|
| 1228 |
</div>
|
| 1229 |
|
| 1230 |
</div>
|
|
|
|
| 1377 |
filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
|
| 1378 |
letter-spacing: 0.02em;
|
| 1379 |
animation: title-shimmer 1.25s ease-in-out infinite;
|
| 1380 |
+
font-family: 'Nanum Gothic', sans-serif !important;
|
| 1381 |
}
|
| 1382 |
|
| 1383 |
@keyframes title-shimmer {
|
|
|
|
| 1669 |
border: 1px solid #333333 !important;
|
| 1670 |
border-radius: 999px !important;
|
| 1671 |
padding: 12px 24px !important;
|
| 1672 |
+
color: var(--text-primary) !important;
|
| 1673 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1674 |
font-weight: 600 !important;
|
| 1675 |
font-size: 1rem !important;
|
|
|
|
| 1700 |
background: #000000 !important;
|
| 1701 |
border: 1px solid #333333 !important;
|
| 1702 |
border-radius: 999px !important;
|
| 1703 |
+
color: var(--text-primary) !important;
|
| 1704 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1705 |
font-weight: 600 !important;
|
| 1706 |
font-size: 0.95rem !important;
|
|
|
|
| 1755 |
background: #ffd21e !important;
|
| 1756 |
border: 1px solid rgba(255, 210, 30, 0.6) !important;
|
| 1757 |
border-radius: 999px !important;
|
| 1758 |
+
color: var(--text-primary) !important;
|
| 1759 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 1760 |
font-weight: 600 !important;
|
| 1761 |
font-size: 0.95rem !important;
|
|
|
|
| 1802 |
font-size: 1.5rem;
|
| 1803 |
margin-bottom: 4px;
|
| 1804 |
display: block;
|
| 1805 |
+
filter: drop-shadow(0 0 10px white);
|
| 1806 |
}
|
| 1807 |
|
| 1808 |
.domain-name {
|
|
|
|
| 1817 |
top: 8px;
|
| 1818 |
right: 8px;
|
| 1819 |
background: var(--accent-primary);
|
| 1820 |
+
color: var(--text-primary);
|
| 1821 |
font-size: 0.75rem;
|
| 1822 |
padding: 2px 8px;
|
| 1823 |
border-radius: 12px;
|
|
|
|
| 2065 |
.inline-radio label[aria-checked="true"] {
|
| 2066 |
background: rgba(255, 210, 30, 0.2) !important;
|
| 2067 |
border-color: var(--accent-primary) !important;
|
| 2068 |
+
color: var(--text-primary) !important;
|
| 2069 |
font-weight: 600 !important;
|
| 2070 |
}
|
| 2071 |
</style>
|
|
|
|
| 2078 |
leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
|
| 2079 |
|
| 2080 |
# Integrated controls within leaderboard section - stacked vertically
|
| 2081 |
+
gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 5px 0; font-size: 1.2rem;'>태스크 레벨 선택</p>")
|
| 2082 |
domain_filter = gr.Radio(
|
| 2083 |
choices=level_options,
|
| 2084 |
value=default_level,
|
|
|
|
| 2088 |
elem_classes=["domain-radio", "inline-radio"]
|
| 2089 |
)
|
| 2090 |
|
| 2091 |
+
gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 0px 0; font-size: 1.2rem;'>๐ ํํฐ ๋ฐ ์ ๋ ฌ</p>")
|
| 2092 |
with gr.Row():
|
| 2093 |
with gr.Column(scale=1):
|
| 2094 |
+
gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>모델 접근</span>")
|
| 2095 |
model_type_filter = gr.Radio(
|
| 2096 |
choices=["All", "OSS", "API"],
|
| 2097 |
value="All",
|
|
|
|
| 2100 |
container=False
|
| 2101 |
)
|
| 2102 |
with gr.Column(scale=1):
|
| 2103 |
+
gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>정렬 순서</span>")
|
| 2104 |
sort_order = gr.Radio(
|
| 2105 |
choices=["Descending", "Ascending"],
|
| 2106 |
value="Descending",
|
|
|
|
| 2115 |
gr.HTML("""
|
| 2116 |
<div class="domain-selector-container domain-performance-container">
|
| 2117 |
<div class="domain-header">
|
| 2118 |
+
<h2 class="domain-title" style="color: var(--text-primary);">핵심 역량 레이더</h2>
|
| 2119 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">6๊ฐ์ง ํ์ ํต์ฌ ์์(์ฑ๊ณต, ์คํ, ์ถ๋ก , ๊ฐ๊ฑด์ฑ, ํจ์จ์ฑ, ํธ์ถ ์ ํจ์ฑ)๋ฅผ ์ถ์ ํฉ๋๋ค.</p>
|
| 2120 |
</div>
|
| 2121 |
""")
|
| 2122 |
|
| 2123 |
+
gr.HTML("<p style='color: var(--text-primary); margin: 10px 0 0 0; font-size: 1.2rem; font-family: \"Nanum Gothic\", sans-serif;'>๋น๊ตํ ๋ชจ๋ธ์ ์ ํํ์ธ์. ์ต๋ 5๊ฐ๊น์ง ๊ฐ๋ฅํฉ๋๋ค.</p>")
|
| 2124 |
# gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>๋ชจ๋ธ์ ์ต๋ 5๊ฐ๊น์ง ์ ํ ๊ฐ๋ฅ ํฉ๋๋ค.</p>")
|
| 2125 |
model_selector = gr.Dropdown(
|
| 2126 |
choices=initial_df['Model'].tolist()[:10],
|
|
|
|
| 2298 |
gr.HTML("""
|
| 2299 |
<div class="domain-selector-container performance-card-container">
|
| 2300 |
<div class="domain-header">
|
| 2301 |
+
<h2 class="domain-title" style="color: var(--text-primary);">๋ชจ๋ธ ์ฑ๋ฅ ์นด๋</h2>
|
| 2302 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">
|
| 2303 |
모델의 성능 스펙트럼을 6대 핵심 지표와 L1~L7 단계별 종합 성공률(SR)로 시각화한 정밀 분석 카드를 확인해보세요.
|
| 2304 |
</p>
|
| 2305 |
<p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
|
|
|
|
| 2312 |
|
| 2313 |
with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
|
| 2314 |
gr.HTML("""
|
| 2315 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">๋ถ์ ์นด๋๋ฅผ ์์ฑํ ๋ชจ๋ธ์ ์ ํํ์ธ์.</p>
|
| 2316 |
|
| 2317 |
""")
|
| 2318 |
card_model_selector = gr.Dropdown(
|
|
|
|
| 2349 |
gr.HTML("""
|
| 2350 |
<div class="domain-selector-container domain-performance-container level-metrics-wrapper">
|
| 2351 |
<div class="domain-header">
|
| 2352 |
+
<h2 class="domain-title" style="color: var(--text-primary);">๋ ๋ฒจ๋ณ ์์ธ ์งํ</h2>
|
| 2353 |
+
<p class="domain-subtitle" style="color: var(--text-primary);">๊ฐ Ko-AgentBench ๋จ๊ณ๋ณ ๊ณ ์ ํ๊ฐ ์งํ๋ฅผ ํตํด ๋ชจ๋ธ ์ ์๋ฅผ ๋น๊ตํ๊ณ ๋ ์์ธํ ์ดํด๋ณด์ธ์.</p>
|
| 2354 |
</div>
|
| 2355 |
""")
|
| 2356 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2357 |
with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
|
| 2358 |
level_metric_selector = gr.Dropdown(
|
| 2359 |
choices=level_ids,
|
|
|
|
| 2389 |
# gr.HTML("""
|
| 2390 |
# <div class="domain-selector-container domain-performance-container heatmap-wrapper">
|
| 2391 |
# <div class="domain-header">
|
| 2392 |
+
# <h2 class="domain-title" style="color: var(--text-primary);">์ข
ํฉ ์ฑ๋ฅ ํํธ๋งต</h2>
|
| 2393 |
+
# <p class="domain-subtitle" style="color: var(--text-primary);">๊ฐ ๋ชจ๋ธ์ L1~L7 Ko-AgentBench SR(์ฑ๊ณต๋ฅ ) ์ ์๋ฅผ ํ๋์ ๋ณด์ธ์.</p>
|
| 2394 |
# </div>
|
| 2395 |
# <div class="chart-container heatmap-chart-container">
|
| 2396 |
# """)
|
|
|
|
| 2838 |
border: 1px solid #333333 !important;
|
| 2839 |
border-radius: 999px !important;
|
| 2840 |
padding: 12px 20px !important;
|
| 2841 |
+
color: var(--text-primary) !important;
|
| 2842 |
font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
|
| 2843 |
font-weight: 600 !important;
|
| 2844 |
font-size: 0.95rem !important;
|
|
|
|
| 2867 |
.level-model-dropdown button {
|
| 2868 |
background: #000000 !important;
|
| 2869 |
border: 1px solid #333333 !important;
|
| 2870 |
+
color: var(--text-primary) !important;
|
| 2871 |
}
|
| 2872 |
|
| 2873 |
.radar-placeholder {
|
|
|
|
| 3045 |
h2.section-title,
|
| 3046 |
.dashboard-section .section-title,
|
| 3047 |
.section-header .section-title {
|
| 3048 |
+
font-family: "Nanum Gothic", sans-serif !important;
|
| 3049 |
}
|
| 3050 |
|
| 3051 |
.domain-title,
|
| 3052 |
h2.domain-title,
|
| 3053 |
.domain-header .domain-title {
|
| 3054 |
+
font-family: "Nanum Gothic", sans-serif !important;
|
| 3055 |
}
|
| 3056 |
|
| 3057 |
.hero-title,
|
| 3058 |
.hero-subtitle,
|
| 3059 |
h1.hero-title,
|
| 3060 |
p.hero-subtitle {
|
| 3061 |
+
font-family: "Nanum Gothic", sans-serif !important;
|
| 3062 |
font-size: 2rem !important;
|
| 3063 |
}
|
| 3064 |
|
|
|
|
| 3252 |
palette = [
|
| 3253 |
{'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
|
| 3254 |
{'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
|
| 3255 |
+
{'fill': 'rgba(161, 98, 7, 0.22)', 'line': '#A16207'},
|
| 3256 |
+
{'fill': 'rgba(220, 38, 38, 0.20)', 'line': '#DC2626'},
|
| 3257 |
{'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
|
| 3258 |
]
|
| 3259 |
|
|
|
|
| 3378 |
width=900,
|
| 3379 |
margin=dict(t=30, b=50, l=10, r=10),
|
| 3380 |
autosize=True,
|
| 3381 |
+
annotations=[]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3382 |
)
|
| 3383 |
|
| 3384 |
return fig
|
|
|
|
| 3637 |
model_palette = [
|
| 3638 |
'#ffd21e',
|
| 3639 |
'#FF8A3C',
|
| 3640 |
+
'#A16207',
|
| 3641 |
+
'#DC2626',
|
| 3642 |
'#F8FAFC',
|
| 3643 |
'#38BDF8',
|
| 3644 |
]
|
utils.py
CHANGED
|
@@ -9,8 +9,8 @@ def get_chart_colors():
|
|
| 9 |
# "grid": (1, 1, 1, 0.1), # RGBA tuple for grid
|
| 10 |
# }
|
| 11 |
return {
|
| 12 |
-
"Private": "#
|
| 13 |
-
"Open source": "#
|
| 14 |
"performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
|
| 15 |
"text": "#111827",
|
| 16 |
"background": "#FFFFFF",
|
|
@@ -20,10 +20,12 @@ def get_chart_colors():
|
|
| 20 |
|
| 21 |
def get_rank_badge(rank):
|
| 22 |
"""Generate HTML for rank badge with appropriate styling"""
|
|
|
|
|
|
|
| 23 |
badge_styles = {
|
| 24 |
-
1: ("1st",
|
| 25 |
-
2: ("2nd",
|
| 26 |
-
3: ("3rd",
|
| 27 |
}
|
| 28 |
|
| 29 |
if rank in badge_styles:
|
|
@@ -63,24 +65,25 @@ def get_type_badge(model_type):
|
|
| 63 |
"""Generate HTML for model type badge"""
|
| 64 |
colors = get_chart_colors()
|
| 65 |
color_map = {
|
| 66 |
-
"Open source": colors.get("Open source", "#
|
| 67 |
-
"Proprietary": colors.get("Private", "#
|
| 68 |
-
"Private": colors.get("Private", "#
|
| 69 |
}
|
| 70 |
label_map = {
|
| 71 |
"Open source": "OSS",
|
| 72 |
"Proprietary": "API",
|
| 73 |
"Private": "API",
|
| 74 |
}
|
| 75 |
-
bg_color = color_map.get(model_type, "#
|
| 76 |
display_label = label_map.get(model_type, model_type)
|
|
|
|
| 77 |
return f"""
|
| 78 |
<div style="
|
| 79 |
display: inline-flex;
|
| 80 |
align-items: center;
|
| 81 |
padding: 4px 8px;
|
| 82 |
background: {bg_color};
|
| 83 |
-
color:
|
| 84 |
border-radius: 4px;
|
| 85 |
font-size: 0.85em;
|
| 86 |
font-weight: 500;
|
|
|
|
| 9 |
# "grid": (1, 1, 1, 0.1), # RGBA tuple for grid
|
| 10 |
# }
|
| 11 |
return {
|
| 12 |
+
"Private": "#593B1D", # rich brown for API
|
| 13 |
+
"Open source": "#FACC15", # warm amber for OSS
|
| 14 |
"performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
|
| 15 |
"text": "#111827",
|
| 16 |
"background": "#FFFFFF",
|
|
|
|
| 20 |
|
| 21 |
def get_rank_badge(rank):
|
| 22 |
"""Generate HTML for rank badge with appropriate styling"""
|
| 23 |
+
tag_background = "#593B1D"
|
| 24 |
+
tag_text_color = "#FFFFFF"
|
| 25 |
badge_styles = {
|
| 26 |
+
1: ("1st", tag_background, tag_text_color),
|
| 27 |
+
2: ("2nd", tag_background, tag_text_color),
|
| 28 |
+
3: ("3rd", tag_background, tag_text_color),
|
| 29 |
}
|
| 30 |
|
| 31 |
if rank in badge_styles:
|
|
|
|
| 65 |
"""Generate HTML for model type badge"""
|
| 66 |
colors = get_chart_colors()
|
| 67 |
color_map = {
|
| 68 |
+
"Open source": colors.get("Open source", "#FACC15"),
|
| 69 |
+
"Proprietary": colors.get("Private", "#593B1D"),
|
| 70 |
+
"Private": colors.get("Private", "#593B1D"),
|
| 71 |
}
|
| 72 |
label_map = {
|
| 73 |
"Open source": "OSS",
|
| 74 |
"Proprietary": "API",
|
| 75 |
"Private": "API",
|
| 76 |
}
|
| 77 |
+
bg_color = color_map.get(model_type, "#593B1D")
|
| 78 |
display_label = label_map.get(model_type, model_type)
|
| 79 |
+
text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
|
| 80 |
return f"""
|
| 81 |
<div style="
|
| 82 |
display: inline-flex;
|
| 83 |
align-items: center;
|
| 84 |
padding: 4px 8px;
|
| 85 |
background: {bg_color};
|
| 86 |
+
color: {text_color};
|
| 87 |
border-radius: 4px;
|
| 88 |
font-size: 0.85em;
|
| 89 |
font-weight: 500;
|