Harheem Kim committed on
Commit
c6e563a
·
1 Parent(s): a9d36f6

colors, fonts, dropdown problems

Browse files
banner_background_capture.png ADDED

Git LFS Details

  • SHA256: d7f9f7750d7da8252b7d31d45360b232f0216042fe888ef790fedc82603f4e37
  • Pointer size: 131 Bytes
  • Size of remote file: 964 kB
components/leaderboard_components.py CHANGED
@@ -5,8 +5,8 @@ These are stable components that don't change frequently
5
 
6
  def get_chart_colors():
7
  return {
8
- "Private": "#1098F7", # Airglow Blue for Proprietary
9
- "Open source": "#58BC82", # Green for Open source
10
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
11
  "text": "white",
12
  "background": "#01091A",
@@ -16,10 +16,12 @@ def get_chart_colors():
16
 
17
  def get_rank_badge(rank):
18
  """Generate HTML for rank badge with appropriate styling"""
 
 
19
  badge_styles = {
20
- 1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
21
- 2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
22
- 3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
23
  }
24
 
25
  if rank in badge_styles:
@@ -59,24 +61,25 @@ def get_type_badge(model_type):
59
  """Generate HTML for model type badge"""
60
  colors = get_chart_colors()
61
  color_map = {
62
- "Open source": colors.get("Open source", "#58BC82"),
63
- "Proprietary": colors.get("Private", "#1098F7"),
64
- "Private": colors.get("Private", "#1098F7"),
65
  }
66
  label_map = {
67
  "Open source": "OSS",
68
  "Proprietary": "API",
69
  "Private": "API",
70
  }
71
- bg_color = color_map.get(model_type, "#4F46E5")
72
  display_label = label_map.get(model_type, model_type)
 
73
  return f"""
74
  <div style="
75
  display: inline-flex;
76
  align-items: center;
77
  padding: 4px 8px;
78
  background: {bg_color};
79
- color: white;
80
  border-radius: 4px;
81
  font-size: 0.85em;
82
  font-weight: 500;
 
5
 
6
  def get_chart_colors():
7
  return {
8
+ "Private": "#593B1D", # Rich brown for API
9
+ "Open source": "#FACC15", # Warm amber for OSS
10
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
11
  "text": "white",
12
  "background": "#01091A",
 
16
 
17
  def get_rank_badge(rank):
18
  """Generate HTML for rank badge with appropriate styling"""
19
+ tag_background = "#593B1D"
20
+ tag_text_color = "#FFFFFF"
21
  badge_styles = {
22
+ 1: ("1st", tag_background, tag_text_color),
23
+ 2: ("2nd", tag_background, tag_text_color),
24
+ 3: ("3rd", tag_background, tag_text_color),
25
  }
26
 
27
  if rank in badge_styles:
 
61
  """Generate HTML for model type badge"""
62
  colors = get_chart_colors()
63
  color_map = {
64
+ "Open source": colors.get("Open source", "#FACC15"),
65
+ "Proprietary": colors.get("Private", "#593B1D"),
66
+ "Private": colors.get("Private", "#593B1D"),
67
  }
68
  label_map = {
69
  "Open source": "OSS",
70
  "Proprietary": "API",
71
  "Private": "API",
72
  }
73
+ bg_color = color_map.get(model_type, "#593B1D")
74
  display_label = label_map.get(model_type, model_type)
75
+ text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
76
  return f"""
77
  <div style="
78
  display: inline-flex;
79
  align-items: center;
80
  padding: 4px 8px;
81
  background: {bg_color};
82
+ color: {text_color};
83
  border-radius: 4px;
84
  font-size: 0.85em;
85
  font-weight: 500;
styles/leaderboard_styles.py CHANGED
@@ -34,9 +34,9 @@ def get_leaderboard_css():
34
  --border-subtle: rgba(245, 246, 247, 0.08);
35
  --border-default: rgba(245, 246, 247, 0.12);
36
  --border-strong: rgba(245, 246, 247, 0.2);
37
- --text-primary: #F5F6F7;
38
- --text-secondary: #94A3B8;
39
- --text-muted: #64748B;
40
  --accent-primary: #ffd21e;
41
  --accent-secondary: #1098F7;
42
  --accent-tertiary: #F5F6F7;
@@ -44,12 +44,38 @@ def get_leaderboard_css():
44
  --glow-secondary: rgba(16, 152, 247, 0.4);
45
  --glow-tertiary: rgba(245, 246, 247, 0.3);
46
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  /* Global font and background */
49
- .gradio-container {
50
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
51
  background: var(--bg-primary) !important;
52
- color: var(--text-primary) !important;
53
  }
54
 
55
  /* Headers and text */
@@ -60,18 +86,15 @@ def get_leaderboard_css():
60
  }
61
 
62
  p, span, div, li, ul li {
63
- color: white !important;
64
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
65
  }
66
 
67
  /* Labels and info text */
68
  label {
69
- color: white !important;
70
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
71
  }
72
 
73
  .gr-box label {
74
- color: white !important;
75
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
76
  }
77
 
@@ -158,7 +181,7 @@ def get_leaderboard_css():
158
 
159
  /* Radio button labels */
160
  input[type="radio"] + label {
161
- color: white !important;
162
  }
163
 
164
  input[type="radio"]:checked {
@@ -171,26 +194,22 @@ def get_leaderboard_css():
171
  .dropdown {
172
  border-color: var(--border-default) !important;
173
  background: var(--bg-card) !important;
174
- color: white !important;
175
  transition: all 0.2s ease !important;
176
  }
177
 
178
  /* Dropdown option styling */
179
  .dropdown option {
180
  background: var(--bg-card) !important;
181
- color: white !important;
182
  }
183
 
184
  /* Gradio dropdown specific styling */
185
  .gradio-dropdown select,
186
  .gradio-dropdown [role="combobox"],
187
  .gradio-dropdown input {
188
- color: white !important;
189
  background: var(--bg-card) !important;
190
  }
191
 
192
  .gradio-dropdown option {
193
- color: white !important;
194
  background: var(--bg-card) !important;
195
  }
196
 
@@ -210,19 +229,16 @@ def get_leaderboard_css():
210
  overflow-y: auto !important;
211
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
212
  box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
213
- color: white !important;
214
  }
215
 
216
  /* Table cells and headers */
217
  .dataframe td,
218
  .dataframe th {
219
- color: white !important;
220
  }
221
 
222
  /* Button styling */
223
  button {
224
  background: var(--bg-card) !important;
225
- color: white !important;
226
  border: 1px solid var(--border-default) !important;
227
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
228
  }
@@ -363,7 +379,7 @@ def get_leaderboard_css():
363
  display: inline-block !important;
364
  padding: 14px 28px !important;
365
  background: #ffd21e !important;
366
- color: #FFFFFF !important;
367
  text-decoration: none !important;
368
  border-radius: 16px !important;
369
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -382,7 +398,7 @@ def get_leaderboard_css():
382
  transform: translateY(-3px) !important;
383
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
384
  background: #ffd21e !important;
385
- color: #FFFFFF !important;
386
  text-decoration: none !important;
387
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
388
  }
@@ -424,24 +440,46 @@ def get_leaderboard_css():
424
  border-color: #ffd21e !important;
425
  box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
426
  text-decoration: none !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
427
  color: #FFFFFF !important;
 
 
 
 
 
428
  }
429
 
430
  /* Numeric content styling */
431
  .numeric-cell, .metric-value, .rank-value,
432
  .level-tile-score, .core-metric-card .metric-value {
433
- color: white !important;
434
  font-family: 'Geist Mono', monospace !important;
435
  }
436
 
437
  /* Table content */
438
  td, th, table * {
439
- color: white !important;
440
  }
441
 
442
  /* All numeric and data elements */
443
  .performance-card *, .v2-styled-table *, .dataframe * {
444
- color: white !important;
445
  }
446
 
447
  /* Enhanced dropdown styling - more specific selectors
@@ -454,20 +492,18 @@ def get_leaderboard_css():
454
  .model-dropdown [role="combobox"],
455
  .model-dropdown button {
456
  background: rgba(1, 9, 26, 0.95) !important;
457
- color: white !important;
458
  border: 1px solid var(--border-default) !important;
459
  border-radius: 8px !important;
460
  }
461
-
462
  .gradio-dropdown option,
463
  .model-dropdown option {
464
  background: rgba(1, 9, 26, 0.95) !important;
465
- color: white !important;
466
  }
467
 
468
  /* Force dropdown text color */
469
  /* .gradio-dropdown *, .model-dropdown * {
470
- color: white !important;
471
  } */
472
 
473
  /* Gradio 5.x compatible dropdown styling */
@@ -475,22 +511,31 @@ def get_leaderboard_css():
475
  .gradio-container [data-testid="dropdown"],
476
  .gradio-container select {
477
  background-color: rgba(1, 9, 26, 0.95) !important;
478
- color: white !important;
479
  border: 1px solid rgba(245, 246, 247, 0.12) !important;
480
  }
481
-
482
  .gradio-container .gradio-dropdown option,
483
  .gradio-container select option {
484
  background-color: rgba(1, 9, 26, 0.95) !important;
485
- color: white !important;
486
  }
487
-
488
  /* Target the actual visible text in dropdown */
489
  .gradio-container [role="combobox"],
490
  .gradio-container .gradio-dropdown .wrap > div {
491
- color: white !important;
492
  background-color: rgba(1, 9, 26, 0.95) !important;
493
  }
494
 
 
 
 
 
 
 
 
 
 
 
 
 
495
  </style>
496
  """
 
34
  --border-subtle: rgba(245, 246, 247, 0.08);
35
  --border-default: rgba(245, 246, 247, 0.12);
36
  --border-strong: rgba(245, 246, 247, 0.2);
37
+ --text-primary: #FFFFFF;
38
+ --text-secondary: #E2E8F0;
39
+ --text-muted: #94A3B8;
40
  --accent-primary: #ffd21e;
41
  --accent-secondary: #1098F7;
42
  --accent-tertiary: #F5F6F7;
 
44
  --glow-secondary: rgba(16, 152, 247, 0.4);
45
  --glow-tertiary: rgba(245, 246, 247, 0.3);
46
  }
47
+
48
+ html.light,
49
+ html.light body,
50
+ html.light .gradio-container {
51
+ --bg-primary: #F8FAFC;
52
+ --bg-secondary: rgba(15, 23, 42, 0.06);
53
+ --bg-card: rgba(255, 255, 255, 0.92);
54
+ --border-subtle: rgba(15, 23, 42, 0.08);
55
+ --border-default: rgba(15, 23, 42, 0.12);
56
+ --border-strong: rgba(15, 23, 42, 0.18);
57
+ --text-primary: #0B1120;
58
+ --text-secondary: #1E293B;
59
+ --text-muted: #475569;
60
+ --accent-primary: #F59E0B;
61
+ --accent-secondary: #2563EB;
62
+ --accent-tertiary: #111827;
63
+ --glow-primary: rgba(245, 158, 11, 0.25);
64
+ --glow-secondary: rgba(37, 99, 235, 0.2);
65
+ --glow-tertiary: rgba(15, 23, 42, 0.18);
66
+ }
67
+
68
+ html.light [style*="color: white"],
69
+ html.light [style*="color:white"],
70
+ html.light [style*="#FFFFFF"],
71
+ html.light [style*="#ffffff"] {
72
+ color: var(--text-primary) !important;
73
+ }
74
 
75
  /* Global font and background */
76
+ html, body, .gradio-container {
77
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Inter', sans-serif !important;
78
  background: var(--bg-primary) !important;
 
79
  }
80
 
81
  /* Headers and text */
 
86
  }
87
 
88
  p, span, div, li, ul li {
 
89
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
90
  }
91
 
92
  /* Labels and info text */
93
  label {
 
94
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
95
  }
96
 
97
  .gr-box label {
 
98
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
99
  }
100
 
 
181
 
182
  /* Radio button labels */
183
  input[type="radio"] + label {
184
+ color: var(--text-primary) !important;
185
  }
186
 
187
  input[type="radio"]:checked {
 
194
  .dropdown {
195
  border-color: var(--border-default) !important;
196
  background: var(--bg-card) !important;
 
197
  transition: all 0.2s ease !important;
198
  }
199
 
200
  /* Dropdown option styling */
201
  .dropdown option {
202
  background: var(--bg-card) !important;
 
203
  }
204
 
205
  /* Gradio dropdown specific styling */
206
  .gradio-dropdown select,
207
  .gradio-dropdown [role="combobox"],
208
  .gradio-dropdown input {
 
209
  background: var(--bg-card) !important;
210
  }
211
 
212
  .gradio-dropdown option {
 
213
  background: var(--bg-card) !important;
214
  }
215
 
 
229
  overflow-y: auto !important;
230
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
231
  box-shadow: 0 4px 16px rgba(0, 0, 0, 0.3) !important;
 
232
  }
233
 
234
  /* Table cells and headers */
235
  .dataframe td,
236
  .dataframe th {
 
237
  }
238
 
239
  /* Button styling */
240
  button {
241
  background: var(--bg-card) !important;
 
242
  border: 1px solid var(--border-default) !important;
243
  transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
244
  }
 
379
  display: inline-block !important;
380
  padding: 14px 28px !important;
381
  background: #ffd21e !important;
382
+ color: var(--text-primary) !important;
383
  text-decoration: none !important;
384
  border-radius: 16px !important;
385
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
 
398
  transform: translateY(-3px) !important;
399
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
400
  background: #ffd21e !important;
401
+ color: var(--text-primary) !important;
402
  text-decoration: none !important;
403
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
404
  }
 
440
  border-color: #ffd21e !important;
441
  box-shadow: 0 8px 24px rgba(255, 210, 30, 0.3), 0 4px 12px rgba(0, 0, 0, 0.4) !important;
442
  text-decoration: none !important;
443
+ color: var(--text-primary) !important;
444
+ }
445
+
446
+ /* Ensure key hero/body text stays bright */
447
+ .hero-subtitle,
448
+ .section-lead,
449
+ .section-subtitle,
450
+ .criteria-card li,
451
+ .scenario-body,
452
+ .hero-action-button,
453
+ .hero-action-button span {
454
+ color: #FFFFFF !important;
455
+ }
456
+
457
+ /* Language toggle button */
458
+ #lang-toggle-btn button,
459
+ #lang-toggle-btn {
460
  color: #FFFFFF !important;
461
+ border-color: #ffd21e !important;
462
+ }
463
+
464
+ .hero-action-button {
465
+ border-color: #ffd21e !important;
466
  }
467
 
468
  /* Numeric content styling */
469
  .numeric-cell, .metric-value, .rank-value,
470
  .level-tile-score, .core-metric-card .metric-value {
471
+ color: var(--text-primary) !important;
472
  font-family: 'Geist Mono', monospace !important;
473
  }
474
 
475
  /* Table content */
476
  td, th, table * {
477
+ color: var(--text-primary) !important;
478
  }
479
 
480
  /* All numeric and data elements */
481
  .performance-card *, .v2-styled-table *, .dataframe * {
482
+ color: var(--text-primary) !important;
483
  }
484
 
485
  /* Enhanced dropdown styling - more specific selectors
 
492
  .model-dropdown [role="combobox"],
493
  .model-dropdown button {
494
  background: rgba(1, 9, 26, 0.95) !important;
 
495
  border: 1px solid var(--border-default) !important;
496
  border-radius: 8px !important;
497
  }
498
+
499
  .gradio-dropdown option,
500
  .model-dropdown option {
501
  background: rgba(1, 9, 26, 0.95) !important;
 
502
  }
503
 
504
  /* Force dropdown text color */
505
  /* .gradio-dropdown *, .model-dropdown * {
506
+ color: var(--text-primary) !important;
507
  } */
508
 
509
  /* Gradio 5.x compatible dropdown styling */
 
511
  .gradio-container [data-testid="dropdown"],
512
  .gradio-container select {
513
  background-color: rgba(1, 9, 26, 0.95) !important;
 
514
  border: 1px solid rgba(245, 246, 247, 0.12) !important;
515
  }
516
+
517
  .gradio-container .gradio-dropdown option,
518
  .gradio-container select option {
519
  background-color: rgba(1, 9, 26, 0.95) !important;
 
520
  }
521
+
522
  /* Target the actual visible text in dropdown */
523
  .gradio-container [role="combobox"],
524
  .gradio-container .gradio-dropdown .wrap > div {
 
525
  background-color: rgba(1, 9, 26, 0.95) !important;
526
  }
527
 
528
+ html.light .model-dropdown .gradio-dropdown,
529
+ html.light .model-dropdown [role="combobox"],
530
+ html.light .model-dropdown button,
531
+ html.light .gradio-container [data-testid="dropdown"],
532
+ html.light .gradio-container select,
533
+ html.light .gradio-container [role="combobox"],
534
+ html.light .gradio-container .gradio-dropdown .wrap > div {
535
+ background-color: rgba(255, 255, 255, 0.95) !important;
536
+ border-color: rgba(15, 23, 42, 0.12) !important;
537
+ box-shadow: 0 8px 20px rgba(15, 23, 42, 0.08) !important;
538
+ }
539
+
540
  </style>
541
  """
tabs/leaderboard_v1_en.py CHANGED
@@ -224,36 +224,36 @@ def create_leaderboard_v2_tab():
224
  # Level metadata for the 7-stage task framework
225
  level_details = {
226
  "ALL": {
227
- "title": "<span style='font-family: \"Gowun Dodum\", sans-serif !important;'>ALL ยท All Tasks</span>",
228
- "description": "<span style='font-family: \"Nanum Gothic\", sans-serif !important;'>See average performance across all seven tasks and use it as a baseline for per-level comparison.</span>"
229
  },
230
  "L1": {
231
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L1 ยท Single Tool Call</span>",
232
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Evaluates single tool invocation capability and basic command execution accuracy.</span>"
233
  },
234
  "L2": {
235
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L2 ยท Tool Selection</span>",
236
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Measures the ability to choose the right tool and invoke it with appropriate parameters.</span>"
237
  },
238
  "L3": {
239
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L3 ยท Sequential Tool Reasoning</span>",
240
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Validates multi-step sequential reasoning for solving tasks.</span>"
241
  },
242
  "L4": {
243
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L4 ยท Parallel Tool Reasoning</span>",
244
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Evaluates the ability to integrate and summarize information from multiple sources in parallel.</span>"
245
  },
246
  "L5": {
247
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L5 ยท Error Handling & Robustness</span>",
248
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Checks awareness of unexpected failures and the strategies used to recover.</span>"
249
  },
250
  "L6": {
251
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L6 ยท Efficient Tool Utilization</span>",
252
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Examines operational efficiency in achieving goals with minimal calls and cost.</span>"
253
  },
254
  "L7": {
255
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L7 ยท Long-Context Memory</span>",
256
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>Analyzes the ability to retain and leverage long conversational context.</span>"
257
  }
258
  }
259
  default_level = "ALL"
@@ -291,7 +291,7 @@ def create_leaderboard_v2_tab():
291
  border-collapse: collapse;
292
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
293
  background: var(--bg-card);
294
- color: white;
295
  }
296
 
297
  .v2-styled-table thead {
@@ -305,7 +305,7 @@ def create_leaderboard_v2_tab():
305
  padding: 14px 12px;
306
  text-align: left;
307
  font-weight: 600;
308
- color: white;
309
  border-bottom: 2px solid var(--accent-primary);
310
  font-size: 13px;
311
  text-transform: uppercase;
@@ -319,7 +319,7 @@ def create_leaderboard_v2_tab():
319
  .v2-styled-table td {
320
  padding: 12px;
321
  border-bottom: 1px solid var(--border-subtle);
322
- color: white;
323
  transition: all 0.2s ease;
324
  }
325
 
@@ -339,30 +339,30 @@ def create_leaderboard_v2_tab():
339
 
340
  .model-name {
341
  font-weight: 500;
342
- color: white;
343
  transition: color 0.2s ease;
344
  }
345
 
346
  /* Keep model name color consistent on hover to emphasize row highlight */
347
  .v2-styled-table tr:hover .model-name {
348
- color: white;
349
  }
350
 
351
  .numeric-cell {
352
  font-family: 'Geist Mono', monospace;
353
  font-size: 13px;
354
  text-align: center;
355
- color: white;
356
  }
357
 
358
  .highlight-header {
359
  background: rgba(255, 210, 30, 0.14);
360
- color: white;
361
  }
362
 
363
  .highlight-cell {
364
  background: rgba(255, 210, 30, 0.08);
365
- color: white;
366
  font-weight: 600;
367
  }
368
  </style>
@@ -460,8 +460,8 @@ def create_leaderboard_v2_tab():
460
  return f"""
461
  <div class="domain-selector-container leaderboard-intro">
462
  <div class="domain-header">
463
- <h2 class="domain-title" style="color: white;">Agent Leaderboard · {level_title}</h2>
464
- <p class="domain-subtitle" style="color: white;">{level_description}</p>
465
  </div>
466
  <div class="dataframe-container">
467
  """
@@ -511,6 +511,14 @@ def create_leaderboard_v2_tab():
511
  # Load initial data
512
  initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
513
  initial_df = load_leaderboard_data() # Load raw data for model selector
 
 
 
 
 
 
 
 
514
  initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
515
  initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
516
  initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
@@ -601,7 +609,7 @@ def create_leaderboard_v2_tab():
601
  border-collapse: collapse;
602
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
603
  background: var(--bg-card);
604
- color: var(--text-primary);
605
  }
606
 
607
  .v2-styled-table thead {
@@ -615,7 +623,7 @@ def create_leaderboard_v2_tab():
615
  padding: 14px 12px;
616
  text-align: left;
617
  font-weight: 600;
618
- color: var(--text-primary);
619
  border-bottom: 2px solid var(--accent-primary);
620
  font-size: 14px;
621
  text-transform: uppercase;
@@ -626,7 +634,7 @@ def create_leaderboard_v2_tab():
626
  .v2-styled-table td {
627
  padding: 12px;
628
  border-bottom: 1px solid var(--border-subtle);
629
- color: var(--text-primary);
630
  font-size: 14px;
631
  transition: all 0.2s ease;
632
  }
@@ -792,25 +800,35 @@ def create_leaderboard_v2_tab():
792
  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
793
  }
794
 
795
- #hero-banner {
796
- width: 100vw !important;
797
- margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%) !important;
 
798
  border-radius: 0 !important;
799
  overflow: hidden !important;
800
  box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
801
- position: relative !important;
802
- left: 50% !important;
803
- right: 50% !important;
804
- margin-left: -50vw !important;
805
- margin-right: -50vw !important;
806
- max-width: none !important;
807
  }
808
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  #hero-banner img {
810
- width: 100%;
811
- height: auto;
812
- display: block;
813
- object-fit: cover;
814
  }
815
 
816
  .hero-title {
@@ -821,13 +839,13 @@ def create_leaderboard_v2_tab():
821
  -webkit-background-clip: text;
822
  -webkit-text-fill-color: transparent;
823
  margin-bottom: 1rem;
824
- font-family: 'Do Hyeon', sans-serif !important;
825
  }
826
 
827
  .hero-subtitle {
828
  color: var(--text-secondary);
829
  font-size: 3rem;
830
- font-family: 'Do Hyeon', sans-serif !important;
831
  margin-top: 0;
832
  }
833
 
@@ -849,7 +867,7 @@ def create_leaderboard_v2_tab():
849
  background: rgba(245, 246, 247, 0.06) !important;
850
  border: 1px solid var(--border-subtle) !important;
851
  border-radius: 999px !important;
852
- color: var(--text-primary) !important;
853
  text-decoration: none !important;
854
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
855
  font-weight: 600 !important;
@@ -908,10 +926,10 @@ def create_leaderboard_v2_tab():
908
  .section-title {
909
  font-size: 3.75rem;
910
  font-weight: 700;
911
- color: var(--text-primary);
912
  margin-bottom: 12px;
913
  text-align: center !important;
914
- font-family: 'Gowun Dodum', sans-serif !important;
915
  }
916
 
917
  .section-lead, .section-subtitle {
@@ -943,7 +961,7 @@ def create_leaderboard_v2_tab():
943
 
944
  .phase-card h3 {
945
  font-size: 1.44rem !important;
946
- color: var(--text-primary);
947
  margin-bottom: 20px;
948
  font-weight: 700;
949
  font-family: 'Nanum Gothic', sans-serif !important;
@@ -976,7 +994,7 @@ def create_leaderboard_v2_tab():
976
  position: relative;
977
  font-size: 1.2rem !important;
978
  font-weight: 700;
979
- color: white !important;
980
  font-family: 'Nanum Gothic', sans-serif !important;
981
  }
982
 
@@ -1054,7 +1072,7 @@ def create_leaderboard_v2_tab():
1054
  .criteria-card h3 {
1055
  font-size: 1.25rem;
1056
  font-weight: 700;
1057
- color: var(--text-primary);
1058
  margin: 0;
1059
  }
1060
 
@@ -1110,6 +1128,7 @@ def create_leaderboard_v2_tab():
1110
  </style>
1111
  """)
1112
 
 
1113
  gr.Image(
1114
  value="banner_wide.png",
1115
  show_label=False,
@@ -1117,6 +1136,7 @@ def create_leaderboard_v2_tab():
1117
  type="filepath",
1118
  elem_id="hero-banner"
1119
  )
 
1120
 
1121
  gr.HTML("""
1122
  <div style="text-align: center; padding: 20px 0;">
@@ -1129,21 +1149,21 @@ def create_leaderboard_v2_tab():
1129
  gr.HTML("""
1130
  <div class="hero-actions">
1131
  <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1132
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1133
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1134
  <line x1="8" y1="12" x2="16" y2="12"/>
1135
  </svg>
1136
  <span>Blog</span>
1137
  </a>
1138
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1139
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1140
  <path d="M9 19c-5 1.5-5-2.5-7-3"/>
1141
  <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
1142
  </svg>
1143
  <span>GitHub</span>
1144
  </a>
1145
  <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1146
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1147
  <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
1148
  <polyline points="7 10 12 15 17 10"/>
1149
  <line x1="12" y1="15" x2="12" y2="3"/>
@@ -1151,7 +1171,7 @@ def create_leaderboard_v2_tab():
1151
  <span>Dataset</span>
1152
  </a>
1153
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1154
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1155
  <path d="M3 3v18h18"/>
1156
  <path d="M7 17v-6"/>
1157
  <path d="M12 17V7"/>
@@ -1166,7 +1186,7 @@ def create_leaderboard_v2_tab():
1166
  gr.HTML("""
1167
  <div class="dashboard-section">
1168
  <div class="section-header">
1169
- <h2 class="section-title" style="font-family: 'Gowun Dodum', sans-serif; font-size: 2.5rem;">7-Level Task Design</h2>
1170
  </div>
1171
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">We analyzed agent capabilities across seven stages—from simple tool calls to long-context retention and robustness.</p>
1172
  <div class="phase-grid">
@@ -1176,11 +1196,11 @@ def create_leaderboard_v2_tab():
1176
  <span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
1177
  </div>
1178
  <ul class="phase-list">
1179
- <li style="color: white;">L1: Single Tool Call</li>
1180
- <li style="color: white;">L2: Tool Selection</li>
1181
- <li style="color: white;">L3: Sequential Tool Reasoning</li>
1182
- <li style="color: white;">L4: Parallel Tool Reasoning</li>
1183
- <li style="color: white;">L5: Error Handling & Robustness</li>
1184
  </ul>
1185
  </div>
1186
  <div class="phase-card">
@@ -1189,8 +1209,8 @@ def create_leaderboard_v2_tab():
1189
  <span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
1190
  </div>
1191
  <ul class="phase-list">
1192
- <li style="color: white;">L6: Efficient Tool Utilization</li>
1193
- <li style="color: white;">L7: Long-Context Memory</li>
1194
  </ul>
1195
  </div>
1196
  </div>
@@ -1204,7 +1224,7 @@ def create_leaderboard_v2_tab():
1204
  <h2 class="section-title" style="font-size: 2.0rem;">High-quality scenario design tailored to 18 Korea-specific APIs and real-world use cases.</h2>
1205
  </div>
1206
  <div class="scenario-body">
1207
- <p>We built realistic scenarios—such as appointment booking and blog review search—by integrating APIs widely used in Korea including Naver Maps, Kakao services, and local websites.</p>
1208
  </div>
1209
 
1210
  </div>
@@ -1357,7 +1377,7 @@ def create_leaderboard_v2_tab():
1357
  filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
1358
  letter-spacing: 0.02em;
1359
  animation: title-shimmer 1.25s ease-in-out infinite;
1360
- font-family: 'Gowun Dodum', sans-serif !important;
1361
  }
1362
 
1363
  @keyframes title-shimmer {
@@ -1497,7 +1517,7 @@ def create_leaderboard_v2_tab():
1497
  .filter-group .gr-input-label {
1498
  font-size: 1rem !important;
1499
  font-weight: 600 !important;
1500
- color: var(--text-primary) !important;
1501
  text-align: center !important;
1502
  margin-bottom: 12px !important;
1503
  }
@@ -1505,7 +1525,7 @@ def create_leaderboard_v2_tab():
1505
  .filter-group-label {
1506
  font-size: 1rem !important;
1507
  font-weight: 600 !important;
1508
- color: var(--text-primary) !important;
1509
  text-align: left !important;
1510
  margin: 0 !important;
1511
  font-family: 'Geist', sans-serif !important;
@@ -1536,7 +1556,7 @@ def create_leaderboard_v2_tab():
1536
  text-align: center !important;
1537
  position: relative !important;
1538
  overflow: hidden !important;
1539
- color: var(--text-primary) !important;
1540
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1541
  font-weight: 600 !important;
1542
  font-size: 0.95rem !important;
@@ -1649,7 +1669,7 @@ def create_leaderboard_v2_tab():
1649
  border: 1px solid #333333 !important;
1650
  border-radius: 999px !important;
1651
  padding: 12px 24px !important;
1652
- color: #ffffff !important;
1653
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1654
  font-weight: 600 !important;
1655
  font-size: 1rem !important;
@@ -1680,7 +1700,7 @@ def create_leaderboard_v2_tab():
1680
  background: #000000 !important;
1681
  border: 1px solid #333333 !important;
1682
  border-radius: 999px !important;
1683
- color: #ffffff !important;
1684
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1685
  font-weight: 600 !important;
1686
  font-size: 0.95rem !important;
@@ -1707,7 +1727,7 @@ def create_leaderboard_v2_tab():
1707
  .model-dropdown .tag {
1708
  background: rgba(255, 210, 30, 0.18) !important;
1709
  border: 1px solid rgba(255, 210, 30, 0.35) !important;
1710
- color: var(--text-primary) !important;
1711
  border-radius: 999px !important;
1712
  padding: 4px 10px !important;
1713
  font-size: 0.85rem !important;
@@ -1782,7 +1802,7 @@ def create_leaderboard_v2_tab():
1782
  font-size: 1.5rem;
1783
  margin-bottom: 4px;
1784
  display: block;
1785
- filter: drop-shadow(0 0 10px currentColor);
1786
  }
1787
 
1788
  .domain-name {
@@ -1797,7 +1817,7 @@ def create_leaderboard_v2_tab():
1797
  top: 8px;
1798
  right: 8px;
1799
  background: var(--accent-primary);
1800
- color: white;
1801
  font-size: 0.75rem;
1802
  padding: 2px 8px;
1803
  border-radius: 12px;
@@ -1888,7 +1908,7 @@ def create_leaderboard_v2_tab():
1888
  font-size: 0.85rem !important;
1889
  margin-bottom: 8px !important;
1890
  font-weight: 600 !important;
1891
- color: var(--text-primary) !important;
1892
  display: block !important;
1893
  }
1894
 
@@ -1921,7 +1941,7 @@ def create_leaderboard_v2_tab():
1921
  .compact-radio .wrap > label:has(input[type="radio"]:checked) {
1922
  background: transparent !important;
1923
  border-color: var(--accent-primary) !important;
1924
- color: var(--text-primary) !important;
1925
  font-weight: 600 !important;
1926
  }
1927
 
@@ -1942,7 +1962,7 @@ def create_leaderboard_v2_tab():
1942
  .domain-radio label[aria-checked="true"] {
1943
  background: transparent !important;
1944
  border-color: var(--accent-primary) !important;
1945
- color: var(--text-primary) !important;
1946
  font-weight: 600 !important;
1947
  }
1948
 
@@ -2031,7 +2051,7 @@ def create_leaderboard_v2_tab():
2031
  border: 1px solid var(--border-subtle) !important;
2032
  border-radius: 20px !important;
2033
  font-size: 0.85rem !important;
2034
- color: var(--text-primary) !important;
2035
  transition: all 0.2s ease !important;
2036
  cursor: pointer !important;
2037
  }
@@ -2045,7 +2065,7 @@ def create_leaderboard_v2_tab():
2045
  .inline-radio label[aria-checked="true"] {
2046
  background: rgba(255, 210, 30, 0.2) !important;
2047
  border-color: var(--accent-primary) !important;
2048
- color: white !important;
2049
  font-weight: 600 !important;
2050
  }
2051
  </style>
@@ -2058,7 +2078,7 @@ def create_leaderboard_v2_tab():
2058
  leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
2059
 
2060
  # Integrated controls within leaderboard section - stacked vertically
2061
- gr.HTML("<p style='color: white; margin: 5px 0 5px 0; font-size: 1.2rem;'>Select Task Level</p>")
2062
  domain_filter = gr.Radio(
2063
  choices=level_options,
2064
  value=default_level,
@@ -2068,10 +2088,10 @@ def create_leaderboard_v2_tab():
2068
  elem_classes=["domain-radio", "inline-radio"]
2069
  )
2070
 
2071
- gr.HTML("<p style='color: white; margin: 5px 0 0px 0; font-size: 1.2rem;'>🔍 Filters & Sorting</p>")
2072
  with gr.Row():
2073
  with gr.Column(scale=1):
2074
- gr.HTML("<span style='color: white; font-size: 1.2rem; margin-bottom: 5px; display: block;'>Model Access</span>")
2075
  model_type_filter = gr.Radio(
2076
  choices=["All", "OSS", "API"],
2077
  value="All",
@@ -2080,7 +2100,7 @@ def create_leaderboard_v2_tab():
2080
  container=False
2081
  )
2082
  with gr.Column(scale=1):
2083
- gr.HTML("<span style='color: white; font-size: 1.2rem; margin-bottom: 5px; display: block;'>Sort Order</span>")
2084
  sort_order = gr.Radio(
2085
  choices=["Descending", "Ascending"],
2086
  value="Descending",
@@ -2095,12 +2115,12 @@ def create_leaderboard_v2_tab():
2095
  gr.HTML("""
2096
  <div class="domain-selector-container domain-performance-container">
2097
  <div class="domain-header">
2098
- <h2 class="domain-title" style="color: white;">Core Capability Radar</h2>
2099
- <p class="domain-subtitle" style="color: white;">Track six essential axes: success, execution, reasoning, robustness, efficiency, and call validity.</p>
2100
  </div>
2101
  """)
2102
 
2103
- gr.HTML("<p style='color: white; margin: 10px 0 0 0; font-size: 1.2rem; font-family: \"Nanum Gothic\", sans-serif;'>Select models to compare (up to 5).</p>")
2104
  # gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>You can select up to five models.</p>")
2105
  model_selector = gr.Dropdown(
2106
  choices=initial_df['Model'].tolist()[:10],
@@ -2278,8 +2298,8 @@ def create_leaderboard_v2_tab():
2278
  gr.HTML("""
2279
  <div class="domain-selector-container performance-card-container">
2280
  <div class="domain-header">
2281
- <h2 class="domain-title" style="color: white;">Model Performance Card</h2>
2282
- <p class="domain-subtitle" style="color: white;">
2283
  Explore detailed performance cards that visualize six core metrics plus overall SR across L1–L7 levels.
2284
  </p>
2285
  <p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
@@ -2292,7 +2312,7 @@ def create_leaderboard_v2_tab():
2292
 
2293
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
2294
  gr.HTML("""
2295
- <p class="domain-subtitle" style="color: white;">Choose a model to generate its analysis card.</p>
2296
 
2297
  """)
2298
  card_model_selector = gr.Dropdown(
@@ -2329,15 +2349,11 @@ def create_leaderboard_v2_tab():
2329
  gr.HTML("""
2330
  <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2331
  <div class="domain-header">
2332
- <h2 class="domain-title" style="color: white;">Level-specific Metrics</h2>
2333
- <p class="domain-subtitle" style="color: white;">Compare model scores with each Ko-AgentBench level's dedicated metrics for deeper insights.</p>
2334
  </div>
2335
  """)
2336
 
2337
- gr.HTML("""
2338
- <p style="color: white; text-align: center; margin: 0 0 20px 0; font-size: 1.2rem; font-family: \'Nanum Gothic\', sans-serif;">Select a level and up to five models to explore detailed metrics.</p>
2339
- """)
2340
-
2341
  with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2342
  level_metric_selector = gr.Dropdown(
2343
  choices=level_ids,
@@ -2373,8 +2389,8 @@ def create_leaderboard_v2_tab():
2373
  # gr.HTML("""
2374
  # <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2375
  # <div class="domain-header">
2376
- # <h2 class="domain-title" style="color: white;">Comprehensive Performance Heatmap</h2>
2377
- # <p class="domain-subtitle" style="color: white;">See each model's L1–L7 SR scores at a glance.</p>
2378
  # </div>
2379
  # <div class="chart-container heatmap-chart-container">
2380
  # """)
@@ -2665,7 +2681,7 @@ def create_leaderboard_v2_tab():
2665
  font-size: 1.9rem;
2666
  font-weight: 800;
2667
  letter-spacing: 0.01em;
2668
- color: var(--text-primary);
2669
  }
2670
 
2671
  .meta-line {
@@ -2677,7 +2693,7 @@ def create_leaderboard_v2_tab():
2677
  }
2678
 
2679
  .meta-line span {
2680
- color: var(--text-primary);
2681
  font-weight: 600;
2682
  }
2683
 
@@ -2717,7 +2733,7 @@ def create_leaderboard_v2_tab():
2717
  .rank-value {
2718
  font-size: 2.4rem;
2719
  font-weight: 800;
2720
- color: var(--text-primary);
2721
  letter-spacing: 0.04em;
2722
  }
2723
 
@@ -2822,7 +2838,7 @@ def create_leaderboard_v2_tab():
2822
  border: 1px solid #333333 !important;
2823
  border-radius: 999px !important;
2824
  padding: 12px 20px !important;
2825
- color: #ffffff !important;
2826
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
2827
  font-weight: 600 !important;
2828
  font-size: 0.95rem !important;
@@ -2851,7 +2867,7 @@ def create_leaderboard_v2_tab():
2851
  .level-model-dropdown button {
2852
  background: #000000 !important;
2853
  border: 1px solid #333333 !important;
2854
- color: #ffffff !important;
2855
  }
2856
 
2857
  .radar-placeholder {
@@ -2912,7 +2928,7 @@ def create_leaderboard_v2_tab():
2912
  .core-metric-card .metric-value {
2913
  font-size: 1.8rem;
2914
  font-weight: 700;
2915
- color: var(--text-primary);
2916
  font-family: 'Geist Mono', monospace;
2917
  }
2918
 
@@ -2945,7 +2961,7 @@ def create_leaderboard_v2_tab():
2945
  .level-tile-score {
2946
  font-size: 1.25rem;
2947
  font-weight: 700;
2948
- color: var(--text-primary);
2949
  font-family: 'Geist Mono', monospace;
2950
  }
2951
  @media (max-width: 980px) {
@@ -3029,20 +3045,20 @@ def create_leaderboard_v2_tab():
3029
  h2.section-title,
3030
  .dashboard-section .section-title,
3031
  .section-header .section-title {
3032
- font-family: "Gowun Dodum", sans-serif !important;
3033
  }
3034
 
3035
  .domain-title,
3036
  h2.domain-title,
3037
  .domain-header .domain-title {
3038
- font-family: "Gowun Dodum", sans-serif !important;
3039
  }
3040
 
3041
  .hero-title,
3042
  .hero-subtitle,
3043
  h1.hero-title,
3044
  p.hero-subtitle {
3045
- font-family: "Do Hyeon", sans-serif !important;
3046
  font-size: 2rem; !important;
3047
  }
3048
 
@@ -3236,8 +3252,8 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3236
  palette = [
3237
  {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
3238
  {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
3239
- {'fill': 'rgba(249, 112, 185, 0.22)', 'line': '#F970B9'},
3240
- {'fill': 'rgba(139, 92, 246, 0.20)', 'line': '#8B5CF6'},
3241
  {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
3242
  ]
3243
 
@@ -3362,16 +3378,7 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3362
  width=900,
3363
  margin=dict(t=30, b=50, l=10, r=10),
3364
  autosize=True,
3365
- annotations=[
3366
- dict(
3367
- text="Galileo Agent Leaderboard",
3368
- xref="paper", yref="paper",
3369
- x=0.98, y=0.02,
3370
- xanchor='right', yanchor='bottom',
3371
- font=dict(size=10, color='#64748B'),
3372
- showarrow=False
3373
- )
3374
- ]
3375
  )
3376
 
3377
  return fig
@@ -3630,8 +3637,8 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
3630
  model_palette = [
3631
  '#ffd21e',
3632
  '#FF8A3C',
3633
- '#F970B9',
3634
- '#8B5CF6',
3635
  '#F8FAFC',
3636
  '#38BDF8',
3637
  ]
 
224
  # Level metadata for the 7-stage task framework
225
  level_details = {
226
  "ALL": {
227
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>ALL · All Tasks</span>",
228
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>First, observe the overall average performance across all seven tasks. This average should then be utilized as a baseline to conduct a more detailed per-level comparison.</span>"
229
  },
230
  "L1": {
231
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L1 · Single Tool Call</span>",
232
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Evaluates single tool invocation capability and basic command execution accuracy.</span>"
233
  },
234
  "L2": {
235
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L2 · Tool Selection</span>",
236
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Measures the ability to choose the right tool and invoke it with appropriate parameters.</span>"
237
  },
238
  "L3": {
239
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L3 · Sequential Tool Reasoning</span>",
240
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Validates multi-step sequential reasoning for solving tasks.</span>"
241
  },
242
  "L4": {
243
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L4 · Parallel Tool Reasoning</span>",
244
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Evaluates the ability to integrate and summarize information from multiple sources in parallel.</span>"
245
  },
246
  "L5": {
247
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L5 · Error Handling & Robustness</span>",
248
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Checks awareness of unexpected failures and the strategies used to recover.</span>"
249
  },
250
  "L6": {
251
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L6 · Efficient Tool Utilization</span>",
252
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Examines operational efficiency in achieving goals with minimal calls and cost.</span>"
253
  },
254
  "L7": {
255
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L7 · Long-Context Memory</span>",
256
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>Analyzes the ability to retain and leverage long conversational context.</span>"
257
  }
258
  }
259
  default_level = "ALL"
 
291
  border-collapse: collapse;
292
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
293
  background: var(--bg-card);
294
+ color: #FFFFFF;
295
  }
296
 
297
  .v2-styled-table thead {
 
305
  padding: 14px 12px;
306
  text-align: left;
307
  font-weight: 600;
308
+ color: #FFFFFF;
309
  border-bottom: 2px solid var(--accent-primary);
310
  font-size: 13px;
311
  text-transform: uppercase;
 
319
  .v2-styled-table td {
320
  padding: 12px;
321
  border-bottom: 1px solid var(--border-subtle);
322
+ color: #FFFFFF;
323
  transition: all 0.2s ease;
324
  }
325
 
 
339
 
340
  .model-name {
341
  font-weight: 500;
342
+ color: #FFFFFF;
343
  transition: color 0.2s ease;
344
  }
345
 
346
  /* Keep model name color consistent on hover to emphasize row highlight */
347
  .v2-styled-table tr:hover .model-name {
348
+ color: #FFFFFF;
349
  }
350
 
351
  .numeric-cell {
352
  font-family: 'Geist Mono', monospace;
353
  font-size: 13px;
354
  text-align: center;
355
+ color: #FFFFFF;
356
  }
357
 
358
  .highlight-header {
359
  background: rgba(255, 210, 30, 0.14);
360
+ color: #FFFFFF;
361
  }
362
 
363
  .highlight-cell {
364
  background: rgba(255, 210, 30, 0.08);
365
+ color: #FFFFFF;
366
  font-weight: 600;
367
  }
368
  </style>
 
460
  return f"""
461
  <div class="domain-selector-container leaderboard-intro">
462
  <div class="domain-header">
463
+ <h2 class="domain-title" >Agent Leaderboard · {level_title}</h2>
464
+ <p class="domain-subtitle" >{level_description}</p>
465
  </div>
466
  <div class="dataframe-container">
467
  """
 
511
  # Load initial data
512
  initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
513
  initial_df = load_leaderboard_data() # Load raw data for model selector
514
+ if not initial_df.empty:
515
+ overall_success_numeric = pd.to_numeric(initial_df.get('Overall Success'), errors='coerce')
516
+ if overall_success_numeric.notna().any():
517
+ initial_df = initial_df.assign(**{'Overall Success': overall_success_numeric}).sort_values(
518
+ 'Overall Success', ascending=False, na_position='last'
519
+ )
520
+ else:
521
+ initial_df = initial_df.sort_values('Model')
522
  initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
523
  initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
524
  initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
 
609
  border-collapse: collapse;
610
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
611
  background: var(--bg-card);
612
+ color: #FFFFFF;
613
  }
614
 
615
  .v2-styled-table thead {
 
623
  padding: 14px 12px;
624
  text-align: left;
625
  font-weight: 600;
626
+ color: #FFFFFF;
627
  border-bottom: 2px solid var(--accent-primary);
628
  font-size: 14px;
629
  text-transform: uppercase;
 
634
  .v2-styled-table td {
635
  padding: 12px;
636
  border-bottom: 1px solid var(--border-subtle);
637
+ color: #FFFFFF;
638
  font-size: 14px;
639
  transition: all 0.2s ease;
640
  }
 
800
  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
801
  }
802
 
803
+ .hero-banner-wrapper {
804
+ position: relative;
805
+ width: 100vw;
806
+ margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%);
807
  border-radius: 0 !important;
808
  overflow: hidden !important;
809
  box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
 
 
 
 
 
 
810
  }
811
+
812
+ .hero-banner-wrapper::before {
813
+ content: "";
814
+ position: absolute;
815
+ inset: 0;
816
+ background: #01091A;
817
+ z-index: 0;
818
+ }
819
+
820
+ #hero-banner {
821
+ position: relative;
822
+ width: 100% !important;
823
+ height: auto !important;
824
+ z-index: 1;
825
+ }
826
+
827
  #hero-banner img {
828
+ width: 100% !important;
829
+ height: auto !important;
830
+ display: block !important;
831
+ object-fit: cover !important;
832
  }
833
 
834
  .hero-title {
 
839
  -webkit-background-clip: text;
840
  -webkit-text-fill-color: transparent;
841
  margin-bottom: 1rem;
842
+ font-family: 'Nanum Gothic', sans-serif !important;
843
  }
844
 
845
  .hero-subtitle {
846
  color: var(--text-secondary);
847
  font-size: 3rem;
848
+ font-family: 'Nanum Gothic', sans-serif !important;
849
  margin-top: 0;
850
  }
851
 
 
867
  background: rgba(245, 246, 247, 0.06) !important;
868
  border: 1px solid var(--border-subtle) !important;
869
  border-radius: 999px !important;
870
+ color: #FFFFFF !important;
871
  text-decoration: none !important;
872
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
873
  font-weight: 600 !important;
 
926
  .section-title {
927
  font-size: 3.75rem;
928
  font-weight: 700;
929
+ color: #FFFFFF;
930
  margin-bottom: 12px;
931
  text-align: center !important;
932
+ font-family: 'Nanum Gothic', sans-serif !important;
933
  }
934
 
935
  .section-lead, .section-subtitle {
 
961
 
962
  .phase-card h3 {
963
  font-size: 1.44rem !important;
964
+ color: #FFFFFF;
965
  margin-bottom: 20px;
966
  font-weight: 700;
967
  font-family: 'Nanum Gothic', sans-serif !important;
 
994
  position: relative;
995
  font-size: 1.2rem !important;
996
  font-weight: 700;
997
+ color: #FFFFFF !important;
998
  font-family: 'Nanum Gothic', sans-serif !important;
999
  }
1000
 
 
1072
  .criteria-card h3 {
1073
  font-size: 1.25rem;
1074
  font-weight: 700;
1075
+ color: #FFFFFF;
1076
  margin: 0;
1077
  }
1078
 
 
1128
  </style>
1129
  """)
1130
 
1131
+ gr.HTML("<div class='hero-banner-wrapper'>")
1132
  gr.Image(
1133
  value="banner_wide.png",
1134
  show_label=False,
 
1136
  type="filepath",
1137
  elem_id="hero-banner"
1138
  )
1139
+ gr.HTML("</div>")
1140
 
1141
  gr.HTML("""
1142
  <div style="text-align: center; padding: 20px 0;">
 
1149
  gr.HTML("""
1150
  <div class="hero-actions">
1151
  <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1152
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1153
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1154
  <line x1="8" y1="12" x2="16" y2="12"/>
1155
  </svg>
1156
  <span>Blog</span>
1157
  </a>
1158
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1159
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1160
  <path d="M9 19c-5 1.5-5-2.5-7-3"/>
1161
  <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
1162
  </svg>
1163
  <span>GitHub</span>
1164
  </a>
1165
  <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1166
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1167
  <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
1168
  <polyline points="7 10 12 15 17 10"/>
1169
  <line x1="12" y1="15" x2="12" y2="3"/>
 
1171
  <span>Dataset</span>
1172
  </a>
1173
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1174
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1175
  <path d="M3 3v18h18"/>
1176
  <path d="M7 17v-6"/>
1177
  <path d="M12 17V7"/>
 
1186
  gr.HTML("""
1187
  <div class="dashboard-section">
1188
  <div class="section-header">
1189
+ <h2 class="section-title" style="font-family: 'Nanum Gothic', sans-serif; font-size: 2.5rem;">7-Level Task Design</h2>
1190
  </div>
1191
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">We analyzed agent capabilities across seven stages—from simple tool calls to long-context retention and robustness.</p>
1192
  <div class="phase-grid">
 
1196
  <span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
1197
  </div>
1198
  <ul class="phase-list">
1199
+ <li style="color: #FFFFFF;">L1: Single Tool Call</li>
1200
+ <li style="color: #FFFFFF;">L2: Tool Selection</li>
1201
+ <li style="color: #FFFFFF;">L3: Sequential Tool Reasoning</li>
1202
+ <li style="color: #FFFFFF;">L4: Parallel Tool Reasoning</li>
1203
+ <li style="color: #FFFFFF;">L5: Error Handling & Robustness</li>
1204
  </ul>
1205
  </div>
1206
  <div class="phase-card">
 
1209
  <span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
1210
  </div>
1211
  <ul class="phase-list">
1212
+ <li style="color: #FFFFFF;">L6: Efficient Tool Utilization</li>
1213
+ <li style="color: #FFFFFF;">L7: Long-Context Memory</li>
1214
  </ul>
1215
  </div>
1216
  </div>
 
1224
  <h2 class="section-title" style="font-size: 2.0rem;">High-quality scenario design tailored to 18 Korea-specific APIs and real-world use cases.</h2>
1225
  </div>
1226
  <div class="scenario-body">
1227
+ <p style="color: var(--text-primary);">We built realistic scenarios—such as appointment booking and blog review search—by integrating APIs widely used in Korea including Naver Maps, Kakao services, and local websites.</p>
1228
  </div>
1229
 
1230
  </div>
 
1377
  filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
1378
  letter-spacing: 0.02em;
1379
  animation: title-shimmer 1.25s ease-in-out infinite;
1380
+ font-family: 'Nanum Gothic', sans-serif !important;
1381
  }
1382
 
1383
  @keyframes title-shimmer {
 
1517
  .filter-group .gr-input-label {
1518
  font-size: 1rem !important;
1519
  font-weight: 600 !important;
1520
+ color: #FFFFFF !important;
1521
  text-align: center !important;
1522
  margin-bottom: 12px !important;
1523
  }
 
1525
  .filter-group-label {
1526
  font-size: 1rem !important;
1527
  font-weight: 600 !important;
1528
+ color: #FFFFFF !important;
1529
  text-align: left !important;
1530
  margin: 0 !important;
1531
  font-family: 'Geist', sans-serif !important;
 
1556
  text-align: center !important;
1557
  position: relative !important;
1558
  overflow: hidden !important;
1559
+ color: #FFFFFF !important;
1560
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1561
  font-weight: 600 !important;
1562
  font-size: 0.95rem !important;
 
1669
  border: 1px solid #333333 !important;
1670
  border-radius: 999px !important;
1671
  padding: 12px 24px !important;
1672
+ color: #FFFFFF !important;
1673
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1674
  font-weight: 600 !important;
1675
  font-size: 1rem !important;
 
1700
  background: #000000 !important;
1701
  border: 1px solid #333333 !important;
1702
  border-radius: 999px !important;
1703
+ color: #FFFFFF !important;
1704
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1705
  font-weight: 600 !important;
1706
  font-size: 0.95rem !important;
 
1727
  .model-dropdown .tag {
1728
  background: rgba(255, 210, 30, 0.18) !important;
1729
  border: 1px solid rgba(255, 210, 30, 0.35) !important;
1730
+ color: #FFFFFF !important;
1731
  border-radius: 999px !important;
1732
  padding: 4px 10px !important;
1733
  font-size: 0.85rem !important;
 
1802
  font-size: 1.5rem;
1803
  margin-bottom: 4px;
1804
  display: block;
1805
+ filter: drop-shadow(0 0 10px white);
1806
  }
1807
 
1808
  .domain-name {
 
1817
  top: 8px;
1818
  right: 8px;
1819
  background: var(--accent-primary);
1820
+ color: #FFFFFF;
1821
  font-size: 0.75rem;
1822
  padding: 2px 8px;
1823
  border-radius: 12px;
 
1908
  font-size: 0.85rem !important;
1909
  margin-bottom: 8px !important;
1910
  font-weight: 600 !important;
1911
+ color: #FFFFFF !important;
1912
  display: block !important;
1913
  }
1914
 
 
1941
  .compact-radio .wrap > label:has(input[type="radio"]:checked) {
1942
  background: transparent !important;
1943
  border-color: var(--accent-primary) !important;
1944
+ color: #FFFFFF !important;
1945
  font-weight: 600 !important;
1946
  }
1947
 
 
1962
  .domain-radio label[aria-checked="true"] {
1963
  background: transparent !important;
1964
  border-color: var(--accent-primary) !important;
1965
+ color: #FFFFFF !important;
1966
  font-weight: 600 !important;
1967
  }
1968
 
 
2051
  border: 1px solid var(--border-subtle) !important;
2052
  border-radius: 20px !important;
2053
  font-size: 0.85rem !important;
2054
+ color: #FFFFFF !important;
2055
  transition: all 0.2s ease !important;
2056
  cursor: pointer !important;
2057
  }
 
2065
  .inline-radio label[aria-checked="true"] {
2066
  background: rgba(255, 210, 30, 0.2) !important;
2067
  border-color: var(--accent-primary) !important;
2068
+ color: #FFFFFF !important;
2069
  font-weight: 600 !important;
2070
  }
2071
  </style>
 
2078
  leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
2079
 
2080
  # Integrated controls within leaderboard section - stacked vertically
2081
+ gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 5px 0; font-size: 1.2rem;'>Select Task Level</p>")
2082
  domain_filter = gr.Radio(
2083
  choices=level_options,
2084
  value=default_level,
 
2088
  elem_classes=["domain-radio", "inline-radio"]
2089
  )
2090
 
2091
+ gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 0px 0; font-size: 1.2rem;'>🔍 Filters & Sorting</p>")
2092
  with gr.Row():
2093
  with gr.Column(scale=1):
2094
+ gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>Model Access</span>")
2095
  model_type_filter = gr.Radio(
2096
  choices=["All", "OSS", "API"],
2097
  value="All",
 
2100
  container=False
2101
  )
2102
  with gr.Column(scale=1):
2103
+ gr.HTML("<span style='color: var(--text-primary);'>Sort Order</span>")
2104
  sort_order = gr.Radio(
2105
  choices=["Descending", "Ascending"],
2106
  value="Descending",
 
2115
  gr.HTML("""
2116
  <div class="domain-selector-container domain-performance-container">
2117
  <div class="domain-header">
2118
+ <h2 class="domain-title" >Core Capability Radar</h2>
2119
+ <p class="domain-subtitle" style="color: var(--text-primary);">Track six essential axes: <br>success, execution, reasoning, robustness, efficiency, and call validity.</p>
2120
  </div>
2121
  """)
2122
 
2123
+ gr.HTML("<p >Select models to compare (up to 5).</p>")
2124
  # gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>You can select up to five models.</p>")
2125
  model_selector = gr.Dropdown(
2126
  choices=initial_df['Model'].tolist()[:10],
 
2298
  gr.HTML("""
2299
  <div class="domain-selector-container performance-card-container">
2300
  <div class="domain-header">
2301
+ <h2 class="domain-title" >Model Performance Card</h2>
2302
+ <p class="domain-subtitle" style="color: var(--text-primary);">
2303
  Explore detailed performance cards that visualize six core metrics plus overall SR across L1–L7 levels.
2304
  </p>
2305
  <p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
 
2312
 
2313
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
2314
  gr.HTML("""
2315
+ <p class="domain-subtitle" style="color: var(--text-primary);">Choose a model to generate its analysis card.</p>
2316
 
2317
  """)
2318
  card_model_selector = gr.Dropdown(
 
2349
  gr.HTML("""
2350
  <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2351
  <div class="domain-header">
2352
+ <h2 class="domain-title" >Level-specific Metrics</h2>
2353
+ <p class="domain-subtitle" style="color: var(--text-primary);">Compare model scores with each Ko-AgentBench level's dedicated metrics for deeper insights.</p>
2354
  </div>
2355
  """)
2356
 
 
 
 
 
2357
  with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2358
  level_metric_selector = gr.Dropdown(
2359
  choices=level_ids,
 
2389
  # gr.HTML("""
2390
  # <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2391
  # <div class="domain-header">
2392
+ # <h2 class="domain-title" >Comprehensive Performance Heatmap</h2>
2393
+ # <p class="domain-subtitle" >See each model's L1–L7 SR scores at a glance.</p>
2394
  # </div>
2395
  # <div class="chart-container heatmap-chart-container">
2396
  # """)
 
2681
  font-size: 1.9rem;
2682
  font-weight: 800;
2683
  letter-spacing: 0.01em;
2684
+ color: #FFFFFF;
2685
  }
2686
 
2687
  .meta-line {
 
2693
  }
2694
 
2695
  .meta-line span {
2696
+ color: #FFFFFF;
2697
  font-weight: 600;
2698
  }
2699
 
 
2733
  .rank-value {
2734
  font-size: 2.4rem;
2735
  font-weight: 800;
2736
+ color: #FFFFFF;
2737
  letter-spacing: 0.04em;
2738
  }
2739
 
 
2838
  border: 1px solid #333333 !important;
2839
  border-radius: 999px !important;
2840
  padding: 12px 20px !important;
2841
+ color: #FFFFFF !important;
2842
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
2843
  font-weight: 600 !important;
2844
  font-size: 0.95rem !important;
 
2867
  .level-model-dropdown button {
2868
  background: #000000 !important;
2869
  border: 1px solid #333333 !important;
2870
+ color: #FFFFFF !important;
2871
  }
2872
 
2873
  .radar-placeholder {
 
2928
  .core-metric-card .metric-value {
2929
  font-size: 1.8rem;
2930
  font-weight: 700;
2931
+ color: #FFFFFF;
2932
  font-family: 'Geist Mono', monospace;
2933
  }
2934
 
 
2961
  .level-tile-score {
2962
  font-size: 1.25rem;
2963
  font-weight: 700;
2964
+ color: #FFFFFF;
2965
  font-family: 'Geist Mono', monospace;
2966
  }
2967
  @media (max-width: 980px) {
 
3045
  h2.section-title,
3046
  .dashboard-section .section-title,
3047
  .section-header .section-title {
3048
+ font-family: "Nanum Gothic", sans-serif !important;
3049
  }
3050
 
3051
  .domain-title,
3052
  h2.domain-title,
3053
  .domain-header .domain-title {
3054
+ font-family: "Nanum Gothic", sans-serif !important;
3055
  }
3056
 
3057
  .hero-title,
3058
  .hero-subtitle,
3059
  h1.hero-title,
3060
  p.hero-subtitle {
3061
+ font-family: "Nanum Gothic", sans-serif !important;
3062
  font-size: 2rem; !important;
3063
  }
3064
 
 
3252
  palette = [
3253
  {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
3254
  {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
3255
+ {'fill': 'rgba(161, 98, 7, 0.22)', 'line': '#A16207'},
3256
+ {'fill': 'rgba(220, 38, 38, 0.20)', 'line': '#DC2626'},
3257
  {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
3258
  ]
3259
 
 
3378
  width=900,
3379
  margin=dict(t=30, b=50, l=10, r=10),
3380
  autosize=True,
3381
+ annotations=[]
 
 
 
 
 
 
 
 
 
3382
  )
3383
 
3384
  return fig
 
3637
  model_palette = [
3638
  '#ffd21e',
3639
  '#FF8A3C',
3640
+ '#A16207',
3641
+ '#DC2626',
3642
  '#F8FAFC',
3643
  '#38BDF8',
3644
  ]
tabs/leaderboard_v1_kr.py CHANGED
@@ -224,36 +224,36 @@ def create_leaderboard_v2_tab():
224
  # Level metadata for the 7-stage task framework
225
  level_details = {
226
  "ALL": {
227
- "title": "<span style='font-family: \"Gowun Dodum\", sans-serif !important;'>ALL ยท ์ „์ฒด ํƒœ์Šคํฌ</span>",
228
- "description": "<span style='font-family: \"Nanum Gothic\", sans-serif !important;'>7๊ฐœ์˜ ํƒœ์Šคํฌ ์ „๋ฐ˜์˜ ํ‰๊ท  ์„ฑ๋Šฅ์„ ํ•œ๋ˆˆ์— ์‚ดํŽด๋ณด๊ณ  ๊ฐ ๋ ˆ๋ฒจ ๋น„๊ต๋ฅผ ์œ„ํ•œ ๊ธฐ์ค€์ ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.</span>"
229
  },
230
  "L1": {
231
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L1 ยท ๋‹จ์ผ ๋„๊ตฌ ํ˜ธ์ถœ</span>",
232
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>๋‹จ์ผ ๋„๊ตฌ ํ˜ธ์ถœ ๋Šฅ๋ ฅ๊ณผ ๊ธฐ๋ณธ์ ์ธ ๋ช…๋ น ์ˆ˜ํ–‰ ์ •ํ™•๋„๋ฅผ ํ‰๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.</span>"
233
  },
234
  "L2": {
235
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L2 ยท ๋„๊ตฌ ์„ ํƒ</span>",
236
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>์š”๊ตฌ ์‚ฌํ•ญ์— ๋งž๋Š” ๋„๊ตฌ๋ฅผ ๊ณ ๋ฅด๊ณ  ์ ์ ˆํ•œ ํŒŒ๋ผ๋ฏธํ„ฐ๋กœ ํ˜ธ์ถœํ•˜๋Š” ๋Šฅ๋ ฅ์„ ์ธก์ •ํ•ฉ๋‹ˆ๋‹ค.</span>"
237
  },
238
  "L3": {
239
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L3 ยท ๋„๊ตฌ ์ˆœ์ฐจ ์ถ”๋ก </span>",
240
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>๋ณต์ˆ˜ ๋‹จ๊ณ„์˜ ์ˆœ์ฐจ์  reasoning์„ ํ†ตํ•ด ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜๋Š” ๊ณผ์ •์„ ๊ฒ€์ฆํ•ฉ๋‹ˆ๋‹ค.</span>"
241
  },
242
  "L4": {
243
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L4 ยท ๋„๊ตฌ ๋ณ‘๋ ฌ ์ถ”๋ก </span>",
244
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>์—ฌ๋Ÿฌ ์†Œ์Šค์˜ ์ •๋ณด๋ฅผ ๋ณ‘๋ ฌ์ ์œผ๋กœ ํ†ตํ•ฉํ•˜๊ณ  ์š”์•ฝํ•˜๋Š” ๋Šฅ๋ ฅ์„ ํ‰๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.</span>"
245
  },
246
  "L5": {
247
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L5 ยท ์˜ค๋ฅ˜ ์ฒ˜๋ฆฌ์™€ ๊ฐ•๊ฑด์„ฑ</span>",
248
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜๋‚˜ ์‹คํŒจ ์ƒํ™ฉ์— ๋Œ€ํ•œ ์ธ์ง€์™€ ๋Œ€์‘ ์ „๋žต์„ ํ™•์ธํ•ฉ๋‹ˆ๋‹ค.</span>"
249
  },
250
  "L6": {
251
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L6 ยท ํšจ์œจ์ ์ธ ๋„๊ตฌ ํ™œ์šฉ</span>",
252
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>์ตœ์†Œํ•œ์˜ ํ˜ธ์ถœ๊ณผ ๋น„์šฉ์œผ๋กœ ๋ชฉํ‘œ๋ฅผ ๋‹ฌ์„ฑํ•˜๋Š” ์šด์˜ ํšจ๏ฟฝ๏ฟฝ๏ฟฝ์„ ์‚ดํŽด๋ด…๋‹ˆ๋‹ค.</span>"
253
  },
254
  "L7": {
255
- "title": "<span style='color: white; font-family: \"Gowun Dodum\", sans-serif !important;'>L7 ยท ์žฅ๊ธฐ ์ปจํ…์ŠคํŠธ ๊ธฐ์–ต</span>",
256
- "description": "<span style='color: white; font-family: \"Nanum Gothic\", sans-serif !important;'>์žฅ๊ธฐ ๋Œ€ํ™” ๋งฅ๋ฝ์„ ์œ ์ง€ํ•˜๊ณ  ์ ์ ˆํžˆ ํ™œ์šฉํ•˜๋Š” ๋Šฅ๋ ฅ์„ ์ง‘์ค‘์ ์œผ๋กœ ๋ถ„์„ํ•ฉ๋‹ˆ๋‹ค.</span>"
257
  }
258
  }
259
  default_level = "ALL"
@@ -291,7 +291,7 @@ def create_leaderboard_v2_tab():
291
  border-collapse: collapse;
292
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
293
  background: var(--bg-card);
294
- color: white;
295
  }
296
 
297
  .v2-styled-table thead {
@@ -305,7 +305,7 @@ def create_leaderboard_v2_tab():
305
  padding: 14px 12px;
306
  text-align: left;
307
  font-weight: 600;
308
- color: white;
309
  border-bottom: 2px solid var(--accent-primary);
310
  font-size: 13px;
311
  text-transform: uppercase;
@@ -319,7 +319,7 @@ def create_leaderboard_v2_tab():
319
  .v2-styled-table td {
320
  padding: 12px;
321
  border-bottom: 1px solid var(--border-subtle);
322
- color: white;
323
  transition: all 0.2s ease;
324
  }
325
 
@@ -339,30 +339,30 @@ def create_leaderboard_v2_tab():
339
 
340
  .model-name {
341
  font-weight: 500;
342
- color: white;
343
  transition: color 0.2s ease;
344
  }
345
 
346
  /* Keep model name color consistent on hover to emphasize row highlight */
347
  .v2-styled-table tr:hover .model-name {
348
- color: white;
349
  }
350
 
351
  .numeric-cell {
352
  font-family: 'Geist Mono', monospace;
353
  font-size: 13px;
354
  text-align: center;
355
- color: white;
356
  }
357
 
358
  .highlight-header {
359
  background: rgba(255, 210, 30, 0.14);
360
- color: white;
361
  }
362
 
363
  .highlight-cell {
364
  background: rgba(255, 210, 30, 0.08);
365
- color: white;
366
  font-weight: 600;
367
  }
368
  </style>
@@ -460,8 +460,8 @@ def create_leaderboard_v2_tab():
460
  return f"""
461
  <div class="domain-selector-container leaderboard-intro">
462
  <div class="domain-header">
463
- <h2 class="domain-title" style="color: white;">Agent Leaderboard ยท {level_title}</h2>
464
- <p class="domain-subtitle" style="color: white;">{level_description}</p>
465
  </div>
466
  <div class="dataframe-container">
467
  """
@@ -511,6 +511,14 @@ def create_leaderboard_v2_tab():
511
  # Load initial data
512
  initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
513
  initial_df = load_leaderboard_data() # Load raw data for model selector
 
 
 
 
 
 
 
 
514
  initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
515
  initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
516
  initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
@@ -743,7 +751,7 @@ def create_leaderboard_v2_tab():
743
  display: inline-block !important;
744
  padding: 14px 28px !important;
745
  background: #ffd21e !important;
746
- color: #FFFFFF !important;
747
  text-decoration: none !important;
748
  border-radius: 16px !important;
749
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
@@ -777,7 +785,7 @@ def create_leaderboard_v2_tab():
777
  transform: translateY(-3px) !important;
778
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
779
  background: #ffd21e !important;
780
- color: #FFFFFF !important;
781
  text-decoration: none !important;
782
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
783
  }
@@ -792,25 +800,35 @@ def create_leaderboard_v2_tab():
792
  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
793
  }
794
 
795
- #hero-banner {
796
- width: 100vw !important;
797
- margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%) !important;
 
798
  border-radius: 0 !important;
799
  overflow: hidden !important;
800
  box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
801
- position: relative !important;
802
- left: 50% !important;
803
- right: 50% !important;
804
- margin-left: -50vw !important;
805
- margin-right: -50vw !important;
806
- max-width: none !important;
807
  }
808
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  #hero-banner img {
810
- width: 100%;
811
- height: auto;
812
- display: block;
813
- object-fit: cover;
814
  }
815
 
816
  .hero-title {
@@ -821,13 +839,13 @@ def create_leaderboard_v2_tab():
821
  -webkit-background-clip: text;
822
  -webkit-text-fill-color: transparent;
823
  margin-bottom: 1rem;
824
- font-family: 'Do Hyeon', sans-serif !important;
825
  }
826
 
827
  .hero-subtitle {
828
  color: var(--text-secondary);
829
  font-size: 3rem;
830
- font-family: 'Do Hyeon', sans-serif !important;
831
  margin-top: 0;
832
  }
833
 
@@ -911,7 +929,7 @@ def create_leaderboard_v2_tab():
911
  color: var(--text-primary);
912
  margin-bottom: 12px;
913
  text-align: center !important;
914
- font-family: 'Gowun Dodum', sans-serif !important;
915
  }
916
 
917
  .section-lead, .section-subtitle {
@@ -976,19 +994,19 @@ def create_leaderboard_v2_tab():
976
  position: relative;
977
  font-size: 1.2rem !important;
978
  font-weight: 700;
979
- color: white !important;
980
  font-family: 'Nanum Gothic', sans-serif !important;
981
  }
982
 
983
  /* ์ถ”๊ฐ€์ ์ธ ๊ตฌ์ฒด์  ์„ ํƒ์ž */
984
  .phase-card .phase-chart span {
985
- color: #FFFFFF !important;
986
  text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8) !important;
987
  font-family: 'Nanum Gothic', sans-serif !important;
988
  }
989
 
990
  .phase-grid .phase-chart span {
991
- color: #FFFFFF !important;
992
  z-index: 10 !important;
993
  font-family: 'Nanum Gothic', sans-serif !important;
994
  }
@@ -1110,6 +1128,7 @@ def create_leaderboard_v2_tab():
1110
  </style>
1111
  """)
1112
 
 
1113
  gr.Image(
1114
  value="banner_wide.png",
1115
  show_label=False,
@@ -1117,6 +1136,7 @@ def create_leaderboard_v2_tab():
1117
  type="filepath",
1118
  elem_id="hero-banner"
1119
  )
 
1120
 
1121
  gr.HTML("""
1122
  <div style="text-align: center; padding: 20px 0;">
@@ -1129,21 +1149,21 @@ def create_leaderboard_v2_tab():
1129
  gr.HTML("""
1130
  <div class="hero-actions">
1131
  <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1132
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1133
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1134
  <line x1="8" y1="12" x2="16" y2="12"/>
1135
  </svg>
1136
  <span>๋ธ”๋กœ๊ทธ</span>
1137
  </a>
1138
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1139
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1140
  <path d="M9 19c-5 1.5-5-2.5-7-3"/>
1141
  <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
1142
  </svg>
1143
  <span>GitHub</span>
1144
  </a>
1145
  <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1146
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1147
  <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
1148
  <polyline points="7 10 12 15 17 10"/>
1149
  <line x1="12" y1="15" x2="12" y2="3"/>
@@ -1151,7 +1171,7 @@ def create_leaderboard_v2_tab():
1151
  <span>๋ฐ์ดํ„ฐ์…‹</span>
1152
  </a>
1153
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1154
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1155
  <path d="M3 3v18h18"/>
1156
  <path d="M7 17v-6"/>
1157
  <path d="M12 17V7"/>
@@ -1166,31 +1186,31 @@ def create_leaderboard_v2_tab():
1166
  gr.HTML("""
1167
  <div class="dashboard-section">
1168
  <div class="section-header">
1169
- <h2 class="section-title" style="font-family: 'Gowun Dodum', sans-serif; font-size: 2.5rem;">๋‹จ๊ณ„๋ณ„ ํƒœ์Šคํฌ ์„ค๊ณ„</h2>
1170
  </div>
1171
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">๋‹จ์ˆœ ๋„๊ตฌ ํ˜ธ์ถœ๋ถ€ํ„ฐ ์žฅ๊ธฐ์  ๋งฅ๋ฝ ๋Šฅ๋ ฅ, ๊ฐ•๊ฑด์„ฑ ์ฒ˜๋ฆฌ ๋Šฅ๋ ฅ๊นŒ์ง€ ์—์ด์ „ํŠธ์˜ ๋Šฅ๋ ฅ์„ 7๋‹จ๊ณ„๋กœ ์ž…์ฒด์ ์œผ๋กœ ๋ถ„์„ํ•˜์˜€์Šต๋‹ˆ๋‹ค.</p>
1172
  <div class="phase-grid">
1173
  <div class="phase-card">
1174
  <h3>๋‹จ์ผ ํ„ด</h3>
1175
  <div class="phase-chart" style="--progress:80%;">
1176
- <span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
1177
  </div>
1178
  <ul class="phase-list">
1179
- <li style="color: white;">L1: ๋‹จ์ผ ๋„๊ตฌ ํ˜ธ์ถœ</li>
1180
- <li style="color: white;">L2: ๋„๊ตฌ ์„ ํƒ</li>
1181
- <li style="color: white;">L3: ๋„๊ตฌ ์ˆœ์ฐจ ์ถ”๋ก </li>
1182
- <li style="color: white;">L4: ๋„๊ตฌ ๋ณ‘๋ ฌ ์ถ”๋ก </li>
1183
- <li style="color: white;">L5: ์˜ค๋ฅ˜ ์ฒ˜๋ฆฌ์™€ ๊ฐ•๊ฑด์„ฑ</li>
1184
  </ul>
1185
  </div>
1186
  <div class="phase-card">
1187
  <h3>๋‹ค์ค‘ ํ„ด</h3>
1188
  <div class="phase-chart" style="--progress:20%;">
1189
- <span style="color: #FFFFFF !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
1190
  </div>
1191
  <ul class="phase-list">
1192
- <li style="color: white;">L6: ํšจ์œจ์ ์ธ ๋„๊ตฌ ํ™œ์šฉ</li>
1193
- <li style="color: white;">L7: ์žฅ๊ธฐ ์ปจํ…์ŠคํŠธ ๊ธฐ์–ต</li>
1194
  </ul>
1195
  </div>
1196
  </div>
@@ -1204,7 +1224,7 @@ def create_leaderboard_v2_tab():
1204
  <h2 class="section-title" style="font-size: 2.0rem;">18๊ฐ€์ง€ ํ•œ๊ตญํ˜• API ์‚ฌ์šฉ ๋ฐ ์‹ค์ƒํ™œ ํ™˜๊ฒฝ์— ํŠนํ™”๋œ ๊ณ ํ’ˆ์งˆ ์‹œ๋‚˜๋ฆฌ์˜ค ๊ตฌ์„ฑ</h2>
1205
  </div>
1206
  <div class="scenario-body">
1207
- <p>๋„ค์ด๋ฒ„, ์ง€๋„, ์นด์นด์˜ค, ์›น์‚ฌ์ดํŠธ ๋“ฑ ํ•œ๊ตญ ์‹ค์‚ฌ์šฉ ํ™˜๊ฒฝ ๊ธฐ๋ฐ˜์˜ API๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ<br> ๊ตญ๋‚ด ์‚ฌ์šฉ์ž์˜ ์ผ์ƒ๊ณผ ๋ฐ€์ ‘ํ•œ '์•ฝ์† ์˜ˆ์•ฝ', '๋ธ”๋กœ๊ทธ ํ›„๊ธฐ ๊ฒ€์ƒ‰'๊ณผ ๊ฐ™์€ ํ˜„์‹ค์ ์ธ ๋ฌธ์ œ ํ•ด๊ฒฐ ์‹œ๋‚˜๋ฆฌ์˜ค๋ฅผ ๊ตฌํ˜„ํ–ˆ์Šต๋‹ˆ๋‹ค.</p>
1208
  </div>
1209
 
1210
  </div>
@@ -1357,7 +1377,7 @@ def create_leaderboard_v2_tab():
1357
  filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
1358
  letter-spacing: 0.02em;
1359
  animation: title-shimmer 1.25s ease-in-out infinite;
1360
- font-family: 'Gowun Dodum', sans-serif !important;
1361
  }
1362
 
1363
  @keyframes title-shimmer {
@@ -1649,7 +1669,7 @@ def create_leaderboard_v2_tab():
1649
  border: 1px solid #333333 !important;
1650
  border-radius: 999px !important;
1651
  padding: 12px 24px !important;
1652
- color: #ffffff !important;
1653
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1654
  font-weight: 600 !important;
1655
  font-size: 1rem !important;
@@ -1680,7 +1700,7 @@ def create_leaderboard_v2_tab():
1680
  background: #000000 !important;
1681
  border: 1px solid #333333 !important;
1682
  border-radius: 999px !important;
1683
- color: #ffffff !important;
1684
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1685
  font-weight: 600 !important;
1686
  font-size: 0.95rem !important;
@@ -1735,7 +1755,7 @@ def create_leaderboard_v2_tab():
1735
  background: #ffd21e !important;
1736
  border: 1px solid rgba(255, 210, 30, 0.6) !important;
1737
  border-radius: 999px !important;
1738
- color: #FFFFFF !important;
1739
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1740
  font-weight: 600 !important;
1741
  font-size: 0.95rem !important;
@@ -1782,7 +1802,7 @@ def create_leaderboard_v2_tab():
1782
  font-size: 1.5rem;
1783
  margin-bottom: 4px;
1784
  display: block;
1785
- filter: drop-shadow(0 0 10px currentColor);
1786
  }
1787
 
1788
  .domain-name {
@@ -1797,7 +1817,7 @@ def create_leaderboard_v2_tab():
1797
  top: 8px;
1798
  right: 8px;
1799
  background: var(--accent-primary);
1800
- color: white;
1801
  font-size: 0.75rem;
1802
  padding: 2px 8px;
1803
  border-radius: 12px;
@@ -2045,7 +2065,7 @@ def create_leaderboard_v2_tab():
2045
  .inline-radio label[aria-checked="true"] {
2046
  background: rgba(255, 210, 30, 0.2) !important;
2047
  border-color: var(--accent-primary) !important;
2048
- color: white !important;
2049
  font-weight: 600 !important;
2050
  }
2051
  </style>
@@ -2058,7 +2078,7 @@ def create_leaderboard_v2_tab():
2058
  leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
2059
 
2060
  # Integrated controls within leaderboard section - stacked vertically
2061
- gr.HTML("<p style='color: white; margin: 5px 0 5px 0; font-size: 1.2rem;'>ํƒœ์Šคํฌ ๋ ˆ๋ฒจ ์„ ํƒ</p>")
2062
  domain_filter = gr.Radio(
2063
  choices=level_options,
2064
  value=default_level,
@@ -2068,10 +2088,10 @@ def create_leaderboard_v2_tab():
2068
  elem_classes=["domain-radio", "inline-radio"]
2069
  )
2070
 
2071
- gr.HTML("<p style='color: white; margin: 5px 0 0px 0; font-size: 1.2rem;'>๐Ÿ” ํ•„ํ„ฐ ๋ฐ ์ •๋ ฌ</p>")
2072
  with gr.Row():
2073
  with gr.Column(scale=1):
2074
- gr.HTML("<span style='color: white; font-size: 1.2rem; margin-bottom: 5px; display: block;'>๋ชจ๋ธ ์ ‘๊ทผ</span>")
2075
  model_type_filter = gr.Radio(
2076
  choices=["All", "OSS", "API"],
2077
  value="All",
@@ -2080,7 +2100,7 @@ def create_leaderboard_v2_tab():
2080
  container=False
2081
  )
2082
  with gr.Column(scale=1):
2083
- gr.HTML("<span style='color: white; font-size: 1.2rem; margin-bottom: 5px; display: block;'>์ •๋ ฌ ์ˆœ์„œ</span>")
2084
  sort_order = gr.Radio(
2085
  choices=["Descending", "Ascending"],
2086
  value="Descending",
@@ -2095,12 +2115,12 @@ def create_leaderboard_v2_tab():
2095
  gr.HTML("""
2096
  <div class="domain-selector-container domain-performance-container">
2097
  <div class="domain-header">
2098
- <h2 class="domain-title" style="color: white;">ํ•ต์‹ฌ ์—ญ๋Ÿ‰ ๋ ˆ์ด๋”</h2>
2099
- <p class="domain-subtitle" style="color: white;">6๊ฐ€์ง€ ํ•„์ˆ˜ ํ•ต์‹ฌ ์š”์†Œ(์„ฑ๊ณต, ์‹คํ–‰, ์ถ”๋ก , ๊ฐ•๊ฑด์„ฑ, ํšจ์œจ์„ฑ, ํ˜ธ์ถœ ์œ ํšจ์„ฑ)๋ฅผ ์ถ”์ ํ•ฉ๋‹ˆ๋‹ค.</p>
2100
  </div>
2101
  """)
2102
 
2103
- gr.HTML("<p style='color: white; margin: 10px 0 0 0; font-size: 1.2rem; font-family: \"Nanum Gothic\", sans-serif;'>๋น„๊ตํ•  ๋ชจ๋ธ์„ ์„ ํƒํ•˜์„ธ์š”. ์ตœ๋Œ€ 5๊ฐœ๊นŒ์ง€ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.</p>")
2104
  # gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>๋ชจ๋ธ์€ ์ตœ๋Œ€ 5๊ฐœ๊นŒ์ง€ ์„ ํƒ ๊ฐ€๋Šฅ ํ•ฉ๋‹ˆ๋‹ค.</p>")
2105
  model_selector = gr.Dropdown(
2106
  choices=initial_df['Model'].tolist()[:10],
@@ -2278,8 +2298,8 @@ def create_leaderboard_v2_tab():
2278
  gr.HTML("""
2279
  <div class="domain-selector-container performance-card-container">
2280
  <div class="domain-header">
2281
- <h2 class="domain-title" style="color: white;">๋ชจ๋ธ ์„ฑ๋Šฅ ์นด๋“œ</h2>
2282
- <p class="domain-subtitle" style="color: white;">
2283
  ๋ชจ๋ธ์˜ ์„ฑ๋Šฅ ์ŠคํŽ™ํŠธ๋Ÿผ์„ 6๋Œ€ ํ•ต์‹ฌ ์ง€ํ‘œ์™€ L1~L7 ๋‹จ๊ณ„๋ณ„ ์ข…ํ•ฉ ์„ฑ๊ณต๋ฅ (SR)๋กœ ์‹œ๊ฐํ™”ํ•œ ์ •๋ฐ€ ๋ถ„์„ ์นด๋“œ๋ฅผ ํ™•์ธํ•ด๋ณด์„ธ์š”.
2284
  </p>
2285
  <p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
@@ -2292,7 +2312,7 @@ def create_leaderboard_v2_tab():
2292
 
2293
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
2294
  gr.HTML("""
2295
- <p class="domain-subtitle" style="color: white;">๋ถ„์„ ์นด๋“œ๋ฅผ ์ƒ์„ฑํ•  ๋ชจ๋ธ์„ ์„ ํƒํ•˜์„ธ์š”.</p>
2296
 
2297
  """)
2298
  card_model_selector = gr.Dropdown(
@@ -2329,15 +2349,11 @@ def create_leaderboard_v2_tab():
2329
  gr.HTML("""
2330
  <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2331
  <div class="domain-header">
2332
- <h2 class="domain-title" style="color: white;">๋ ˆ๋ฒจ๋ณ„ ์ƒ์„ธ ์ง€ํ‘œ</h2>
2333
- <p class="domain-subtitle" style="color: white;">๊ฐ Ko-AgentBench ๋‹จ๊ณ„๋ณ„ ๊ณ ์œ  ํ‰๊ฐ€ ์ง€ํ‘œ๋ฅผ ํ†ตํ•ด ๋ชจ๋ธ ์ ์ˆ˜๋ฅผ ๋น„๊ตํ•˜๊ณ  ๋” ์ž์„ธํžˆ ์‚ดํŽด๋ณด์„ธ์š”.</p>
2334
  </div>
2335
  """)
2336
 
2337
- gr.HTML("""
2338
- <p style="color: white; text-align: center; margin: 0 0 20px 0; font-size: 1.2rem; font-family: \'Nanum Gothic\', sans-serif;">ํƒœ์Šคํฌ ๋ ˆ๋ฒจ๊ณผ ๋ชจ๋ธ(์ตœ๋Œ€ 5๊ฐœ)์„ ์„ ํƒํ•˜์—ฌ ์ƒ์„ธ ์ง€ํ‘œ๋ฅผ ํƒ์ƒ‰ํ•˜์„ธ์š”.</p>
2339
- """)
2340
-
2341
  with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2342
  level_metric_selector = gr.Dropdown(
2343
  choices=level_ids,
@@ -2373,8 +2389,8 @@ def create_leaderboard_v2_tab():
2373
  # gr.HTML("""
2374
  # <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2375
  # <div class="domain-header">
2376
- # <h2 class="domain-title" style="color: white;">์ข…ํ•ฉ ์„ฑ๋Šฅ ํžˆํŠธ๋งต</h2>
2377
- # <p class="domain-subtitle" style="color: white;">๊ฐ ๋ชจ๋ธ์˜ L1~L7 Ko-AgentBench SR(์„ฑ๊ณต๋ฅ ) ์ ์ˆ˜๋ฅผ ํ•œ๋ˆˆ์— ๋ณด์„ธ์š”.</p>
2378
  # </div>
2379
  # <div class="chart-container heatmap-chart-container">
2380
  # """)
@@ -2822,7 +2838,7 @@ def create_leaderboard_v2_tab():
2822
  border: 1px solid #333333 !important;
2823
  border-radius: 999px !important;
2824
  padding: 12px 20px !important;
2825
- color: #ffffff !important;
2826
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
2827
  font-weight: 600 !important;
2828
  font-size: 0.95rem !important;
@@ -2851,7 +2867,7 @@ def create_leaderboard_v2_tab():
2851
  .level-model-dropdown button {
2852
  background: #000000 !important;
2853
  border: 1px solid #333333 !important;
2854
- color: #ffffff !important;
2855
  }
2856
 
2857
  .radar-placeholder {
@@ -3029,20 +3045,20 @@ def create_leaderboard_v2_tab():
3029
  h2.section-title,
3030
  .dashboard-section .section-title,
3031
  .section-header .section-title {
3032
- font-family: "Gowun Dodum", sans-serif !important;
3033
  }
3034
 
3035
  .domain-title,
3036
  h2.domain-title,
3037
  .domain-header .domain-title {
3038
- font-family: "Gowun Dodum", sans-serif !important;
3039
  }
3040
 
3041
  .hero-title,
3042
  .hero-subtitle,
3043
  h1.hero-title,
3044
  p.hero-subtitle {
3045
- font-family: "Do Hyeon", sans-serif !important;
3046
  font-size: 2rem !important;
3047
  }
3048
 
@@ -3236,8 +3252,8 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3236
  palette = [
3237
  {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
3238
  {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
3239
- {'fill': 'rgba(249, 112, 185, 0.22)', 'line': '#F970B9'},
3240
- {'fill': 'rgba(139, 92, 246, 0.20)', 'line': '#8B5CF6'},
3241
  {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
3242
  ]
3243
 
@@ -3362,16 +3378,7 @@ def create_domain_radar_chart(df, selected_models=None, max_models=5):
3362
  width=900,
3363
  margin=dict(t=30, b=50, l=10, r=10),
3364
  autosize=True,
3365
- annotations=[
3366
- dict(
3367
- text="Galileo Agent Leaderboard",
3368
- xref="paper", yref="paper",
3369
- x=0.98, y=0.02,
3370
- xanchor='right', yanchor='bottom',
3371
- font=dict(size=10, color='#64748B'),
3372
- showarrow=False
3373
- )
3374
- ]
3375
  )
3376
 
3377
  return fig
@@ -3630,8 +3637,8 @@ def create_level_metric_chart(df, level, selected_models=None, max_models=5):
3630
  model_palette = [
3631
  '#ffd21e',
3632
  '#FF8A3C',
3633
- '#F970B9',
3634
- '#8B5CF6',
3635
  '#F8FAFC',
3636
  '#38BDF8',
3637
  ]
 
224
  # Level metadata for the 7-stage task framework
225
  level_details = {
226
  "ALL": {
227
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>ALL · 전체 태스크</span>",
228
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>7개의 태스크 전반의 평균 성능을 한눈에 살펴보고 각 레벨 비교를 위한 기준점을 제공합니다.</span>"
229
  },
230
  "L1": {
231
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L1 · 단일 도구 호출</span>",
232
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>단일 도구 호출 능력과 기본적인 명령 수행 정확도를 평가합니다.</span>"
233
  },
234
  "L2": {
235
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L2 · 도구 선택</span>",
236
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>요구 사항에 맞는 도구를 고르고 적절한 파라미터로 호출하는 능력을 측정합니다.</span>"
237
  },
238
  "L3": {
239
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L3 ยท ๋„๊ตฌ ์ˆœ์ฐจ ์ถ”๋ก </span>",
240
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>๋ณต์ˆ˜ ๋‹จ๊ณ„์˜ ์ˆœ์ฐจ์  reasoning์„ ํ†ตํ•ด ๋ฌธ์ œ๋ฅผ ํ•ด๊ฒฐํ•˜๋Š” ๊ณผ์ •์„ ๊ฒ€์ฆํ•ฉ๋‹ˆ๋‹ค.</span>"
241
  },
242
  "L4": {
243
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L4 · 도구 병렬 추론</span>",
244
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>여러 소스의 정보를 병렬적으로 통합하고 요약하는 능력을 평가합니다.</span>"
245
  },
246
  "L5": {
247
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L5 · 오류 처리와 강건성</span>",
248
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>예상치 못한 오류나 실패 상황에 대한 인지와 대응 전략을 확인합니다.</span>"
249
  },
250
  "L6": {
251
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L6 · 효율적인 도구 활용</span>",
252
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>최소한의 호출과 비용으로 목표를 달성하는 운영 효율을 살펴봅니다.</span>"
253
  },
254
  "L7": {
255
+ "title": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>L7 · 장기 컨텍스트 기억</span>",
256
+ "description": "<span style='color: var(--text-primary); font-family: \"Nanum Gothic\", sans-serif !important;'>장기 대화 맥락을 유지하고 적절히 활용하는 능력을 집중적으로 분석합니다.</span>"
257
  }
258
  }
259
  default_level = "ALL"
 
291
  border-collapse: collapse;
292
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
293
  background: var(--bg-card);
294
+ color: var(--text-primary);
295
  }
296
 
297
  .v2-styled-table thead {
 
305
  padding: 14px 12px;
306
  text-align: left;
307
  font-weight: 600;
308
+ color: var(--text-primary);
309
  border-bottom: 2px solid var(--accent-primary);
310
  font-size: 13px;
311
  text-transform: uppercase;
 
319
  .v2-styled-table td {
320
  padding: 12px;
321
  border-bottom: 1px solid var(--border-subtle);
322
+ color: var(--text-primary);
323
  transition: all 0.2s ease;
324
  }
325
 
 
339
 
340
  .model-name {
341
  font-weight: 500;
342
+ color: var(--text-primary);
343
  transition: color 0.2s ease;
344
  }
345
 
346
  /* Keep model name color consistent on hover to emphasize row highlight */
347
  .v2-styled-table tr:hover .model-name {
348
+ color: var(--text-primary);
349
  }
350
 
351
  .numeric-cell {
352
  font-family: 'Geist Mono', monospace;
353
  font-size: 13px;
354
  text-align: center;
355
+ color: var(--text-primary);
356
  }
357
 
358
  .highlight-header {
359
  background: rgba(255, 210, 30, 0.14);
360
+ color: var(--text-primary);
361
  }
362
 
363
  .highlight-cell {
364
  background: rgba(255, 210, 30, 0.08);
365
+ color: var(--text-primary);
366
  font-weight: 600;
367
  }
368
  </style>
 
460
  return f"""
461
  <div class="domain-selector-container leaderboard-intro">
462
  <div class="domain-header">
463
+ <h2 class="domain-title" style="color: var(--text-primary);">Agent Leaderboard ยท {level_title}</h2>
464
+ <p class="domain-subtitle" style="color: var(--text-primary);">{level_description}</p>
465
  </div>
466
  <div class="dataframe-container">
467
  """
 
511
  # Load initial data
512
  initial_table = filter_and_sort_data(default_level, "All", "Overall Success", "Descending")
513
  initial_df = load_leaderboard_data() # Load raw data for model selector
514
+ if not initial_df.empty:
515
+ overall_success_numeric = pd.to_numeric(initial_df.get('Overall Success'), errors='coerce')
516
+ if overall_success_numeric.notna().any():
517
+ initial_df = initial_df.assign(**{'Overall Success': overall_success_numeric}).sort_values(
518
+ 'Overall Success', ascending=False, na_position='last'
519
+ )
520
+ else:
521
+ initial_df = initial_df.sort_values('Model')
522
  initial_selected_models = initial_df['Model'].tolist()[:5] if len(initial_df) > 0 else []
523
  initial_heatmap_models = initial_df['Model'].tolist()[:12] if len(initial_df) > 0 else []
524
  initial_heatmap = create_performance_heatmap(initial_df, initial_heatmap_models)
 
751
  display: inline-block !important;
752
  padding: 14px 28px !important;
753
  background: #ffd21e !important;
754
+ color: var(--text-primary) !important;
755
  text-decoration: none !important;
756
  border-radius: 16px !important;
757
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
 
785
  transform: translateY(-3px) !important;
786
  box-shadow: 0 12px 32px rgba(255, 210, 30, 0.5), 0 8px 16px rgba(0, 0, 0, 0.4) !important;
787
  background: #ffd21e !important;
788
+ color: var(--text-primary) !important;
789
  text-decoration: none !important;
790
  text-shadow: 0 2px 6px rgba(0, 0, 0, 0.45) !important;
791
  }
 
800
  filter: drop-shadow(0 0 8px rgba(255, 255, 255, 0.3));
801
  }
802
 
803
+ .hero-banner-wrapper {
804
+ position: relative;
805
+ width: 100vw;
806
+ margin: 0 calc(-50vw + 50%) 20px calc(-50vw + 50%);
807
  border-radius: 0 !important;
808
  overflow: hidden !important;
809
  box-shadow: 0 12px 32px rgba(0, 0, 0, 0.25) !important;
 
 
 
 
 
 
810
  }
811
+
812
+ .hero-banner-wrapper::before {
813
+ content: "";
814
+ position: absolute;
815
+ inset: 0;
816
+ background: #01091A;
817
+ z-index: 0;
818
+ }
819
+
820
+ #hero-banner {
821
+ position: relative;
822
+ width: 100% !important;
823
+ height: auto !important;
824
+ z-index: 1;
825
+ }
826
+
827
  #hero-banner img {
828
+ width: 100% !important;
829
+ height: auto !important;
830
+ display: block !important;
831
+ object-fit: cover !important;
832
  }
833
 
834
  .hero-title {
 
839
  -webkit-background-clip: text;
840
  -webkit-text-fill-color: transparent;
841
  margin-bottom: 1rem;
842
+ font-family: 'Nanum Gothic', sans-serif !important;
843
  }
844
 
845
  .hero-subtitle {
846
  color: var(--text-secondary);
847
  font-size: 3rem;
848
+ font-family: 'Nanum Gothic', sans-serif !important;
849
  margin-top: 0;
850
  }
851
 
 
929
  color: var(--text-primary);
930
  margin-bottom: 12px;
931
  text-align: center !important;
932
+ font-family: 'Nanum Gothic', sans-serif !important;
933
  }
934
 
935
  .section-lead, .section-subtitle {
 
994
  position: relative;
995
  font-size: 1.2rem !important;
996
  font-weight: 700;
997
+ color: var(--text-primary) !important;
998
  font-family: 'Nanum Gothic', sans-serif !important;
999
  }
1000
 
1001
  /* 추가적인 구체적 선택자 */
1002
  .phase-card .phase-chart span {
1003
+ color: var(--text-primary) !important;
1004
  text-shadow: 0 1px 2px rgba(0, 0, 0, 0.8) !important;
1005
  font-family: 'Nanum Gothic', sans-serif !important;
1006
  }
1007
 
1008
  .phase-grid .phase-chart span {
1009
+ color: var(--text-primary) !important;
1010
  z-index: 10 !important;
1011
  font-family: 'Nanum Gothic', sans-serif !important;
1012
  }
 
1128
  </style>
1129
  """)
1130
 
1131
+ gr.HTML("<div class='hero-banner-wrapper'>")
1132
  gr.Image(
1133
  value="banner_wide.png",
1134
  show_label=False,
 
1136
  type="filepath",
1137
  elem_id="hero-banner"
1138
  )
1139
+ gr.HTML("</div>")
1140
 
1141
  gr.HTML("""
1142
  <div style="text-align: center; padding: 20px 0;">
 
1149
  gr.HTML("""
1150
  <div class="hero-actions">
1151
  <a href="https://hugging-face-krew.github.io/" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1152
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1153
  <path d="M15 7h3a5 5 0 0 1 5 5 5 5 0 0 1-5 5h-3m-6 0H6a5 5 0 0 1-5-5 5 5 0 0 1 5-5h3"/>
1154
  <line x1="8" y1="12" x2="16" y2="12"/>
1155
  </svg>
1156
  <span>블로그</span>
1157
  </a>
1158
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1159
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1160
  <path d="M9 19c-5 1.5-5-2.5-7-3"/>
1161
  <path d="M20 21v-3.87a3.37 3.37 0 0 0-.94-2.61c3.14-.35 6.44-1.54 6.44-7A5.44 5.44 0 0 0 20 4.77 5.07 5.07 0 0 0 19.91 1S18.73.65 16 2.48a13.38 13.38 0 0 0-7 0C6.27.65 5.09 1 5.09 1A5.07 5.07 0 0 0 5 4.77a5.44 5.44 0 0 0-1.5 3.78c0 5.42 3.3 6.61 6.44 7A3.37 3.37 0 0 0 9 18.13V22"/>
1162
  </svg>
1163
  <span>GitHub</span>
1164
  </a>
1165
  <a href="https://huggingface.co/datasets/huggingface-KREW/Ko-AgentBench" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1166
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1167
  <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>
1168
  <polyline points="7 10 12 15 17 10"/>
1169
  <line x1="12" y1="15" x2="12" y2="3"/>
 
1171
  <span>데이터셋</span>
1172
  </a>
1173
  <a href="https://github.com/Hugging-Face-KREW/Ko-AgentBench/blob/main/evaluate_model_run.py#L55" target="_blank" rel="noopener noreferrer" class="hero-action-button">
1174
+ <svg viewBox="0 0 24 24" fill="none" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1175
  <path d="M3 3v18h18"/>
1176
  <path d="M7 17v-6"/>
1177
  <path d="M12 17V7"/>
 
1186
  gr.HTML("""
1187
  <div class="dashboard-section">
1188
  <div class="section-header">
1189
+ <h2 class="section-title" style="font-family: 'Nanum Gothic', sans-serif; font-size: 2.5rem;">단계별 태스크 설계</h2>
1190
  </div>
1191
  <p class="section-lead" style="text-align: center; margin: 0 auto 24px auto; max-width: 720px; line-height: 1.7; word-break: keep-all;">๋‹จ์ˆœ ๋„๊ตฌ ํ˜ธ์ถœ๋ถ€ํ„ฐ ์žฅ๊ธฐ์  ๋งฅ๋ฝ ๋Šฅ๋ ฅ, ๊ฐ•๊ฑด์„ฑ ์ฒ˜๋ฆฌ ๋Šฅ๋ ฅ๊นŒ์ง€ ์—์ด์ „ํŠธ์˜ ๋Šฅ๋ ฅ์„ 7๋‹จ๊ณ„๋กœ ์ž…์ฒด์ ์œผ๋กœ ๋ถ„์„ํ•˜์˜€์Šต๋‹ˆ๋‹ค.</p>
1192
  <div class="phase-grid">
1193
  <div class="phase-card">
1194
  <h3>단일 턴</h3>
1195
  <div class="phase-chart" style="--progress:80%;">
1196
+ <span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">80%</span>
1197
  </div>
1198
  <ul class="phase-list">
1199
+ <li style="color: var(--text-primary);">L1: 단일 도구 호출</li>
1200
+ <li style="color: var(--text-primary);">L2: 도구 선택</li>
1201
+ <li style="color: var(--text-primary);">L3: 도구 순차 추론</li>
1202
+ <li style="color: var(--text-primary);">L4: 도구 병렬 추론</li>
1203
+ <li style="color: var(--text-primary);">L5: 오류 처리와 강건성</li>
1204
  </ul>
1205
  </div>
1206
  <div class="phase-card">
1207
  <h3>다중 턴</h3>
1208
  <div class="phase-chart" style="--progress:20%;">
1209
+ <span style="color: var(--text-primary) !important; text-shadow: 0 1px 2px rgba(0,0,0,0.8) !important; font-weight: 700 !important;">20%</span>
1210
  </div>
1211
  <ul class="phase-list">
1212
+ <li style="color: var(--text-primary);">L6: 효율적인 도구 활용</li>
1213
+ <li style="color: var(--text-primary);">L7: 장기 컨텍스트 기억</li>
1214
  </ul>
1215
  </div>
1216
  </div>
 
1224
  <h2 class="section-title" style="font-size: 2.0rem;">18๊ฐ€์ง€ ํ•œ๊ตญํ˜• API ์‚ฌ์šฉ ๋ฐ ์‹ค์ƒํ™œ ํ™˜๊ฒฝ์— ํŠนํ™”๋œ ๊ณ ํ’ˆ์งˆ ์‹œ๋‚˜๋ฆฌ์˜ค ๊ตฌ์„ฑ</h2>
1225
  </div>
1226
  <div class="scenario-body">
1227
+ <p style="color: var(--text-primary);">๋„ค์ด๋ฒ„, ์นด์นด์˜ค ๋“ฑ ๊ตญ๋‚ด ์‹ค์‚ฌ์šฉ API๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ, '์•ฝ์† ์˜ˆ์•ฝ', '๋ธ”๋กœ๊ทธ ํ›„๊ธฐ ๊ฒ€์ƒ‰'์ฒ˜๋Ÿผ ์ผ์ƒ์— ์œ ์šฉํ•œ ํ˜„์‹ค์ ์ธ ๋ฌธ์ œ ํ•ด๊ฒฐ ์‹œ๋‚˜๋ฆฌ์˜ค๋ฅผ ๊ตฌํ˜„ํ–ˆ์Šต๋‹ˆ๋‹ค.</p>
1228
  </div>
1229
 
1230
  </div>
 
1377
  filter: drop-shadow(0 0 2px rgba(255, 210, 30, 0.06));
1378
  letter-spacing: 0.02em;
1379
  animation: title-shimmer 1.25s ease-in-out infinite;
1380
+ font-family: 'Nanum Gothic', sans-serif !important;
1381
  }
1382
 
1383
  @keyframes title-shimmer {
 
1669
  border: 1px solid #333333 !important;
1670
  border-radius: 999px !important;
1671
  padding: 12px 24px !important;
1672
+ color: var(--text-primary) !important;
1673
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1674
  font-weight: 600 !important;
1675
  font-size: 1rem !important;
 
1700
  background: #000000 !important;
1701
  border: 1px solid #333333 !important;
1702
  border-radius: 999px !important;
1703
+ color: var(--text-primary) !important;
1704
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1705
  font-weight: 600 !important;
1706
  font-size: 0.95rem !important;
 
1755
  background: #ffd21e !important;
1756
  border: 1px solid rgba(255, 210, 30, 0.6) !important;
1757
  border-radius: 999px !important;
1758
+ color: var(--text-primary) !important;
1759
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
1760
  font-weight: 600 !important;
1761
  font-size: 0.95rem !important;
 
1802
  font-size: 1.5rem;
1803
  margin-bottom: 4px;
1804
  display: block;
1805
+ filter: drop-shadow(0 0 10px white);
1806
  }
1807
 
1808
  .domain-name {
 
1817
  top: 8px;
1818
  right: 8px;
1819
  background: var(--accent-primary);
1820
+ color: var(--text-primary);
1821
  font-size: 0.75rem;
1822
  padding: 2px 8px;
1823
  border-radius: 12px;
 
2065
  .inline-radio label[aria-checked="true"] {
2066
  background: rgba(255, 210, 30, 0.2) !important;
2067
  border-color: var(--accent-primary) !important;
2068
+ color: var(--text-primary) !important;
2069
  font-weight: 600 !important;
2070
  }
2071
  </style>
 
2078
  leaderboard_title = gr.HTML(update_leaderboard_title(default_level))
2079
 
2080
  # Integrated controls within leaderboard section - stacked vertically
2081
+ gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 5px 0; font-size: 1.2rem;'>ํƒœ์Šคํฌ ๋ ˆ๋ฒจ ์„ ํƒ</p>")
2082
  domain_filter = gr.Radio(
2083
  choices=level_options,
2084
  value=default_level,
 
2088
  elem_classes=["domain-radio", "inline-radio"]
2089
  )
2090
 
2091
+ gr.HTML("<p style='color: var(--text-primary); margin: 5px 0 0px 0; font-size: 1.2rem;'>๐Ÿ” ํ•„ํ„ฐ ๋ฐ ์ •๋ ฌ</p>")
2092
  with gr.Row():
2093
  with gr.Column(scale=1):
2094
+ gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>๋ชจ๋ธ ์ ‘๊ทผ</span>")
2095
  model_type_filter = gr.Radio(
2096
  choices=["All", "OSS", "API"],
2097
  value="All",
 
2100
  container=False
2101
  )
2102
  with gr.Column(scale=1):
2103
+ gr.HTML("<span style='color: var(--text-primary); font-size: 1.2rem; margin-bottom: 5px; display: block;'>์ •๋ ฌ ์ˆœ์„œ</span>")
2104
  sort_order = gr.Radio(
2105
  choices=["Descending", "Ascending"],
2106
  value="Descending",
 
2115
  gr.HTML("""
2116
  <div class="domain-selector-container domain-performance-container">
2117
  <div class="domain-header">
2118
+ <h2 class="domain-title" style="color: var(--text-primary);">핵심 역량 레이더</h2>
2119
+ <p class="domain-subtitle" style="color: var(--text-primary);">6๊ฐ€์ง€ ํ•„์ˆ˜ ํ•ต์‹ฌ ์š”์†Œ(์„ฑ๊ณต, ์‹คํ–‰, ์ถ”๋ก , ๊ฐ•๊ฑด์„ฑ, ํšจ์œจ์„ฑ, ํ˜ธ์ถœ ์œ ํšจ์„ฑ)๋ฅผ ์ถ”์ ํ•ฉ๋‹ˆ๋‹ค.</p>
2120
  </div>
2121
  """)
2122
 
2123
+ gr.HTML("<p style='color: var(--text-primary); margin: 10px 0 0 0; font-size: 1.2rem; font-family: \"Nanum Gothic\", sans-serif;'>๋น„๊ตํ•  ๋ชจ๋ธ์„ ์„ ํƒํ•˜์„ธ์š”. ์ตœ๋Œ€ 5๊ฐœ๊นŒ์ง€ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค.</p>")
2124
  # gr.HTML("<p style='color: #b0b0b0; margin: 0 0 10px 0; font-size: 0.9rem;'>๋ชจ๋ธ์€ ์ตœ๋Œ€ 5๊ฐœ๊นŒ์ง€ ์„ ํƒ ๊ฐ€๋Šฅ ํ•ฉ๋‹ˆ๋‹ค.</p>")
2125
  model_selector = gr.Dropdown(
2126
  choices=initial_df['Model'].tolist()[:10],
 
2298
  gr.HTML("""
2299
  <div class="domain-selector-container performance-card-container">
2300
  <div class="domain-header">
2301
+ <h2 class="domain-title" style="color: var(--text-primary);">모델 성능 카드</h2>
2302
+ <p class="domain-subtitle" style="color: var(--text-primary);">
2303
  ๋ชจ๋ธ์˜ ์„ฑ๋Šฅ ์ŠคํŽ™ํŠธ๋Ÿผ์„ 6๋Œ€ ํ•ต์‹ฌ ์ง€ํ‘œ์™€ L1~L7 ๋‹จ๊ณ„๋ณ„ ์ข…ํ•ฉ ์„ฑ๊ณต๋ฅ (SR)๋กœ ์‹œ๊ฐํ™”ํ•œ ์ •๋ฐ€ ๋ถ„์„ ์นด๋“œ๋ฅผ ํ™•์ธํ•ด๋ณด์„ธ์š”.
2304
  </p>
2305
  <p class="domain-note" style="color: #bdbdbd; font-size: 0.85em; margin-top: 4px;">
 
2312
 
2313
  with gr.Column(elem_classes=["domain-selector-container", "model-selector-container"], elem_id="model-selector-box"):
2314
  gr.HTML("""
2315
+ <p class="domain-subtitle" style="color: var(--text-primary);">๋ถ„์„ ์นด๋“œ๋ฅผ ์ƒ์„ฑํ•  ๋ชจ๋ธ์„ ์„ ํƒํ•˜์„ธ์š”.</p>
2316
 
2317
  """)
2318
  card_model_selector = gr.Dropdown(
 
2349
  gr.HTML("""
2350
  <div class="domain-selector-container domain-performance-container level-metrics-wrapper">
2351
  <div class="domain-header">
2352
+ <h2 class="domain-title" style="color: var(--text-primary);">레벨별 상세 지표</h2>
2353
+ <p class="domain-subtitle" style="color: var(--text-primary);">๊ฐ Ko-AgentBench ๋‹จ๊ณ„๋ณ„ ๊ณ ์œ  ํ‰๊ฐ€ ์ง€ํ‘œ๋ฅผ ํ†ตํ•ด ๋ชจ๋ธ ์ ์ˆ˜๋ฅผ ๋น„๊ตํ•˜๊ณ  ๋” ์ž์„ธํžˆ ์‚ดํŽด๋ณด์„ธ์š”.</p>
2354
  </div>
2355
  """)
2356
 
 
 
 
 
2357
  with gr.Column(elem_classes=["domain-selector-container", "level-selector-container"], elem_id="level-selector-box"):
2358
  level_metric_selector = gr.Dropdown(
2359
  choices=level_ids,
 
2389
  # gr.HTML("""
2390
  # <div class="domain-selector-container domain-performance-container heatmap-wrapper">
2391
  # <div class="domain-header">
2392
+ # <h2 class="domain-title" style="color: var(--text-primary);">์ข…ํ•ฉ ์„ฑ๋Šฅ ํžˆํŠธ๋งต</h2>
2393
+ # <p class="domain-subtitle" style="color: var(--text-primary);">๊ฐ ๋ชจ๋ธ์˜ L1~L7 Ko-AgentBench SR(์„ฑ๊ณต๋ฅ ) ์ ์ˆ˜๋ฅผ ํ•œ๋ˆˆ์— ๋ณด์„ธ์š”.</p>
2394
  # </div>
2395
  # <div class="chart-container heatmap-chart-container">
2396
  # """)
 
2838
  border: 1px solid #333333 !important;
2839
  border-radius: 999px !important;
2840
  padding: 12px 20px !important;
2841
+ color: var(--text-primary) !important;
2842
  font-family: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif !important;
2843
  font-weight: 600 !important;
2844
  font-size: 0.95rem !important;
 
2867
  .level-model-dropdown button {
2868
  background: #000000 !important;
2869
  border: 1px solid #333333 !important;
2870
+ color: var(--text-primary) !important;
2871
  }
2872
 
2873
  .radar-placeholder {
 
3045
  h2.section-title,
3046
  .dashboard-section .section-title,
3047
  .section-header .section-title {
3048
+ font-family: "Nanum Gothic", sans-serif !important;
3049
  }
3050
 
3051
  .domain-title,
3052
  h2.domain-title,
3053
  .domain-header .domain-title {
3054
+ font-family: "Nanum Gothic", sans-serif !important;
3055
  }
3056
 
3057
  .hero-title,
3058
  .hero-subtitle,
3059
  h1.hero-title,
3060
  p.hero-subtitle {
3061
+ font-family: "Nanum Gothic", sans-serif !important;
3062
  font-size: 2rem !important;
3063
  }
3064
 
 
3252
  palette = [
3253
  {'fill': 'rgba(255, 210, 30, 0.25)', 'line': '#ffd21e'},
3254
  {'fill': 'rgba(255, 138, 60, 0.22)', 'line': '#FF8A3C'},
3255
+ {'fill': 'rgba(161, 98, 7, 0.22)', 'line': '#A16207'},
3256
+ {'fill': 'rgba(220, 38, 38, 0.20)', 'line': '#DC2626'},
3257
  {'fill': 'rgba(248, 250, 252, 0.20)', 'line': '#F8FAFC'},
3258
  ]
3259
 
 
3378
  width=900,
3379
  margin=dict(t=30, b=50, l=10, r=10),
3380
  autosize=True,
3381
+ annotations=[]
 
 
 
 
 
 
 
 
 
3382
  )
3383
 
3384
  return fig
 
3637
  model_palette = [
3638
  '#ffd21e',
3639
  '#FF8A3C',
3640
+ '#A16207',
3641
+ '#DC2626',
3642
  '#F8FAFC',
3643
  '#38BDF8',
3644
  ]
utils.py CHANGED
@@ -9,8 +9,8 @@ def get_chart_colors():
9
  # "grid": (1, 1, 1, 0.1), # RGBA tuple for grid
10
  # }
11
  return {
12
- "Private": "#3F78FA", # accent-blue light
13
- "Open source": "#A13AE2", # accent-purple light
14
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
15
  "text": "#111827",
16
  "background": "#FFFFFF",
@@ -20,10 +20,12 @@ def get_chart_colors():
20
 
21
  def get_rank_badge(rank):
22
  """Generate HTML for rank badge with appropriate styling"""
 
 
23
  badge_styles = {
24
- 1: ("1st", "linear-gradient(145deg, #ffd700, #ffc400)", "#000"),
25
- 2: ("2nd", "linear-gradient(145deg, #9ca3af, #787C7E)", "#fff"),
26
- 3: ("3rd", "linear-gradient(145deg, #CD7F32, #b36a1d)", "#fff"),
27
  }
28
 
29
  if rank in badge_styles:
@@ -63,24 +65,25 @@ def get_type_badge(model_type):
63
  """Generate HTML for model type badge"""
64
  colors = get_chart_colors()
65
  color_map = {
66
- "Open source": colors.get("Open source", "#A13AE2"),
67
- "Proprietary": colors.get("Private", "#3F78FA"),
68
- "Private": colors.get("Private", "#3F78FA"),
69
  }
70
  label_map = {
71
  "Open source": "OSS",
72
  "Proprietary": "API",
73
  "Private": "API",
74
  }
75
- bg_color = color_map.get(model_type, "#4F46E5")
76
  display_label = label_map.get(model_type, model_type)
 
77
  return f"""
78
  <div style="
79
  display: inline-flex;
80
  align-items: center;
81
  padding: 4px 8px;
82
  background: {bg_color};
83
- color: white;
84
  border-radius: 4px;
85
  font-size: 0.85em;
86
  font-weight: 500;
 
9
  # "grid": (1, 1, 1, 0.1), # RGBA tuple for grid
10
  # }
11
  return {
12
+ "Private": "#593B1D", # rich brown for API
13
+ "Open source": "#FACC15", # warm amber for OSS
14
  "performance_bands": ["#DCFCE7", "#FEF9C3", "#FEE2E2"],
15
  "text": "#111827",
16
  "background": "#FFFFFF",
 
20
 
21
  def get_rank_badge(rank):
22
  """Generate HTML for rank badge with appropriate styling"""
23
+ tag_background = "#593B1D"
24
+ tag_text_color = "#FFFFFF"
25
  badge_styles = {
26
+ 1: ("1st", tag_background, tag_text_color),
27
+ 2: ("2nd", tag_background, tag_text_color),
28
+ 3: ("3rd", tag_background, tag_text_color),
29
  }
30
 
31
  if rank in badge_styles:
 
65
  """Generate HTML for model type badge"""
66
  colors = get_chart_colors()
67
  color_map = {
68
+ "Open source": colors.get("Open source", "#FACC15"),
69
+ "Proprietary": colors.get("Private", "#593B1D"),
70
+ "Private": colors.get("Private", "#593B1D"),
71
  }
72
  label_map = {
73
  "Open source": "OSS",
74
  "Proprietary": "API",
75
  "Private": "API",
76
  }
77
+ bg_color = color_map.get(model_type, "#593B1D")
78
  display_label = label_map.get(model_type, model_type)
79
+ text_color = "#111827" if display_label == "OSS" else "#FFFFFF"
80
  return f"""
81
  <div style="
82
  display: inline-flex;
83
  align-items: center;
84
  padding: 4px 8px;
85
  background: {bg_color};
86
+ color: {text_color};
87
  border-radius: 4px;
88
  font-size: 0.85em;
89
  font-weight: 500;