File size: 6,418 Bytes
7b6b43e 4966301 7b6b43e 4966301 7b6b43e 4966301 7b6b43e 4966301 7b6b43e 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 a399453 4966301 7b6b43e 4966301 7b6b43e 4966301 7b6b43e a399453 7b6b43e 4966301 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>MCP Benchmark Leaderboard</title>
  <meta name="description" content="MCP-Bench leaderboard: benchmarking tool-using LLM agents with complex real-world tasks via MCP servers.">
  <!-- Warm up connections to the Google Fonts origins. The font files are
       fetched from fonts.gstatic.com in CORS mode, hence `crossorigin`. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <!-- Stylesheet order is unchanged: local styles, then web font, then icons. -->
  <link rel="stylesheet" href="style.css">
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
  <!-- NOTE(review): this third-party stylesheet is loaded without an
       integrity (SRI) hash; consider adding integrity + crossorigin once
       the hash for this exact file has been confirmed. -->
  <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css" rel="stylesheet">
</head>
<body>
<div class="container">
<!-- Paper information: title, authors, affiliation and quick links -->
<header class="paper-header">
  <h1 class="paper-title">MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers</h1>
  <div class="paper-authors">
    <p>Zhenting Wang, Qi Chang, Hemani Patel, Shashank Biju, Cheng-En Wu, Quan Liu, Aolin Ding, Alireza Rezazadeh, Ankit Shah, Yujia Bao, Eugene Siow</p>
    <p class="affiliation">Accenture, UC Berkeley</p>
  </div>
  <div class="paper-links">
    <!-- The icons are decorative (every link carries visible text), so they
         are hidden from assistive technology with aria-hidden. -->
    <a class="paper-link" href="https://github.com/Accenture/mcp-bench">
      <i class="fab fa-github" aria-hidden="true"></i> GitHub
    </a>
    <a class="paper-link" href="https://arxiv.org/abs/2508.20453">
      <i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
    </a>
    <!-- In-page jump to the section with id="leaderboard". -->
    <a class="paper-link" href="#leaderboard">
      <i class="fas fa-trophy" aria-hidden="true"></i> Leaderboard
    </a>
  </div>
</header>
<!-- Architecture diagram followed by a one-paragraph summary of the benchmark -->
<section class="diagram-section">
  <img class="diagram-image" src="mcp-bench.png" alt="MCP-Bench Architecture Diagram">
  <p class="diagram-caption">
    MCP-Bench is a comprehensive evaluation framework designed to assess Large Language Models' (LLMs) capabilities in tool-use scenarios through the Model Context Protocol (MCP). This benchmark provides an end-to-end pipeline for evaluating how effectively different LLMs can discover, select, and utilize tools to solve real-world tasks.
  </p>
</section>
<!-- Static ranking chart image shown under the "Performance Ranking" heading -->
<section class="chart-section">
  <h2 class="section-title">Performance Ranking</h2>
  <img class="ranking-chart" src="ranking.png" alt="MCP Benchmark Ranking Chart">
</section>
<!-- Leaderboard: detailed per-model results table (target of the #leaderboard link) -->
<section class="leaderboard-section" id="leaderboard">
  <h2 class="section-title">Detailed Results</h2>
  <div class="table-container">
    <table class="leaderboard-table" id="leaderboardTable">
      <thead>
        <!-- scope="col" ties each header cell to its column so assistive
             technology can announce the header for every data cell. -->
        <tr>
          <th class="model-col" scope="col"><strong>Model</strong></th>
          <th class="score-col" scope="col"><strong>Overall Score</strong></th>
          <th class="metric-col" scope="col">Valid Tool<br>Name Rate</th>
          <th class="metric-col" scope="col">Schema<br>Compliance</th>
          <th class="metric-col" scope="col">Execution<br>Success</th>
          <th class="metric-col" scope="col">Task<br>Fulfillment</th>
          <th class="metric-col" scope="col">Information<br>Grounding</th>
          <th class="metric-col" scope="col">Tool<br>Appropriateness</th>
          <th class="metric-col" scope="col">Parameter<br>Accuracy</th>
          <th class="metric-col" scope="col">Dependency<br>Awareness</th>
          <th class="metric-col" scope="col">Parallelism<br>and Efficiency</th>
        </tr>
      </thead>
      <tbody id="tableBody">
        <!-- Table rows will be generated by JavaScript -->
        <!-- NOTE(review): no script visible in this file writes to #tableBody;
             confirm the row-generating script is actually loaded. -->
      </tbody>
    </table>
  </div>
</section>
<!-- Citation: BibTeX entry plus a copy-to-clipboard button -->
<section class="citation-section">
  <h2 class="section-title">Citation</h2>
  <div class="citation-box">
    <!-- <pre> preserves whitespace, so the BibTeX body keeps its own
         indentation instead of following the markup's nesting level.
         Year and key use 2025 to agree with the arXiv id 2508.20453. -->
    <pre class="citation-text">@article{wang2025mcpbench,
  title={MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers},
  author={Wang, Zhenting and Chang, Qi and Patel, Hemani and Biju, Shashank and Wu, Cheng-En and Liu, Quan and Ding, Aolin and Rezazadeh, Alireza and Shah, Ankit and Bao, Yujia and Siow, Eugene},
  journal={arXiv preprint arXiv:2508.20453},
  year={2025}
}</pre>
    <!-- type="button" makes the non-submit intent explicit; the icon is
         decorative because the button has a visible text label. -->
    <button type="button" class="copy-citation-btn" onclick="copyCitation()">
      <i class="fas fa-copy" aria-hidden="true"></i> Copy Citation
    </button>
  </div>
</section>
<footer class="footer">
  <!-- #lastUpdated is empty in the markup; the inline script at the bottom
       of this page fills it in on DOMContentLoaded. -->
  <p>Last updated: <span id="lastUpdated"></span></p>
  <!-- Identifier written in the same "arXiv:NNNN.NNNNN" form used in the citation. -->
  <p>Data source: MCP-Bench Results (arXiv:2508.20453)</p>
</footer>
</div>
<script>
/**
 * Copy the BibTeX citation to the clipboard and briefly show the outcome on
 * the copy button.
 *
 * Reads the text of `.citation-text`, writes it to the clipboard, then swaps
 * the `.copy-citation-btn` label to a success or failure message for two
 * seconds before restoring the original label. Does nothing if either
 * element is missing. Takes no arguments and returns nothing.
 */
function copyCitation() {
  const citationEl = document.querySelector('.citation-text');
  const button = document.querySelector('.copy-citation-btn');
  if (!citationEl || !button) {
    return;
  }

  // Capture the pristine label only once. Reading it on every click would
  // pick up the temporary "Copied!" label on a quick second click and the
  // later timer would then "restore" the button to "Copied!" permanently.
  if (copyCitation.originalLabel === undefined) {
    copyCitation.originalLabel = button.innerHTML;
  }

  // Show a temporary label/colour, then put the original label back.
  const flash = (labelHtml, color) => {
    button.innerHTML = labelHtml;
    button.style.backgroundColor = color;
    // Cancel any reset still pending from an earlier click so the newest
    // feedback always stays visible for the full two seconds.
    clearTimeout(copyCitation.resetTimer);
    copyCitation.resetTimer = setTimeout(() => {
      button.innerHTML = copyCitation.originalLabel;
      button.style.backgroundColor = '';
    }, 2000);
  };

  const showCopied = () =>
    flash('<i class="fas fa-check" aria-hidden="true"></i> Copied!', '#4caf50');
  const showFailed = () =>
    flash('<i class="fas fa-times" aria-hidden="true"></i> Copy failed', '#f44336');

  // navigator.clipboard is only exposed in secure contexts (HTTPS or
  // localhost); without this guard the call below throws a TypeError.
  if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
    showFailed();
    return;
  }

  // writeText() rejects when the write is not permitted; surface that to the
  // user instead of leaving an unhandled promise rejection.
  navigator.clipboard.writeText(citationEl.textContent).then(showCopied, showFailed);
}
// Once the DOM is parsed, write the "last updated" stamp into the footer.
document.addEventListener('DOMContentLoaded', () => {
  const stampEl = document.getElementById('lastUpdated');
  // Nothing to do if the footer placeholder is absent.
  if (!stampEl) {
    return;
  }
  stampEl.textContent = 'December 2024';
});
</script>
</body>
</html> |