File size: 6,418 Bytes
7b6b43e
 
 
 
 
4966301
 
 
 
7b6b43e
4966301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b6b43e
 
4966301
 
 
 
 
 
7b6b43e
 
4966301
 
 
 
7b6b43e
4966301
 
 
 
 
 
 
 
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
a399453
4966301
 
 
 
 
 
 
 
 
 
 
7b6b43e
 
 
4966301
 
 
 
 
 
 
 
 
 
 
 
7b6b43e
 
 
4966301
 
 
 
7b6b43e
 
a399453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b6b43e
4966301
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>MCP Benchmark Leaderboard</title>
    <!-- Summary shown by search engines and link previews -->
    <meta name="description" content="Leaderboard and detailed results for MCP-Bench, a benchmark of tool-using LLM agents on complex real-world tasks via MCP servers.">
    <!-- Open connections to the Google Fonts origins early: the CSS and the font files are served from different hosts -->
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <!-- Stylesheet order is kept as-is so the cascade does not change -->
    <link rel="stylesheet" href="style.css">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap">
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
</head>
<body>
    <div class="container">
        <!-- Paper Information: title, author list, affiliation and quick links -->
        <header class="paper-header">
            <h1 class="paper-title">MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers</h1>
            <div class="paper-authors">
                <p>Zhenting Wang, Qi Chang, Hemani Patel, Shashank Biju, Cheng-En Wu, Quan Liu, Aolin Ding, Alireza Rezazadeh, Ankit Shah, Yujia Bao, Eugene Siow</p>
                <p class="affiliation">Accenture, UC Berkeley</p>
            </div>
            <div class="paper-links">
                <!-- The icons are decorative (the link text already names the destination), so they are hidden from assistive technology -->
                <a class="paper-link" href="https://github.com/Accenture/mcp-bench">
                    <i class="fab fa-github" aria-hidden="true"></i> GitHub
                </a>
                <a class="paper-link" href="https://arxiv.org/abs/2508.20453">
                    <i class="fas fa-file-pdf" aria-hidden="true"></i> Paper
                </a>
                <!-- In-page jump to the section with id="leaderboard" -->
                <a class="paper-link" href="#leaderboard">
                    <i class="fas fa-trophy" aria-hidden="true"></i> Leaderboard
                </a>
            </div>
        </header>

        <!-- Overview: architecture diagram followed by a short description of the benchmark -->
        <section class="diagram-section">
            <img class="diagram-image" src="mcp-bench.png" alt="MCP-Bench Architecture Diagram">
            <p class="diagram-caption">MCP-Bench is a comprehensive evaluation framework designed to assess Large Language Models' (LLMs) capabilities in tool-use scenarios through the Model Context Protocol (MCP). This benchmark provides an end-to-end pipeline for evaluating how effectively different LLMs can discover, select, and utilize tools to solve real-world tasks.</p>
        </section>

        <!-- Performance ranking, rendered as a static image -->
        <section class="chart-section">
            <h2 class="section-title">Performance Ranking</h2>
            <img
                class="ranking-chart"
                src="ranking.png"
                alt="MCP Benchmark Ranking Chart">
        </section>

        <!-- Detailed results table; target of the "#leaderboard" link in the page header -->
        <section class="leaderboard-section" id="leaderboard">
            <h2 class="section-title" id="leaderboardTitle">Detailed Results</h2>

            <div class="table-container">
                <!-- The section heading doubles as the table's accessible name -->
                <table class="leaderboard-table" id="leaderboardTable" aria-labelledby="leaderboardTitle">
                    <thead>
                        <tr>
                            <!-- scope="col" ties every header cell to its column for screen readers -->
                            <th class="model-col" scope="col">
                                <strong>Model</strong>
                            </th>
                            <th class="score-col" scope="col">
                                <strong>Overall Score</strong>
                            </th>
                            <th class="metric-col" scope="col">
                                Valid Tool<br>Name Rate
                            </th>
                            <th class="metric-col" scope="col">
                                Schema<br>Compliance
                            </th>
                            <th class="metric-col" scope="col">
                                Execution<br>Success
                            </th>
                            <th class="metric-col" scope="col">
                                Task<br>Fulfillment
                            </th>
                            <th class="metric-col" scope="col">
                                Information<br>Grounding
                            </th>
                            <th class="metric-col" scope="col">
                                Tool<br>Appropriateness
                            </th>
                            <th class="metric-col" scope="col">
                                Parameter<br>Accuracy
                            </th>
                            <th class="metric-col" scope="col">
                                Dependency<br>Awareness
                            </th>
                            <th class="metric-col" scope="col">
                                Parallelism<br>and Efficiency
                            </th>
                        </tr>
                    </thead>
                    <tbody id="tableBody">
                        <!-- Table rows will be generated by JavaScript -->
                        <!-- NOTE(review): the inline script in this file never writes to #tableBody and no external script is loaded; confirm where the rows are supposed to come from -->
                    </tbody>
                </table>
            </div>
        </section>

        <!-- Citation Section: BibTeX entry plus a button that copies it to the clipboard -->
        <section class="citation-section">
            <h2 class="section-title">Citation</h2>
            <div class="citation-box">
                <!-- The <pre> content is whitespace-sensitive, so the entry lines stay flush-left on purpose.
                     Year and key say 2025 because the arXiv id 2508.20453 denotes an August 2025 submission. -->
                <pre class="citation-text">@article{wang2025mcpbench,
  title={MCP-Bench: Benchmarking Tool-Using LLM Agents with Complex Real-World Tasks via MCP Servers},
  author={Wang, Zhenting and Chang, Qi and Patel, Hemani and Biju, Shashank and Wu, Cheng-En and Liu, Quan and Ding, Aolin and Rezazadeh, Alireza and Shah, Ankit and Bao, Yujia and Siow, Eugene},
  journal={arXiv preprint arXiv:2508.20453},
  year={2025}
}</pre>
                <!-- type="button" keeps this a plain action button even if it is ever placed inside a form -->
                <button class="copy-citation-btn" type="button" onclick="copyCitation()">
                    <i class="fas fa-copy" aria-hidden="true"></i> Copy Citation
                </button>
            </div>
        </section>

        <!-- Page footer: the empty span is filled in by the inline script once the DOM is ready -->
        <footer class="footer">
            <p>Last updated: <span id="lastUpdated"></span></p>
            <p>Data source: MCP-Bench Results (arXiv: 2508.20453)</p>
        </footer>
    </div>

    <script>
        /**
         * Copies the text of the `.citation-text` element to the clipboard and
         * shows a two-second status message on the `.copy-citation-btn` button.
         *
         * Invoked from the button's inline onclick handler. Takes no arguments
         * and returns nothing.
         *
         * Failure cases are reported on the button instead of throwing:
         *  - the Clipboard API is unavailable (navigator.clipboard is undefined
         *    outside secure contexts);
         *  - the write is rejected (for example, permission denied).
         */
        function copyCitation() {
            const citationEl = document.querySelector('.citation-text');
            const button = document.querySelector('.copy-citation-btn');
            if (!citationEl || !button) {
                return;
            }

            // Capture the resting label only once. Reading innerHTML on every
            // click would pick up the "Copied!" markup on a quick second click
            // and leave the button stuck on it after the timers run out.
            if (copyCitation.defaultLabel === undefined) {
                copyCitation.defaultLabel = button.innerHTML;
            }

            const showFeedback = (html, color) => {
                button.innerHTML = html;
                button.style.backgroundColor = color;

                // Restart the countdown so an earlier click cannot reset the
                // button while newer feedback is still being displayed.
                clearTimeout(copyCitation.resetTimer);
                copyCitation.resetTimer = setTimeout(() => {
                    button.innerHTML = copyCitation.defaultLabel;
                    button.style.backgroundColor = '';
                }, 2000);
            };

            const showFailure = () => {
                showFeedback('<i class="fas fa-times"></i> Copy failed', '#f44336');
            };

            if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
                showFailure();
                return;
            }

            navigator.clipboard.writeText(citationEl.textContent).then(
                () => showFeedback('<i class="fas fa-check"></i> Copied!', '#4caf50'),
                showFailure
            );
        }
        
        // Once the document has been parsed, write the "last updated" label
        // into the footer span with id="lastUpdated" (skipped if it is absent).
        // NOTE(review): this date is earlier than the arXiv id 2508.20453
        // (August 2025) cited on the page; confirm the intended value.
        document.addEventListener('DOMContentLoaded', () => {
            const stamp = document.getElementById('lastUpdated');
            if (!stamp) {
                return;
            }
            stamp.textContent = 'December 2024';
        });
    </script>
</body>
</html>