Spaces:
Running
Running
Update leaderboard.py
Browse files - leaderboard.py +38 -14
leaderboard.py
CHANGED
|
@@ -108,41 +108,64 @@ def get_human_readable_name(model_name: str) -> str:
|
|
| 108 |
|
| 109 |
def get_leaderboard():
|
| 110 |
leaderboard = load_leaderboard()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
sorted_results = sorted(
|
| 112 |
leaderboard.items(),
|
| 113 |
-
key=lambda x: (x[1]["
|
| 114 |
reverse=True
|
| 115 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
-
leaderboard_html = """
|
|
|
|
| 118 |
<style>
|
| 119 |
-
.leaderboard-table {
|
| 120 |
width: 100%;
|
| 121 |
border-collapse: collapse;
|
| 122 |
font-family: Arial, sans-serif;
|
| 123 |
-
}
|
| 124 |
-
.leaderboard-table th, .leaderboard-table td {
|
| 125 |
border: 1px solid #ddd;
|
| 126 |
padding: 8px;
|
| 127 |
text-align: left;
|
| 128 |
-
}
|
| 129 |
-
.leaderboard-table th {
|
| 130 |
background-color: rgba(255, 255, 255, 0.1);
|
| 131 |
font-weight: bold;
|
| 132 |
-
}
|
| 133 |
-
.rank-column {
|
| 134 |
width: 60px;
|
| 135 |
text-align: center;
|
| 136 |
-
}
|
| 137 |
-
.opponent-details {
|
| 138 |
font-size: 0.9em;
|
| 139 |
color: #888;
|
| 140 |
-
}
|
| 141 |
</style>
|
| 142 |
<table class='leaderboard-table'>
|
| 143 |
<tr>
|
| 144 |
<th class='rank-column'>Rank</th>
|
| 145 |
<th>Model</th>
|
|
|
|
| 146 |
<th>Wins</th>
|
| 147 |
<th>Losses</th>
|
| 148 |
<th>Win Rate</th>
|
|
@@ -170,6 +193,7 @@ def get_leaderboard():
|
|
| 170 |
<tr>
|
| 171 |
<td class='rank-column'>{rank_display}</td>
|
| 172 |
<td>{get_human_readable_name(model)}</td>
|
|
|
|
| 173 |
<td>{results['wins']}</td>
|
| 174 |
<td>{results['losses']}</td>
|
| 175 |
<td>{win_rate:.2f}%</td>
|
|
@@ -189,7 +213,7 @@ def get_elo_leaderboard():
|
|
| 189 |
min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
| 190 |
max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
| 191 |
|
| 192 |
-
|
| 193 |
<p style="font-size: 16px; margin-bottom: 20px;">
|
| 194 |
This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
|
| 195 |
Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
|
|
@@ -200,7 +224,7 @@ def get_elo_leaderboard():
|
|
| 200 |
"""
|
| 201 |
|
| 202 |
leaderboard_html = f"""
|
| 203 |
-
{
|
| 204 |
<style>
|
| 205 |
.elo-leaderboard-table {{
|
| 206 |
width: 100%;
|
|
|
|
| 108 |
|
| 109 |
def get_leaderboard():
|
| 110 |
leaderboard = load_leaderboard()
|
| 111 |
+
|
| 112 |
+
# Calculate scores for each model
|
| 113 |
+
for model, results in leaderboard.items():
|
| 114 |
+
total_battles = results["wins"] + results["losses"]
|
| 115 |
+
if total_battles > 0:
|
| 116 |
+
win_rate = results["wins"] / total_battles
|
| 117 |
+
results["score"] = win_rate * (1 - 1 / (total_battles + 1))
|
| 118 |
+
else:
|
| 119 |
+
results["score"] = 0
|
| 120 |
+
|
| 121 |
+
# Sort results by score, then by total battles
|
| 122 |
sorted_results = sorted(
|
| 123 |
leaderboard.items(),
|
| 124 |
+
key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
|
| 125 |
reverse=True
|
| 126 |
)
|
| 127 |
+
# Explanation of the main leaderboard
|
| 128 |
+
explanation = """
|
| 129 |
+
<p style="font-size: 16px; margin-bottom: 20px;">
|
| 130 |
+
This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
|
| 131 |
+
<br>
|
| 132 |
+
<strong>Score = Win Rate * (1 - 1 / (Total Battles + 1))</strong>
|
| 133 |
+
<br>
|
| 134 |
+
This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
|
| 135 |
+
</p>
|
| 136 |
+
"""
|
| 137 |
|
| 138 |
+
leaderboard_html = f"""
|
| 139 |
+
{explanation}
|
| 140 |
<style>
|
| 141 |
+
.leaderboard-table {{
|
| 142 |
width: 100%;
|
| 143 |
border-collapse: collapse;
|
| 144 |
font-family: Arial, sans-serif;
|
| 145 |
+
}}
|
| 146 |
+
.leaderboard-table th, .leaderboard-table td {{
|
| 147 |
border: 1px solid #ddd;
|
| 148 |
padding: 8px;
|
| 149 |
text-align: left;
|
| 150 |
+
}}
|
| 151 |
+
.leaderboard-table th {{
|
| 152 |
background-color: rgba(255, 255, 255, 0.1);
|
| 153 |
font-weight: bold;
|
| 154 |
+
}}
|
| 155 |
+
.rank-column {{
|
| 156 |
width: 60px;
|
| 157 |
text-align: center;
|
| 158 |
+
}}
|
| 159 |
+
.opponent-details {{
|
| 160 |
font-size: 0.9em;
|
| 161 |
color: #888;
|
| 162 |
+
}}
|
| 163 |
</style>
|
| 164 |
<table class='leaderboard-table'>
|
| 165 |
<tr>
|
| 166 |
<th class='rank-column'>Rank</th>
|
| 167 |
<th>Model</th>
|
| 168 |
+
<th>Score</th>
|
| 169 |
<th>Wins</th>
|
| 170 |
<th>Losses</th>
|
| 171 |
<th>Win Rate</th>
|
|
|
|
| 193 |
<tr>
|
| 194 |
<td class='rank-column'>{rank_display}</td>
|
| 195 |
<td>{get_human_readable_name(model)}</td>
|
| 196 |
+
<td>{results['score']:.4f}</td>
|
| 197 |
<td>{results['wins']}</td>
|
| 198 |
<td>{results['losses']}</td>
|
| 199 |
<td>{win_rate:.2f}%</td>
|
|
|
|
| 213 |
min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
| 214 |
max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
| 215 |
|
| 216 |
+
explanation_elo = f"""
|
| 217 |
<p style="font-size: 16px; margin-bottom: 20px;">
|
| 218 |
This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
|
| 219 |
Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
|
|
|
|
| 224 |
"""
|
| 225 |
|
| 226 |
leaderboard_html = f"""
|
| 227 |
+
{explanation_elo}
|
| 228 |
<style>
|
| 229 |
.elo-leaderboard-table {{
|
| 230 |
width: 100%;
|