Spaces:

k-mktr
/

gpu-poor-llm-arena

Running

App Files Files Community

k-mktr committed on Oct 22, 2024

Commit

0efd625

verified ·

1 Parent(s): 289638f

Update leaderboard.py

Browse files

Files changed (1) hide show

leaderboard.py +38 -14

leaderboard.py CHANGED Viewed

@@ -108,41 +108,64 @@ def get_human_readable_name(model_name: str) -> str:
 def get_leaderboard():
     leaderboard = load_leaderboard()
     sorted_results = sorted(
         leaderboard.items(),
-        key=lambda x: (x[1]["wins"] / (x[1]["wins"] + x[1]["losses"]) if x[1]["wins"] + x[1]["losses"] > 0 else 0, x[1]["wins"] + x[1]["losses"]),
         reverse=True
     )
-    leaderboard_html = """
     <style>
-        .leaderboard-table {
             width: 100%;
             border-collapse: collapse;
             font-family: Arial, sans-serif;
-        }
-        .leaderboard-table th, .leaderboard-table td {
             border: 1px solid #ddd;
             padding: 8px;
             text-align: left;
-        }
-        .leaderboard-table th {
             background-color: rgba(255, 255, 255, 0.1);
             font-weight: bold;
-        }
-        .rank-column {
             width: 60px;
             text-align: center;
-        }
-        .opponent-details {
             font-size: 0.9em;
             color: #888;
-        }
     </style>
     <table class='leaderboard-table'>
     <tr>
         <th class='rank-column'>Rank</th>
         <th>Model</th>
         <th>Wins</th>
         <th>Losses</th>
         <th>Win Rate</th>
@@ -170,6 +193,7 @@ def get_leaderboard():
         <tr>
             <td class='rank-column'>{rank_display}</td>
             <td>{get_human_readable_name(model)}</td>
             <td>{results['wins']}</td>
             <td>{results['losses']}</td>
             <td>{win_rate:.2f}%</td>
@@ -189,7 +213,7 @@ def get_elo_leaderboard():
     min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
     max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
-    explanation = f"""
     <p style="font-size: 16px; margin-bottom: 20px;">
     This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
     Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
@@ -200,7 +224,7 @@ def get_elo_leaderboard():
     """
     leaderboard_html = f"""
-    {explanation}
     <style>
         .elo-leaderboard-table {{
             width: 100%;

 def get_leaderboard():
     leaderboard = load_leaderboard()
+    # Calculate scores for each model
+    for model, results in leaderboard.items():
+        total_battles = results["wins"] + results["losses"]
+        if total_battles > 0:
+            win_rate = results["wins"] / total_battles
+            results["score"] = win_rate * (1 - 1 / (total_battles + 1))
+        else:
+            results["score"] = 0
+    # Sort results by score, then by total battles
     sorted_results = sorted(
         leaderboard.items(),
+        key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
         reverse=True
     )
+    # Explanation of the main leaderboard
+    explanation = """
+    <p style="font-size: 16px; margin-bottom: 20px;">
+    This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
+    <br>
+    <strong>Score = Win Rate * (1 - 1 / (Total Battles + 1))</strong>
+    <br>
+    This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
+    </p>
+    """
+    leaderboard_html = f"""
+    {explanation}
     <style>
+        .leaderboard-table {{
             width: 100%;
             border-collapse: collapse;
             font-family: Arial, sans-serif;
+        }}
+        .leaderboard-table th, .leaderboard-table td {{
             border: 1px solid #ddd;
             padding: 8px;
             text-align: left;
+        }}
+        .leaderboard-table th {{
             background-color: rgba(255, 255, 255, 0.1);
             font-weight: bold;
+        }}
+        .rank-column {{
             width: 60px;
             text-align: center;
+        }}
+        .opponent-details {{
             font-size: 0.9em;
             color: #888;
+        }}
     </style>
     <table class='leaderboard-table'>
     <tr>
         <th class='rank-column'>Rank</th>
         <th>Model</th>
+        <th>Score</th>
         <th>Wins</th>
         <th>Losses</th>
         <th>Win Rate</th>
         <tr>
             <td class='rank-column'>{rank_display}</td>
             <td>{get_human_readable_name(model)}</td>
+            <td>{results['score']:.4f}</td>
             <td>{results['wins']}</td>
             <td>{results['losses']}</td>
             <td>{win_rate:.2f}%</td>
     min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
     max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
+    explanation_elo = f"""
     <p style="font-size: 16px; margin-bottom: 20px;">
     This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
     Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
     """
     leaderboard_html = f"""
+    {explanation_elo}
     <style>
         .elo-leaderboard-table {{
             width: 100%;