leaderboard / app.py
burtenshaw
fix logging
ff5d3a1
import json
import random
from pathlib import Path
import gradio as gr
import pandas as pd
from datasets import load_dataset
# Directory containing this file; data files shipped with the app live next to it.
abs_path = Path(__file__).parent
# Parsed contents of submissions.json. read_text() opens and closes the file in
# one call (the previous bare open() left the handle to the garbage collector),
# and the explicit encoding avoids depending on the platform's locale default.
submissions = json.loads((abs_path / "submissions.json").read_text(encoding="utf-8"))
# One (display name, task key, metric key) triple per benchmark; load_results
# unpacks each entry in exactly this order.
TASKS = [
    ("gsm8k", "lighteval|gsm8k|0", "extractive_match"),
]

# Per-column display settings, kept side by side so header, datatype and width
# stay aligned: (header, datatype, width).
# NOTE(review): the third header reads "MMLU" while the only entry in TASKS is
# gsm8k - confirm the label is intended.
_COLUMN_SPECS = (
    ("User", "markdown", "25%"),
    ("Model Name", "markdown", "25%"),
    ("MMLU", "number", "15%"),
    ("Average ⬆️", "number", "15%"),
    ("Results", "markdown", "10%"),
)

TYPES = [datatype for _, datatype, _ in _COLUMN_SPECS]
COLUMNS = [header for header, _, _ in _COLUMN_SPECS]
WIDTHS = [width for _, _, width in _COLUMN_SPECS]
def load_results(dataset, tasks=None):
    """Extract one score per task from the most recent results record.

    Args:
        dataset: Mapping-like object where ``dataset["latest"]["results"]`` is
            a sequence of JSON strings; only the last entry is read.
        tasks: Optional iterable of ``(name, task, metric)`` triples to look
            up. Defaults to the module-level ``TASKS``.

    Returns:
        A list of ``(name, score)`` tuples, one per task, in ``tasks`` order.

    Raises:
        ValueError: If the latest record is missing or empty, cannot be parsed
            as JSON, or lacks a requested task or metric key.
    """
    if tasks is None:
        tasks = TASKS

    try:
        raw = dataset["latest"]["results"][-1]
    except KeyError as err:
        raise ValueError("Cannot find 'latest' key in the dataset") from err
    except IndexError as err:
        # An empty results sequence is reported the same way as other bad
        # submissions so callers only have to handle ValueError.
        raise ValueError("Cannot find any results under the 'latest' key") from err

    try:
        scores = json.loads(raw)
    except (TypeError, ValueError) as err:
        # TypeError covers payloads that are not str/bytes (e.g. None).
        raise ValueError("Cannot parse the output as JSON") from err

    results = []
    for name, task, metric in tasks:
        # Look each task up in the parsed document itself; reusing one
        # variable for both the document and the per-task value would break
        # every task after the first.
        try:
            task_scores = scores[task]
        except (KeyError, TypeError) as err:
            raise ValueError(f"Cannot find '{task}' key in the dataset") from err
        try:
            score = task_scores[metric]
        except (KeyError, TypeError) as err:
            raise ValueError(f"Cannot find '{metric}' key in the dataset") from err
        results.append((name, score))

    return results
def load_submissions():
    """Build the leaderboard table from every entry in ``submissions``.

    For each submission the "results" configuration of its results dataset is
    loaded and passed to ``load_results``. Submissions that fail to load or
    parse are printed and skipped, so one bad entry does not take the whole
    leaderboard down.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully loaded
        submission, float columns rounded to two decimals, sorted by
        "Average ⬆️" in descending order. If no submission could be loaded
        the returned frame is empty.
    """
    leaderboard = []
    for i, submission in enumerate(submissions["submissions"]):
        try:
            ds = load_dataset(submission["results-dataset"], "results")
            results = load_results(ds)
        except ValueError as e:
            print(submission)
            print(f"Cannot load results for {e}")
            # Skip this entry: without this, execution fell through with
            # `results` either undefined or left over from the previous
            # submission.
            continue
        except Exception as e:
            # Deliberately broad: this is a best-effort loop over externally
            # hosted datasets, and any failure should only drop this one row.
            print(submission)
            print(f"Cannot load dataset for {i} : {submission} {e}")
            continue

        username = submission["username"]
        model_name = submission["model_name"]

        leaderboard_row = {
            "username": f"[{username}](https://huggingface.co/{username})",
            "model_name": (
                f"[{model_name}](https://huggingface.co/{username}/{model_name})"
            ),
        }
        for name, result in results:
            leaderboard_row[name] = result
        leaderboard_row["Average ⬆️"] = sum(result for _, result in results) / len(
            results
        )
        leaderboard_row["results-dataset"] = (
            f"[🔗](https://huggingface.co/datasets/{submission['results-dataset']})"
        )
        leaderboard.append(leaderboard_row)

    df = pd.DataFrame(leaderboard)
    if df.empty:
        # With no rows there is no "Average ⬆️" column, and sorting by it
        # would raise KeyError.
        return df

    for column in df.columns:
        if df[column].dtype == "float64":
            df[column] = df[column].round(2)
    return df.sort_values(by="Average ⬆️", ascending=False)
# Assemble the UI: a "Demo" tab holding the leaderboard table and a "Docs" tab
# rendering the docs.md file that sits next to this module.
with gr.Blocks() as demo:
    gr.Markdown("""
    # 🥇 a smol course leaderboard
    A leaderboard of smol course students' submissions.
    """)
    with gr.Tabs():
        with gr.Tab("Demo"):
            # load_submissions() is called here, so the table contents are
            # computed once while this module is being executed.
            df = gr.Dataframe(
                label="a smol course leaderboard",
                value=load_submissions(),
                headers=COLUMNS,
                show_search="search",
                show_copy_button=True,
                show_fullscreen_button=True,
                show_row_numbers=True,
                pinned_columns=1,
                static_columns=[0],
                datatype=TYPES,
                column_widths=WIDTHS,
            )
        with gr.Tab("Docs"):
            # Explicit encoding so the docs render the same regardless of the
            # host's locale default.
            gr.Markdown(
                (Path(__file__).parent / "docs.md").read_text(encoding="utf-8")
            )

# Start the server only when the file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()