Spaces:
Sleeping
Sleeping
from glob import glob
from pathlib import Path

import gradio as gr
import pandas as pd
# Collect every results file. The pattern is relative to the current working
# directory. glob() returns matches in arbitrary (filesystem-dependent) order,
# so sort them to keep the load order -- and therefore the order of tied rows
# in the leaderboard -- reproducible across machines.
csv_results = sorted(glob("results/*.csv"))

# Map each CSV path to its parsed contents (path -> DataFrame).
data = {file: pd.read_csv(file) for file in csv_results}
def calculate_accuracy(df):
    """Return the mean of the ``parsed_judge_response`` column as a percentage.

    Args:
        df: DataFrame with a ``parsed_judge_response`` column
            (presumably 0/1 correctness flags -- confirm against the CSVs).

    Returns:
        The column mean multiplied by 100.
    """
    judged = df["parsed_judge_response"]
    return judged.mean() * 100
def accuracy_breakdown(df):
    """Return the per-difficulty-level accuracy as percentages.

    Rows are grouped by ``difficulty_level`` and the mean of
    ``parsed_judge_response`` is taken within each group.

    Args:
        df: DataFrame with ``difficulty_level`` and ``parsed_judge_response``
            columns.

    Returns:
        An array with one percentage per distinct ``difficulty_level`` present
        in ``df``, in groupby's default (sorted-key) order. Callers expect four
        levels, but nothing here enforces that count.
    """
    per_level_mean = df.groupby("difficulty_level")["parsed_judge_response"].mean()
    per_level_percent = per_level_mean * 100
    return per_level_percent.values
# Column headers displayed in the leaderboard table: model name, overall
# accuracy, then one column per difficulty level (1-4). The table is sorted by
# the second entry (the overall column), so its text must stay in sync with
# whatever the sort refers to.
# NOTE(review): the icon prefixes look like mis-decoded emoji (UTF-8 bytes read
# through a single-byte Greek code page) -- restore them from the original
# file if it is available; they cannot be recovered reliably from this text.
headers_with_icons = [
    "π€ Model Name",
    "β Overall",
    "π Level 1",
    "π Level 2",
    "π Level 3",
    "π¬ Level 4",
]
# Overall accuracy per results file, computed once and reused for the table.
accuracy = {file: calculate_accuracy(df) for file, df in data.items()}

# Plain (icon-free) column names: model name, overall score, then one column
# per difficulty level. Adjust together with headers_with_icons if the number
# of difficulty levels changes.
column_names = [
    "Model Name",
    "Overall Accuracy",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
    "Level 4 Accuracy",
]
expected_levels = len(column_names) - 2

# One row per results file: [model name, overall accuracy, level accuracies...]
data_for_df = []
for file, df in data.items():
    overall_accuracy = round(accuracy[file], 2)
    breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]

    # pandas silently pads short rows, which would shift scores into the wrong
    # level columns when a file lacks a difficulty level; fail loudly instead.
    if len(breakdown_accuracy) != expected_levels:
        raise ValueError(
            f"{file}: expected {expected_levels} difficulty levels, "
            f"found {len(breakdown_accuracy)}"
        )

    # The model name is the file name without its directory or extension.
    model_name = Path(file).stem
    data_for_df.append([model_name, overall_accuracy, *breakdown_accuracy])

# Build the table, switch to the display headers, and rank best-first.
accuracy_df = pd.DataFrame(data_for_df, columns=column_names)
accuracy_df.columns = headers_with_icons
# Sort by the overall column via the header list rather than repeating the
# header text, so the two can never drift apart.
accuracy_df.sort_values(by=headers_with_icons[1], ascending=False, inplace=True)
# Assemble the single-page UI: a title followed by the leaderboard table.
with gr.Blocks() as demo:
    gr.Markdown("# FSMBench Leaderboard")
    # TODO: add link to home page and dataset
    leader_board = gr.Dataframe(
        value=accuracy_df,
        headers=headers_with_icons,
    )

# Start serving the app.
demo.launch()