Spaces:
Running
Running
benediktstroebl
big update with dynamic pricing, agent metadata, about page on top, and new benchmarks
56a86ce
import pandas as pd
# Three type labels: one string followed by two numerics.
# NOTE(review): presumably the per-column datatypes handed to the table
# widget; the consumer is not visible in this chunk, so confirm there.
TYPES = [
    "str",
    "number",
    "number",
]
# Column configuration for the SWE-bench leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads.
SWEBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]

# Presumably the columns the search box matches against.
SWEBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view for this benchmark.
SWEBENCH_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
]
# Column configuration for the USACO leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads.
USACO_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]

# Presumably the columns the search box matches against.
USACO_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view for this benchmark.
USACO_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
]
# Column configuration for the CORE-Bench leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads.
COREBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]

# Presumably the columns the search box matches against.
COREBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view for this benchmark.
COREBENCH_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
]
# Column configuration for the MLAgentBench leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads. Unlike the
# other benchmark configs in this file, this one lists "Overall Score" and
# has no "Accuracy" or "Runs" entry.
MLAGENTBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Overall Score",
    "Total Cost",
]

# Presumably the columns the search box matches against.
MLAGENTBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view; "Accuracy" is listed here and the
# per-task "... Score" columns are not.
MLAGENTBENCH_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Accuracy",
]
# Maps a display label to the half-open numeric range (left, right] it covers.
# Every interval is closed on the right only, and each one starts where the
# previous one ends, so the buckets tile (-1, 10000] without overlap.
# NOTE(review): which quantity is being bucketed is not visible in this
# chunk -- confirm against the code that reads this mapping.
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}
# Column configuration for the Cybench leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads.
CYBENCH_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
]

# Presumably the columns the search box matches against.
CYBENCH_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view for this benchmark.
CYBENCH_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
]
# Column configuration for the AppWorld leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads. This config
# lists an extra "Scenario Goal Completion" column after the usual four.
APPWORLD_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Total Cost",
    "Runs",
    "Scenario Goal Completion",
]

# Presumably the columns the search box matches against.
APPWORLD_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view for this benchmark.
APPWORLD_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
]
# Column configuration for the GAIA leaderboard.
# NOTE(review): the code that consumes these lists is not visible here; the
# ON_LOAD / SEARCH / HIDE roles are read off the constant names -- confirm.

# Presumably the columns displayed when the table first loads. The three
# per-level accuracy columns are listed here, between "Accuracy" and
# "Total Cost".
GAIA_ON_LOAD_COLUMNS = [
    "Agent Name",
    "Accuracy",
    "Level 1 Accuracy",
    "Level 2 Accuracy",
    "Level 3 Accuracy",
    "Total Cost",
    "Runs",
]

# Presumably the columns the search box matches against.
GAIA_SEARCH_COLUMNS = ["Total Cost", "Agent Name"]

# Presumably the columns kept out of view. The "Level N Accuracy" columns are
# deliberately absent from this list because they appear in the on-load list.
GAIA_HIDE_COLUMNS = [
    "F1 Score",
    "AUC",
    "Precision",
    "Recall",
    "benchmark_name",
    "Overall Score",
    "Vectorization Score",
    "Fathomnet Score",
    "Feedback Score",
    "House Price Score",
    "Spaceship Titanic Score",
    "AMP Parkinsons Disease Progression Prediction Score",
    "CIFAR10 Score",
    "IMDB Score",
]