Mqleet's picture
[update] templates
a3d3755
// Global variables
// Model entries loaded from the JSON data file; elsewhere in this file each entry is read as { model, tasks }.
let rawData;
// Chart instance for the bar chart drawn on the 'chart-success-reward-rate' element (null until first render).
let chart = null;
// Chart instance for the per-subtask line graph drawn on the 'line-graph' element (null until first render).
let lineGraph = null;
// Sorted scores from the most recent createMainResultChart() call; read by the bar-chart hover handler.
let taskScores_save = null;
// Currently selected task; used as a key into taskSubtaskMapping and into each model's `tasks` object.
let currentTask = 'Avg';
// Available sort/metric modes. The values double as the DOM ids of the corresponding buttons.
const sortby_options = {
BY_REWARD_SCORE: "sort-by-reward-score",
BY_SUCCESS_RATE: "sort-by-success-rate",
BY_GROUNDING_ACC: "sort-by-grounding-acc",
};
// Active sort/metric mode; decides the bar ordering and which metric the line graph plots.
let cur_sortby_option = sortby_options.BY_REWARD_SCORE;
// Task -> list of subtasks shown on the line graph's x axis for that task.
const taskSubtaskMapping = {
'Avg': ['AlfWorld', 'ScienceWorld', 'BabyAI', 'PDDL', 'Jericho', 'WebShop', 'WebArena', 'Tool-Query', 'Tool-Operation'],
'Embodied': ['AlfWorld', 'ScienceWorld', 'BabyAI'],
'Game': ['PDDL', 'Jericho'],
'Web': ['WebShop', 'WebArena'],
'Tools': ['Tool-Query', 'Tool-Operation'],
};
// Subtask -> abbreviation used in the annotation label when the 'Avg' task is selected.
const SubtaskNameMapping = {
'AlfWorld': 'ALF',
'ScienceWorld': 'SW',
'BabyAI': 'BA',
'PDDL': 'PL',
'Jericho': 'JC',
'WebShop': 'WS',
'WebArena': 'WA',
'Tool-Query': 'T-Q',
'Tool-Operation': 'T-O',
};
// Per-model styling lookups (model name -> colour / border style).
// Both start empty and are populated by generateModelColorsAndStyles().
const modelColors = {};
const borderStyles = {};

// Line colour palette; consumers index into it modulo its length.
const colors = [
  'rgba(255, 99, 132, 1)',
  'rgba(54, 162, 235, 1)',
  'rgba(255, 206, 86, 1)',
  'rgba(75, 192, 192, 1)',
  'rgba(153, 102, 255, 1)',
  'rgba(255, 159, 64, 1)',
  'rgba(199, 199, 199, 1)',
  'rgba(83, 102, 255, 1)',
  'rgba(40, 159, 64, 1)',
  'rgba(143, 162, 235, 1)',
  'rgba(255, 99, 75, 1)',
  'rgba(71 ,150 ,87, 1)',
  'rgba(210 ,102 ,95, 1)',
  'rgba(51 ,47 ,180, 1)',
];

// Line border styles. Every style is 2px wide; only the dash pattern varies
// (an empty pattern is a solid line). Each entry is a distinct object.
const borders = [
  [],
  [5, 5],
  [10, 5],
  [2, 2],
  [8, 4],
  [5, 10],
  [15, 5],
  [5, 15],
  [10, 10],
  [11, 4],
  [10, 2],
  [4, 8],
  [3, 5],
  [5, 3],
].map((borderDash) => ({ borderWidth: 2, borderDash }));
/**
 * Records a line colour and a border style for every model name.
 *
 * Entries are taken from `colors` and `borders` by position, wrapping around
 * when there are more models than palette entries. Results are written into
 * the shared `modelColors` and `borderStyles` lookups.
 *
 * @param {string[]} models - Model names, in the order styles should be assigned.
 */
function generateModelColorsAndStyles(models) {
  for (const [position, modelName] of models.entries()) {
    modelColors[modelName] = colors[position % colors.length];
    borderStyles[modelName] = borders[position % borders.length];
  }
}
/**
 * Extracts the three metrics of one task for every model that reports it.
 *
 * Logs the requested task once, and logs an error for each model that has no
 * entry for that task; such models are left out of the result.
 *
 * @param {Array<{model: string, tasks: Object}>} rawData - Model entries.
 * @param {string} task - Key to look up in each entry's `tasks` object.
 * @returns {Array<{model: string, score: number, accuracy: number, grounding: number}>}
 *   One record per model that has the task, in input order; metric values are
 *   passed through parseFloat.
 */
function getScoresForTask(rawData, task) {
  console.log("Current task:", task);
  const collected = [];
  for (const entry of rawData) {
    const taskResult = entry.tasks[task];
    if (!taskResult) {
      console.error("Task not found:", task);
      continue;
    }
    collected.push({
      model: entry.model,
      score: parseFloat(taskResult.score),
      accuracy: parseFloat(taskResult.accuracy),
      grounding: parseFloat(taskResult.grounding),
    });
  }
  return collected;
}
// Index of the bar-chart entry currently under the mouse, as set by the bar chart's
// mousemove handler; null when the pointer is not over any bar.
let selectedModelIndexInBar = null;
// Value returned by highlightModel() for the hovered model: the index of the matching
// line-graph dataset, or -1 when no dataset matched.
let selectedModelIndexInLine = null;
/**
 * Resolves the active sort option to the metric it stands for.
 *
 * @returns {{field: string, axisTitle: string} | null} The property name read
 *   from each task record and the matching line-graph axis title, or null when
 *   `cur_sortby_option` is not one of the known options.
 */
function getActiveMetric() {
  switch (cur_sortby_option) {
    case sortby_options.BY_REWARD_SCORE:
      return { field: 'score', axisTitle: 'Progress Rate (%)' };
    case sortby_options.BY_SUCCESS_RATE:
      return { field: 'accuracy', axisTitle: 'Success Rate (%)' };
    case sortby_options.BY_GROUNDING_ACC:
      return { field: 'grounding', axisTitle: 'Grounding accuracy (%)' };
    default:
      return null;
  }
}

/**
 * Builds one line-graph dataset per model for the given subtasks.
 *
 * @param {string[]} subTaskLabels - Subtasks plotted along the x axis.
 * @param {string} field - Metric property to read ('score', 'accuracy' or 'grounding').
 * @returns {Object[]} Chart.js dataset objects; a subtask the model has no
 *   record for is plotted as null.
 */
function buildLineDatasets(subTaskLabels, field) {
  return rawData.map((modelData) => ({
    label: modelData.model,
    data: subTaskLabels.map((subtask) => (
      modelData.tasks[subtask] ? parseFloat(modelData.tasks[subtask][field]) : null
    )),
    borderColor: modelColors[modelData.model] || '#4CAF50',
    fill: false,
    ...borderStyles[modelData.model],
  }));
}

/**
 * Mousemove handler for the bar chart canvas: highlights the hovered model in
 * the line graph and clears the highlight when the pointer leaves the bars.
 *
 * Declared at top level on purpose: registering the same function reference
 * again on the same element is ignored by addEventListener, so re-rendering
 * the charts never stacks up duplicate handlers.
 *
 * @param {Event} event - The mousemove event from the bar chart canvas.
 */
function handleBarChartHover(event) {
  const activePoints = chart.getElementsAtEventForMode(event, 'nearest', { intersect: true }, true);
  if (activePoints.length > 0) {
    const selectedIndex = activePoints[0].index;
    if (selectedIndex !== selectedModelIndexInBar) {
      selectedModelIndexInBar = selectedIndex;
      selectedModelIndexInLine = highlightModel(
        taskScores_save.map((item) => item.model),
        selectedModelIndexInBar,
      );
    }
  } else if (selectedModelIndexInBar !== null) {
    // Only clear (and redraw the line graph) when something was highlighted,
    // instead of on every mouse move outside the bars.
    selectedModelIndexInBar = null;
    removeHighlight();
  }
}

/**
 * (Re)builds the bar chart and the per-subtask line graph for `currentTask`,
 * using `cur_sortby_option` both to order the bars and to pick the metric
 * plotted in the line graph. Any previously created charts are destroyed first.
 *
 * Side effects: replaces `chart`, `lineGraph` and `taskScores_save`, and resets
 * the hover selection so the first hover after a rebuild always highlights.
 */
function createMainResultChart() {
  const boldFont = (size) => ({
    size,
    family: "'Noto Sans', sans-serif",
    weight: 'bold',
  });

  const metric = getActiveMetric();
  const taskScores = getScoresForTask(rawData, currentTask);
  if (metric) {
    // Descending order by the active metric.
    taskScores.sort((a, b) => b[metric.field] - a[metric.field]);
  }
  taskScores_save = taskScores;

  const labels = taskScores.map((item) => item.model);
  const scores = taskScores.map((item) => item.score);
  const accuracies = taskScores.map((item) => item.accuracy);
  const groundings = taskScores.map((item) => item.grounding);

  if (chart) {
    chart.destroy();
  }
  const ctx = document.getElementById('chart-success-reward-rate');
  chart = new Chart(ctx, {
    plugins: [ChartDataLabels],
    type: 'bar',
    data: {
      labels: labels,
      datasets: [
        {
          label: 'Progress Rate',
          data: scores,
          backgroundColor: '#f398ae',
        },
        {
          label: 'Success Rate',
          data: accuracies,
          backgroundColor: '#78b5f1',
        },
        {
          label: 'Grounding Accuracy',
          data: groundings,
          backgroundColor: '#f3e276',
        },
      ],
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      interaction: {
        mode: 'index',
      },
      indexAxis: 'y',
      scales: {
        x: {
          // NOTE(review): Chart.js v3+ documents min/max/beginAtZero as
          // scale-level options rather than tick options — confirm against the
          // Chart.js version in use whether these three values have any effect.
          ticks: {
            beginAtZero: true,
            min: 0,
            max: 100,
            font: boldFont(12),
          },
          grace: 10,
          title: {
            display: true,
            text: 'Value (%)',
            font: boldFont(14),
          },
        },
        y: {
          ticks: {
            font: boldFont(12),
          },
          title: {
            display: true,
            text: 'Model',
            font: boldFont(14),
          },
        },
      },
      plugins: {
        legend: {
          display: true,
          labels: {
            usePointStyle: true,
            font: boldFont(10),
          },
          align: 'center',
          position: 'bottom',
        },
        tooltip: {
          callbacks: {
            label: function (context) {
              let label = context.dataset.label || '';
              if (label) {
                label += ': ';
              }
              label += context.formattedValue;
              return label;
            },
          },
        },
        datalabels: {
          align: 'end',
          anchor: 'end',
          color: 'black',
          padding: 0,
          formatter: function (value) {
            return value.toFixed(2);
          },
          // Scale the label font with the chart width, capped at 10px.
          font: function (context) {
            const size = Math.min(Math.round(context.chart.width / 48), 10);
            return {
              size: size,
              family: "'Noto Sans', sans-serif",
            };
          },
        },
      },
    },
  });

  const subTaskLabels = taskSubtaskMapping[currentTask] || [];
  if (lineGraph) {
    lineGraph.destroy();
  }
  const lineGraphCtx = document.getElementById('line-graph').getContext('2d');
  const datasets = metric ? buildLineDatasets(subTaskLabels, metric.field) : [];
  const yAxisTitle = metric ? metric.axisTitle : '';
  lineGraph = new Chart(lineGraphCtx, {
    type: 'line',
    data: {
      labels: subTaskLabels,
      datasets: datasets,
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      scales: {
        x: {
          ticks: {
            font: boldFont(12),
          },
          title: {
            display: true,
            text: 'Sub-Task',
            font: boldFont(14),
          },
        },
        y: {
          ticks: {
            font: boldFont(12),
          },
          title: {
            display: true,
            text: yAxisTitle,
            font: boldFont(14),
          },
          min: 0,
          max: 100,
        },
      },
      plugins: {
        legend: {
          display: true,
          labels: {
            usePointStyle: true,
            font: boldFont(10),
          },
          align: 'center',
          position: 'bottom',
        },
        annotation: {
          annotations: [],
        },
      },
    },
  });

  // The freshly built line graph carries no highlight and the bar order may
  // have changed, so a remembered hover index would be stale.
  selectedModelIndexInBar = null;
  selectedModelIndexInLine = null;

  if (ctx) {
    // Same function reference every time, so repeated calls do not add
    // additional listeners to the canvas.
    ctx.addEventListener('mousemove', handleBarChartHover);
  }
}
// Task filter buttons: clicking one selects its task (the button id with the
// 'filter-by-' prefix removed), moves the 'active' class to it and redraws.
for (const taskButton of document.querySelectorAll('.btn-group.task-filter-selector .btn')) {
  taskButton.addEventListener('click', function onTaskFilterClick() {
    currentTask = taskButton.id.replace('filter-by-', '');
    const previouslyActive = document.querySelectorAll('.btn-group.task-filter-selector .btn.active');
    for (const other of previouslyActive) {
      other.classList.remove('active');
    }
    taskButton.classList.add('active');
    createMainResultChart();
  });
}
/**
 * Emphasises one model's line in the line graph and attaches its annotation.
 *
 * The dataset whose label equals `labels[index]` is drawn black with width 4;
 * every other dataset gets its `modelColors` colour and width 2. The graph's
 * annotations are replaced with createAnnotations() for that model and the
 * graph is redrawn.
 *
 * @param {string[]} labels - Model names, indexed like the bar chart entries.
 * @param {number} index - Position in `labels` of the model to highlight.
 * @returns {number} Index of the highlighted dataset in the line graph, or -1
 *   if no dataset carries that label.
 */
function highlightModel(labels, index) {
  const targetModel = labels[index];
  const series = lineGraph.data.datasets;
  let matchedPosition = -1;
  for (let position = 0; position < series.length; position += 1) {
    const line = series[position];
    const isTarget = line.label === targetModel;
    line.borderColor = isTarget ? '#000000' : modelColors[line.label];
    line.borderWidth = isTarget ? 4 : 2;
    if (isTarget) {
      matchedPosition = position;
    }
  }
  lineGraph.options.plugins.annotation.annotations = createAnnotations(targetModel);
  lineGraph.update();
  return matchedPosition;
}
/**
 * Clears any highlight from the line graph: every dataset goes back to its
 * `modelColors` colour and width 2, all annotations are removed, and the
 * graph is redrawn.
 */
function removeHighlight() {
  for (const line of lineGraph.data.datasets) {
    line.borderColor = modelColors[line.label];
    line.borderWidth = 2;
  }
  lineGraph.options.plugins.annotation.annotations = [];
  lineGraph.update();
}
/**
 * Builds the label annotation shown on the line graph for a highlighted model.
 *
 * The label lists the model's value for the active metric on every subtask of
 * `currentTask` that the model has a record for. Subtasks without a record are
 * skipped (the line graph plots them as gaps), so a partially evaluated model
 * no longer breaks the hover highlight. The label is centred horizontally over
 * the subtasks and vertically between the model's lowest and highest value.
 *
 * @param {string} modelName - Model whose values should be listed.
 * @returns {Object[]} An array holding a single annotation config, or an empty
 *   array when the model is unknown or has no record for any subtask.
 */
function createAnnotations(modelName) {
  // Same fallback as createMainResultChart(): unknown task -> no subtasks.
  const subtasks = taskSubtaskMapping[currentTask] || [];
  const modelData = rawData.find((entry) => entry.model === modelName);
  if (!modelData || !modelData.tasks) {
    return [];
  }

  // Success rate is the default, matching the header text used for it.
  let metricField = 'accuracy';
  let metricLabel = 'success rate';
  if (cur_sortby_option === sortby_options.BY_REWARD_SCORE) {
    metricField = 'score';
    metricLabel = 'progress rate';
  } else if (cur_sortby_option === sortby_options.BY_GROUNDING_ACC) {
    metricField = 'grounding';
    metricLabel = 'grounding acc';
  }

  // The 'Avg' view lists every subtask, so it uses a compact header,
  // abbreviated subtask names and no per-line percent sign.
  const isAverageView = currentTask === 'Avg';
  let content = isAverageView
    ? `${modelData.model}(%):\n`
    : `${modelData.model} (${metricLabel}):\n`;

  let minYValue = 100;
  let maxYValue = 0;
  let hasData = false;
  for (const subtask of subtasks) {
    const record = modelData.tasks[subtask];
    if (!record) {
      continue;
    }
    hasData = true;
    const value = parseFloat(record[metricField]);
    minYValue = Math.min(minYValue, value);
    maxYValue = Math.max(maxYValue, value);
    if (maxYValue === 0) {
      // Keeps the upper bound off zero while only zero values have been seen.
      maxYValue = 0.6;
    }
    content += isAverageView
      ? `${SubtaskNameMapping[subtask]}: ${value.toFixed(1)}\n`
      : `${subtask}: ${value.toFixed(1)}%\n`;
  }
  if (!hasData) {
    return [];
  }

  // A single label is enough: it already lists every subtask.
  return [{
    type: 'label',
    content: content,
    xValue: (subtasks.length - 1) / 2,
    yValue: (maxYValue + minYValue) / 2,
    backgroundColor: 'rgba(255,255,255,0.8)',
    font: {
      size: 9,
      weight: 'bold',
      color: 'black',
      family: "'Noto Sans', sans-serif",
    },
    xPadding: 6,
    yPadding: 6,
    borderColor: 'black',
    borderWidth: 1,
    borderRadius: 6,
    position: 'center',
    adjustScaleRange: true,
  }];
}
/**
 * Zooms the line graph's y axis onto the value range of one dataset.
 *
 * A buffer is added on both sides: 0.5 when the values span more than 2, else
 * 1.5. The lower bound never goes below 0. Nothing is changed when the dataset
 * does not exist (e.g. index -1 from highlightModel()) or has no numeric
 * points, so the axis is never set to an infinite or NaN bound.
 *
 * @param {*} labels - Unused; kept so existing call sites stay valid.
 * @param {number} selectedIndex - Index of the dataset in the line graph.
 */
function updateLineGraphScale(labels, selectedIndex) {
  const dataset = lineGraph.data.datasets[selectedIndex];
  if (!dataset) {
    return;
  }
  // Missing subtasks are stored as null; ignore them (and any NaN) for both bounds.
  const values = dataset.data.filter((value) => Number.isFinite(value));
  if (values.length === 0) {
    return;
  }
  const maxValue = Math.max(...values);
  const minValue = Math.min(...values);
  const buffer = maxValue - minValue > 2 ? 0.5 : 1.5;
  lineGraph.options.scales.y.min = Math.max(0, minValue - buffer);
  lineGraph.options.scales.y.max = maxValue + buffer;
  lineGraph.update();
}
/**
 * Restores the line graph's y axis to the full 0–100 range and redraws it.
 */
function resetLineGraphScale() {
  const yAxis = lineGraph.options.scales.y;
  yAxis.min = 0;
  yAxis.max = 100;
  lineGraph.update();
}
// Sort/metric buttons: each sortby_options value is the id of a button.
// Clicking it moves the 'active' class to that button, makes its option the
// current one and redraws the charts.
for (const sortOptionId of Object.values(sortby_options)) {
  const sortButton = document.getElementById(sortOptionId);
  sortButton.addEventListener('click', function onSortOptionClick() {
    const previouslyActive = document.querySelectorAll('.btn-group.metric-filter-selector .btn.active');
    for (const other of previouslyActive) {
      other.classList.remove('active');
    }
    sortButton.classList.add('active');
    cur_sortby_option = sortOptionId;
    createMainResultChart();
  });
}
// Once the DOM is ready, load the result data, assign per-model styles and
// draw the charts. Failures (network error, non-2xx response, invalid JSON)
// are logged instead of being left as an unhandled promise rejection.
document.addEventListener('DOMContentLoaded', async function () {
  try {
    const response = await fetch('/agentboard/data/To_Release/main_data_new.json');
    if (!response.ok) {
      // fetch() only rejects on network failure; HTTP errors must be checked explicitly.
      throw new Error(`Failed to load main_data_new.json: HTTP ${response.status}`);
    }
    rawData = await response.json();
    generateModelColorsAndStyles(rawData.map((data) => data.model));
    createMainResultChart();
  } catch (error) {
    console.error('Unable to initialise the result charts:', error);
  }
});