Add the chart showing the model accuracy by task
Browse files
app.py
CHANGED
|
@@ -15,6 +15,7 @@ import matplotlib.pyplot as plt
|
|
| 15 |
import re
|
| 16 |
import plotly.express as px
|
| 17 |
import plotly.graph_objects as go
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
def mean_of_max_per_field(df):
|
|
@@ -29,6 +30,8 @@ def mean_of_max_per_field(df):
|
|
| 29 |
"""
|
| 30 |
fields = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
|
| 31 |
|
|
|
|
|
|
|
| 32 |
# Controlla che tutte le colonne esistano nel DataFrame
|
| 33 |
missing = [f for f in fields if f not in df.columns]
|
| 34 |
if missing:
|
|
@@ -43,6 +46,99 @@ def mean_of_max_per_field(df):
|
|
| 43 |
return mean_max
|
| 44 |
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
def line_chart(dataframe):
|
| 47 |
# Separiamo i dati in base a IS_FS
|
| 48 |
df_true = dataframe[dataframe['IS_FS'] == True]
|
|
@@ -99,6 +195,7 @@ def line_chart(dataframe):
|
|
| 99 |
# Disabilita lo zoom e altri controlli
|
| 100 |
fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
|
| 101 |
fig.update_yaxes(fixedrange=True)
|
|
|
|
| 102 |
|
| 103 |
return fig
|
| 104 |
|
|
@@ -405,6 +502,7 @@ with demo:
|
|
| 405 |
#gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
|
| 406 |
#gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
|
| 407 |
gr.Plot(value=line_chart(LEADERBOARD_DF))
|
|
|
|
| 408 |
|
| 409 |
# About tab
|
| 410 |
with gr.TabItem("📝 About"):
|
|
|
|
| 15 |
import re
|
| 16 |
import plotly.express as px
|
| 17 |
import plotly.graph_objects as go
|
| 18 |
+
import numpy as np
|
| 19 |
|
| 20 |
|
| 21 |
def mean_of_max_per_field(df):
|
|
|
|
| 30 |
"""
|
| 31 |
fields = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
|
| 32 |
|
| 33 |
+
#print(df.columns)
|
| 34 |
+
|
| 35 |
# Controlla che tutte le colonne esistano nel DataFrame
|
| 36 |
missing = [f for f in fields if f not in df.columns]
|
| 37 |
if missing:
|
|
|
|
| 46 |
return mean_max
|
| 47 |
|
| 48 |
|
| 49 |
+
def boxplot_per_task(dataframe=None, baselines=None):
    """Build a Plotly figure with one accuracy box plot per task.

    For every task column found in ``dataframe`` a box plot of its non-null
    values is drawn. When a baseline value is available for that task, a
    black dashed horizontal line and a text label with the value are drawn
    over the corresponding box.

    Args:
        dataframe: DataFrame with one column per task (values are plotted on
            a 0-100 y axis). Task columns that are absent are skipped. When
            ``None``, a synthetic demo DataFrame (seeded with 42) is used.
        baselines: Mapping ``task -> baseline value``. Tasks that are missing
            from the mapping, or mapped to ``None``, get no baseline line.
            When ``None``, a random integer baseline in [50, 70) is drawn
            for every task.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]

    if dataframe is None:
        # Demo data. NOTE: this reseeds NumPy's global random generator.
        np.random.seed(42)
        dataframe = pd.DataFrame({
            task: np.random.uniform(0.4, 0.9, 20) * 100
            for task in tasks
        })

    # Per-task baseline (random value between 50 and 70 when none is given).
    if baselines is None:
        baselines = {task: np.random.randint(50, 70) for task in tasks}

    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
              "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]

    fig = go.Figure()

    # Index of the next box on the categorical x axis. Categories are
    # created only for tasks that are actually plotted, so this must count
    # plotted boxes rather than positions in `tasks`; otherwise a skipped
    # task would shift every following baseline away from its box.
    position = 0

    for task, color in zip(tasks, colors):
        if task not in dataframe.columns:
            continue

        y_data = dataframe[task].dropna().tolist()

        # Box plot for this task.
        fig.add_trace(go.Box(
            y=y_data,
            name=task,
            boxmean="sd",
            marker=dict(color=color, line=dict(width=1)),
            line=dict(color=color),
            fillcolor=color,
            opacity=0.7,
            hovertemplate=f"<b>{task}</b><br>Accuracy: " + "%{y:.2f}%" + "<extra></extra>",
            width=0.6,
        ))

        # Baseline for this task (if available).
        baseline = baselines.get(task)
        if baseline is not None:
            # Horizontal dashed line spanning the box width (0.6).
            fig.add_shape(
                type="line",
                x0=position - 0.3, x1=position + 0.3,
                y0=baseline, y1=baseline,
                line=dict(color="black", width=2, dash="dash"),
                xref="x", yref="y",
            )

            # Label showing the baseline value just above the line.
            fig.add_annotation(
                x=position, y=baseline,
                text=f"{baseline}%",
                showarrow=False,
                yshift=10,
                font=dict(size=10, color="black"),
            )

        position += 1

    fig.update_layout(
        title="Distribution of Model Accuracy by Task.",
        xaxis_title="Task",
        yaxis_title="Accuracy (%)",
        template="plotly_white",
        boxmode="group",
        dragmode=False,
        font=dict(family="Arial", size=13),
        margin=dict(b=80),
    )

    # Footer caption. It is appended with add_annotation instead of being
    # passed as `annotations=[...]` to update_layout, because assigning the
    # layout's annotation array there would clobber the baseline labels
    # added in the loop above.
    fig.add_annotation(
        text=(
            "Boxplots show LLM accuracy in zero/few-shot settings. <br>"
            "Black dashed lines indicate the best-performing supervised models evaluated during EVALITA."
        ),
        xref="paper", yref="paper",
        x=0.5, y=-0.33,
        showarrow=False,
        font=dict(size=12, color="gray"),
    )

    # Fix the y axis to the percentage range and disable zooming on it.
    fig.update_yaxes(range=[0, 100], fixedrange=True)

    return fig
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# Per-task reference scores passed as `baselines` to boxplot_per_task.
# Presumably the best supervised-model results from EVALITA (this is what
# the chart footer states) - confirm against the source of these numbers.
# "FAQ" has no entry, so no baseline line is drawn for that task.
BASELINES = {
    "TE": 71.00,
    "SA": 66.38,
    "HS": 80.88,
    "AT": 82.40,
    "WIC": 85.00,
    "LS": 38.82,
    "SU": 38.91,
    "NER": 88.00,
    "REL": 62.99,
}
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
|
| 142 |
def line_chart(dataframe):
|
| 143 |
# Separiamo i dati in base a IS_FS
|
| 144 |
df_true = dataframe[dataframe['IS_FS'] == True]
|
|
|
|
| 195 |
# Disabilita lo zoom e altri controlli
|
| 196 |
fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
|
| 197 |
fig.update_yaxes(fixedrange=True)
|
| 198 |
+
#fig.update_yaxes(range=[0, 100], fixedrange=True)
|
| 199 |
|
| 200 |
return fig
|
| 201 |
|
|
|
|
| 502 |
#gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
|
| 503 |
#gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
|
| 504 |
gr.Plot(value=line_chart(LEADERBOARD_DF))
|
| 505 |
+
gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES), interactive_plot_config={'displayModeBar': False })
|
| 506 |
|
| 507 |
# About tab
|
| 508 |
with gr.TabItem("📝 About"):
|