Add charts
Browse files
app.py
CHANGED
|
@@ -11,6 +11,71 @@ from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REP
|
|
| 11 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
| 12 |
from src.submission.submit import add_new_eval
|
| 13 |
import random
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Define task metadata (icons, names, descriptions)
|
| 16 |
TASK_METADATA_MULTIPLECHOICE = {
|
|
@@ -79,6 +144,9 @@ def update_task_leaderboard(dataframe, default_selection=None, hidden_columns=No
|
|
| 79 |
|
| 80 |
sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
|
| 81 |
|
|
|
|
|
|
|
|
|
|
| 82 |
#print(sorted_dataframe['Combined Performance'])
|
| 83 |
|
| 84 |
field_list = fields(AutoEvalColumn)
|
|
@@ -178,6 +246,11 @@ with demo:
|
|
| 178 |
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
|
| 179 |
)
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
# About tab
|
| 182 |
with gr.TabItem("📝 About"):
|
| 183 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
|
| 11 |
from src.populate import get_evaluation_queue_df, get_leaderboard_df
|
| 12 |
from src.submission.submit import add_new_eval
|
| 13 |
import random
|
| 14 |
+
import matplotlib.pyplot as plt
|
| 15 |
+
import re
|
| 16 |
+
import plotly.express as px
|
| 17 |
+
import plotly.graph_objects as go
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Captures the "org/name" text shown inside an HTML element, e.g. the link
# text of <a ...>org/name</a>. Compiled once instead of once per row.
_MODEL_LINK_RE = re.compile(r'>([^<>/]+/[^<>]+)<')

# Matches any HTML tag; used only for the fallback label.
_HTML_TAG_RE = re.compile(r'<[^>]+>')

# Hover text shared by both traces; %{customdata} is the short model name.
_HOVER_TEMPLATE = '<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>'


def _short_model_name(model_cell):
    """Return the hover label for one value of the 'Model' column.

    When the cell contains an element whose text looks like "org/name",
    the part after the last "/" is returned. Otherwise the cell text with
    any HTML tags removed is returned, so a cell that is not a link no
    longer raises AttributeError on a failed regex match.
    """
    text = str(model_cell)
    match = _MODEL_LINK_RE.search(text)
    if match is None:
        return _HTML_TAG_RE.sub('', text).strip()
    return match.group(1).split('/')[-1]


def _scatter_trace(subset, name, color):
    """Build the marker-only scatter trace for one subset of the leaderboard."""
    return go.Scatter(
        x=subset['#Params (B)'].tolist(),
        y=subset['Avg. Combined Performance ⬆️'].tolist(),
        mode='markers',  # markers only, no text drawn next to the points
        name=name,
        marker=dict(color=color, size=10),
        hovertemplate=_HOVER_TEMPLATE,
        # One label per point; shown only in the hover box.
        customdata=[_short_model_name(m) for m in subset['Model'].tolist()],
    )


def line_chart(dataframe):
    """Plot average combined performance against parameter count.

    Despite the name, the figure is a scatter plot with two traces: rows
    whose 'IS_FS' value equals True ('5-Few-Shot', red) and rows whose
    'IS_FS' value equals False ('0-Shot', blue). Rows whose 'IS_FS' is
    neither (e.g. missing) are not plotted.

    Args:
        dataframe: table with the columns 'IS_FS', '#Params (B)',
            'Avg. Combined Performance ⬆️' and 'Model'.

    Returns:
        A plotly ``go.Figure`` with zooming and dragging disabled.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Element-wise pandas comparisons: `is True` would not work on a Series.
    few_shot_rows = dataframe[dataframe['IS_FS'] == True]  # noqa: E712
    zero_shot_rows = dataframe[dataframe['IS_FS'] == False]  # noqa: E712

    fig = go.Figure()
    fig.add_trace(_scatter_trace(few_shot_rows, '5-Few-Shot', 'red'))
    fig.add_trace(_scatter_trace(zero_shot_rows, '0-Shot', 'blue'))

    fig.update_layout(
        title="Avg. Combined Performance vs #Params",
        xaxis_title="#Params (B)",
        yaxis_title="Avg. Combined Performance ⬆️",
        template="plotly_white",
        hovermode="closest",
        dragmode=False,
    )

    # Disable zoom and the other axis controls so the chart stays static.
    fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
    fig.update_yaxes(fixedrange=True)

    return fig
|
| 78 |
+
|
| 79 |
|
| 80 |
# Define task metadata (icons, names, descriptions)
|
| 81 |
TASK_METADATA_MULTIPLECHOICE = {
|
|
|
|
| 144 |
|
| 145 |
sorted_dataframe = dataframe.sort_values(by="Combined Performance", ascending=False)
|
| 146 |
|
| 147 |
+
pd.set_option('display.max_colwidth', None)
|
| 148 |
+
#print("========================", dataframe['Model'])
|
| 149 |
+
|
| 150 |
#print(sorted_dataframe['Combined Performance'])
|
| 151 |
|
| 152 |
field_list = fields(AutoEvalColumn)
|
|
|
|
| 246 |
hidden_columns=[col for col in LEADERBOARD_DF.columns if col not in ['FS', 'Model', "Avg. Combined Performance ⬆️", "TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]]
|
| 247 |
)
|
| 248 |
|
| 249 |
+
with gr.TabItem("Charts"):
|
| 250 |
+
#gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
|
| 251 |
+
#gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
|
| 252 |
+
gr.Plot(value=line_chart(LEADERBOARD_DF))
|
| 253 |
+
|
| 254 |
# About tab
|
| 255 |
with gr.TabItem("📝 About"):
|
| 256 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|