evalita_llm_leaderboard

Running

App Files Files Community

rzanoli committed on Aug 29

Commit

b0db80c

1 Parent(s): 9cfb29f

Small changes

Browse files

Files changed (1) hide show

app.py +19 -99

app.py CHANGED Viewed

@@ -189,13 +189,13 @@ def boxplot_per_task(dataframe=None, baselines=None):
     fig.add_annotation(
         text=(
-            " In tasks like TE and SA, zero/few-shot models reach accuracy close to supervised <br>  "
-            "methods at EVALITA (dashed line); in NER and REL they remain much lower. "
         ),
         xref="paper", yref="paper",
         x=0.5, y=-0.30,
         showarrow=False,
-        font=dict(size=12, color="gray"),
         align="left"
     )
@@ -279,112 +279,27 @@ def boxplot_prompts_per_task(dataframe, tasks=None):
         barmode='group',
         template="plotly_white",
         font=dict(family="Arial", size=10),
-        yaxis=dict(range=[0, 100], fixedrange=True),
     )
-    # Aggiungi la caption come annotazione separata
     fig.add_annotation(
         text="There is no single prompt that performs best across all tasks.<br>"
              "Different prompts achieve the highest accuracy on different tasks.",
         xref="paper", yref="paper",
-        x=0, y=-0.3,
-        showarrow=False,
-        font=dict(size=11, color="gray"),
-        align="left"
-    )
-    return fig
-def line_chart2(dataframe):
-    # Normalizziamo le dimensioni per avere marker non troppo piccoli né enormi
-    def scale_sizes(values, min_size=8, max_size=30):
-        vmin, vmax = min(values), max(values)
-        return [
-            min_size + (val - vmin) / (vmax - vmin) * (max_size - min_size) if vmax > vmin else (min_size + max_size) / 2
-            for val in values
-        ]
-    # Separiamo i dati in base a IS_FS
-    df_true = dataframe[dataframe['IS_FS'] == True]
-    df_false = dataframe[dataframe['IS_FS'] == False]
-    # Estrai valori x, y e labels
-    x_true = df_true['#Params (B)'].tolist()
-    y_true = df_true['Avg. Comb. Perf. ⬆️'].tolist()
-    labels_true = [re.search(r'>([^<]+)<', m).group(1) for m in df_true['Model'].tolist()]
-    x_false = df_false['#Params (B)'].tolist()
-    y_false = df_false['Avg. Comb. Perf. ⬆️'].tolist()
-    labels_false = [re.search(r'>([^<]+)<', m).group(1) for m in df_false['Model'].tolist()]
-    fig = go.Figure()
-    # Punti IS_FS=True
-    fig.add_trace(go.Scatter(
-        x=x_true,
-        y=y_true,
-        mode='markers',
-        name='5-Shot',
-        marker=dict(
-            color='blue',
-            size=scale_sizes(x_true)  # marker più grandi se #Params è grande
-        ),
-        hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
-        customdata=labels_true
-    ))
-    # Punti IS_FS=False
-    fig.add_trace(go.Scatter(
-        x=x_false,
-        y=y_false,
-        mode='markers',
-        name='0-Shot',
-        marker=dict(
-            color='red',
-            size=scale_sizes(x_false)
-        ),
-        hovertemplate='<b>%{customdata}</b><br>#Params: %{x}<br>Performance: %{y}<extra></extra>',
-        customdata=labels_false
-    ))
-    fig.update_layout(
-        title="Avg. Combined Performance vs #Params",
-        xaxis_title="#Params (B)",
-        yaxis_title="Avg. Combined Performance",
-        template="plotly_white",
-        hovermode="closest",
-        font=dict(family="Arial", size=10),
-        dragmode=False,
-        xaxis=dict(
-            tickvals=[0, 25, 50, 75, 100, 125],  # valori che vuoi mostrare
-            ticktext=["0", "25", "50", "75", "100"]
-        )
-    )
-    # Caption
-    fig.add_annotation(
-        text="Accuracy generally rises with #Params, but smaller models with 5-shot <br> "
-             "can outperform larger zero-shot models.",
-        xref="paper", yref="paper",
-        x=0, y=-0.3,
         showarrow=False,
         font=dict(size=11, color="gray"),
-        align="left"
     )
-    fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
-    fig.update_yaxes(fixedrange=True)
     return fig
 def line_chart(dataframe):
-    import re
-    import plotly.graph_objects as go
-    # Normalizziamo le dimensioni per avere marker non troppo piccoli né enormi
     def scale_sizes(values, min_size=8, max_size=30):
         vmin, vmax = min(values), max(values)
         return [
@@ -392,7 +307,7 @@ def line_chart(dataframe):
             for val in values
         ]
-    # Separiamo i dati in base a IS_FS
     df_true = dataframe[dataframe['IS_FS'] == True]
     df_false = dataframe[dataframe['IS_FS'] == False]
@@ -473,18 +388,23 @@ def line_chart(dataframe):
         xaxis=dict(
             tickvals=[0, 25, 50, 75, 100, 125],
             ticktext=["0", "25", "50", "75", "100"]
         )
     )
     # Caption
     fig.add_annotation(
-        text="Accuracy generally rises with #Params, but smaller models with 5-shot <br>"
-             "can outperform larger zero-shot models.",
         xref="paper", yref="paper",
-        x=0, y=-0.3,
         showarrow=False,
         font=dict(size=11, color="gray"),
-        align="left"
     )
     fig.update_xaxes(fixedrange=True, rangeslider_visible=False)

     fig.add_annotation(
         text=(
+            "In tasks like TE and SA, models approach the accuracy of supervised <br>"
+            "models at EVALITA (dashed line); in NER and REL they remain lower."
         ),
         xref="paper", yref="paper",
         x=0.5, y=-0.30,
         showarrow=False,
+        font=dict(size=11, color="gray"),
         align="left"
     )
         barmode='group',
         template="plotly_white",
         font=dict(family="Arial", size=10),
+        yaxis=dict(range=[0, 100], fixedrange=True)
     )
+    # caption come annotazione separata
     fig.add_annotation(
         text="There is no single prompt that performs best across all tasks.<br>"
              "Different prompts achieve the highest accuracy on different tasks.",
         xref="paper", yref="paper",
+        x=0.5, y=-0.3,
         showarrow=False,
         font=dict(size=11, color="gray"),
+        align="center",
+        xanchor="center"
     )
     return fig
 def line_chart(dataframe):
+    # Normalizza le dimensioni per avere marker non troppo piccoli né enormi
     def scale_sizes(values, min_size=8, max_size=30):
         vmin, vmax = min(values), max(values)
         return [
             for val in values
         ]
+    # dati in base a IS_FS
     df_true = dataframe[dataframe['IS_FS'] == True]
     df_false = dataframe[dataframe['IS_FS'] == False]
         xaxis=dict(
             tickvals=[0, 25, 50, 75, 100, 125],
             ticktext=["0", "25", "50", "75", "100"]
+        ),
+        yaxis=dict(
+            tickvals=[0, 20, 40, 60, 80, 100],  # 👈 tick fissi
+            range=[0, 100]  # 👈 range bloccato
         )
     )
     # Caption
     fig.add_annotation(
+        text="Accuracy generally rises with #Params, but smaller models <br>"
+             "with 5-shot can outperform larger zero-shot models.",
         xref="paper", yref="paper",
+        x=0.5, y=-0.3,  # 👈 centrata
         showarrow=False,
         font=dict(size=11, color="gray"),
+        align="center",
+        xanchor="center"  # 👈 ancora centrata rispetto al testo
     )
     fig.update_xaxes(fixedrange=True, rangeslider_visible=False)