# some code blocks are taken from https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard/tree/main
import os

import gradio as gr
from huggingface_hub import HfApi

from src.css_html import custom_css
from src.text_content import ABOUT_TEXT, SUBMISSION_TEXT_3
from src.utils import (
    plot_leaderboard_scores,
    create_result_dataframes,
    create_result_dataframes_lite,
)

TOKEN = os.environ.get("HF_TOKEN", None)
api = HfApi(token=TOKEN)
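

# Table search/filter helpers (from the bigcode leaderboard linked above);
# not wired to any component further down in this file.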
def filter_items(df, leaderboard_table, query):
    if query == "all":
        return df[leaderboard_table.columns]
    else:
        query = query[0]
    filtered_df = df[df["T"].str.contains(query, na=False)]
    return filtered_df[leaderboard_table.columns]


def search_table(df, leaderboard_table, query):
    filtered_df = df[(df["Model"].str.contains(query, case=False))]
    return filtered_df[leaderboard_table.columns]


demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Row():
        gr.Markdown(
            """
            <div style="text-align: center;">
                <h1>AfroBench <span style='color: #e6b800;'>Leaderboard</span></h1>
            </div>
            <p style="text-align: center; font-size: 16px;">
                This leaderboard tracks the performance of multilingual models across <b>64 African languages</b>,
                <b>15 NLP tasks</b> and <b>22 datasets</b>, covering a range of tasks from POS tagging to question
                answering, summarization, and machine translation.
            </p>
            <p style="font-size: 14px; text-align: center;">
                It's based on the <a href="https://mcgill-nlp.github.io/AfroBench/index.html" target="_blank">AfroBench benchmark</a>
                and is designed to highlight both full-scale evaluations and cost-efficient subsets (AfroBench-Lite).<br><br>
                We aim to support better transparency and tooling for evaluating models in African languages.
            </p>
            """,
            elem_classes="markdown-text",
        )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("Evaluation table", id=0):
                    with gr.Column():
                        view_source = gr.Radio(
                            label="Select Leaderboard Source",
                            choices=["afrobench", "afrobench_lite"],
                            value="afrobench",
                            interactive=True,
                        )
                        view_selector = gr.Dropdown(
                            label="Select View",
                            choices=["category", "task", "dataset"],
                            value="category",
                            interactive=True,
                        )
                        with gr.Accordion("See All Columns", open=False):
                            shown_columns = gr.CheckboxGroup(
                                choices=[],
                                value=[],
                                label="Select columns to display",
                                elem_id="column-select",
                                interactive=True,
                            )
                        leaderboard_df = gr.Dataframe(
                            label="Leaderboard",
                            interactive=False,
                            elem_id="leaderboard-table",
                            wrap=True,
                        )
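
                        # Views available for each leaderboard source.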
                        view_options_map = {
                            "afrobench": ["category", "task", "dataset"],
                            "afrobench_lite": ["task", "dataset", "language"],
                        }
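
                        # Hidden button used only to trigger the initial table load.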
                        init_trigger = gr.Button(visible=False)

                        def update_view_selector(source):
                            options = view_options_map[source]
                            default = options[0]
                            return gr.update(choices=options, value=default), default
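
                        # Build the leaderboard dataframe for the chosen source/view and
                        # refresh the column selector to match its columns.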
                        def refresh_table_and_columns(view_type, source):
                            path = (
                                "data/leaderboard_json/afrobench_lite.json"
                                if source == "afrobench_lite"
                                else "data/leaderboard_json/afrobench.json"
                            )
                            if source == "afrobench_lite":
                                df = create_result_dataframes_lite(path, level=view_type)
                            else:
                                df = create_result_dataframes(path, level=view_type)
                            df.reset_index(inplace=True)
                            df.rename(columns={"index": "Model"}, inplace=True)
                            metric_cols = [col for col in df.columns if col != "Model"]
                            df["Score"] = df[metric_cols].mean(axis=1).round(1)
                            all_cols = ["Model", "Score"] + sorted(
                                [col for col in df.columns if col not in ["Model", "Score"]]
                            )
                            df = df[all_cols]
                            shown_choices = sorted([col for col in df.columns if col not in ["Model", "Score"]])
                            return df, gr.update(choices=shown_choices, value=shown_choices), shown_choices
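
                        # Same as above, but keep only the columns the user has ticked.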
                        def refresh_table_only(view_type, selected_cols, source):
                            path = (
                                "data/leaderboard_json/afrobench_lite.json"
                                if source == "afrobench_lite"
                                else "data/leaderboard_json/afrobench.json"
                            )
                            if source == "afrobench_lite":
                                df = create_result_dataframes_lite(path, level=view_type)
                            else:
                                df = create_result_dataframes(path, level=view_type)
                            df.reset_index(inplace=True)
                            df.rename(columns={"index": "Model"}, inplace=True)
                            metric_cols = [col for col in df.columns if col != "Model"]
                            df["Score"] = df[metric_cols].mean(axis=1).round(1)
                            return df[["Model", "Score"] + [c for c in selected_cols if c in df.columns]]

                        # Trigger once on launch
                        def initialize(_):
                            return refresh_table_and_columns("category", "afrobench")

                        init_trigger.click(
                            fn=initialize,
                            inputs=[init_trigger],
                            outputs=[leaderboard_df, shown_columns, shown_columns],
                        )
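
                        # When the source changes, reset the view options first, then rebuild the table.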
                        view_source.change(
                            fn=update_view_selector,
                            inputs=[view_source],
                            outputs=[view_selector, view_selector],
                        ).then(
                            fn=refresh_table_and_columns,
                            inputs=[view_selector, view_source],
                            outputs=[leaderboard_df, shown_columns, shown_columns],
                        )
                        view_selector.change(
                            fn=refresh_table_and_columns,
                            inputs=[view_selector, view_source],
                            outputs=[leaderboard_df, shown_columns, shown_columns],
                        )
                        shown_columns.change(
                            fn=refresh_table_only,
                            inputs=[view_selector, shown_columns, view_source],
                            outputs=leaderboard_df,
                        )
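
                        # Populate the table as soon as the page loads.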
                        demo.load(
                            fn=initialize,
                            inputs=[init_trigger],
                            outputs=[leaderboard_df, shown_columns, shown_columns],
                        )

                        gr.Markdown(
                            """
                            **Notes:**
                            - Score is the average of all the columns currently shown in the leaderboard, based on the view and filters you've selected.
                            - For more details, check the About section.
                            """,
                            elem_classes="markdown-text",
                        )

                with gr.TabItem("Performance Plot", id=1):
                    with gr.Row():
                        model_score_plot = gr.Plot(label="Model Score Comparison")

                    # Update plot when view_source, view_selector, or shown_columns change
                    view_source.change(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
                    view_selector.change(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
                    shown_columns.change(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )
                    demo.load(
                        fn=plot_leaderboard_scores,
                        inputs=[view_selector, shown_columns, view_source],
                        outputs=model_score_plot,
                    )

                with gr.TabItem("About", id=2):
                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

                with gr.TabItem("Submit results", id=3):
                    gr.Markdown(SUBMISSION_TEXT_3)

# demo.launch()
demo.launch(server_name="0.0.0.0", server_port=7860)