Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from src.control_panel import create_control_panel, create_control_callback | |
| from src.latency_score_memory import create_lat_score_mem_plot | |
| from src.leaderboard import create_leaderboard_table | |
| from src.bettertransformer import create_bt_plots | |
| from src.flashattentionv2 import create_fa2_plots | |
| from src.custom_kernels import create_custom_kernels_plots | |
| from src.llm_perf import get_llm_perf_df | |
| from src.assets import custom_css | |
| from src.content import ( | |
| LOGO, | |
| TITLE, | |
| ABOUT, | |
| INTRODUCTION, | |
| EXAMPLE_CONFIG, | |
| CITATION_BUTTON, | |
| CITATION_BUTTON_LABEL, | |
| ) | |
# Maps a benchmark machine name to the label shown on its hardware tab.
# NOTE(review): the trailing characters of the label look like a mis-encoded
# emoji; the value is kept as-is here, confirm against the upstream file.
MACHINE_TO_HARDWARE = {
    "hf-dgx-01": "A100-80GB π₯οΈ",
}

# Hugging Face access token read from the environment; None when unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Top-level Gradio app: a header, one tab per benchmarked machine (each with
# its own control panel and result sub-tabs), an About tab and a citation box.
# NOTE(review): the "π" fragments in the tab/accordion labels look like
# mis-encoded emoji; they are left untouched here, confirm against upstream.
demo = gr.Blocks(css=custom_css)

# Hardware tabs use ids 0..n-1. Keep the historical id 3 for the About tab
# unless that would collide with a hardware tab (four or more machines).
about_tab_id = max(3, len(MACHINE_TO_HARDWARE))

with demo:
    gr.HTML(LOGO, elem_classes="logo")
    gr.HTML(TITLE, elem_classes="title")
    gr.Markdown(INTRODUCTION, elem_classes="descriptive-text")

    ####################### HARDWARE TABS #######################
    with gr.Tabs(elem_classes="tabs"):
        # `tab_index` rather than `id`, so the builtin is not shadowed.
        for tab_index, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
            with gr.TabItem(hardware, id=tab_index):
                ####################### CONTROL PANEL #######################
                (
                    filter_button,
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                ) = create_control_panel()

                ####################### HARDWARE SUBTABS #######################
                with gr.Tabs(elem_classes="subtabs"):
                    # One dataframe per machine feeds every sub-tab below.
                    llm_perf_df = get_llm_perf_df(machine=machine)

                    ####################### LEADERBOARD TAB #######################
                    with gr.TabItem("Leaderboard π ", id=0):
                        leaderboard_table = create_leaderboard_table(llm_perf_df)

                    ####################### LAT. vs. SCORE vs. MEM. TAB #######################
                    with gr.TabItem("Latency vs. Score vs. Memory π", id=1):
                        lat_score_mem_plot = create_lat_score_mem_plot(llm_perf_df)

                    ####################### BETTERTRANSFORMER SPEEDUP TAB #######################
                    with gr.TabItem("BetterTransformer Speedup π", id=2):
                        bt_prefill_plot, bt_decode_plot = create_bt_plots(llm_perf_df)

                    ####################### FLASHATTENTIONV2 SPEEDUP TAB #######################
                    with gr.TabItem("FlashAttentionV2 Speedup π", id=3):
                        fa2_prefill_plot, fa2_decode_plot = create_fa2_plots(llm_perf_df)

                    ####################### CUSTOM KERNELS TAB #######################
                    with gr.TabItem("Custom Quantization Kernels Comparison π", id=4):
                        (
                            custom_kernels_prefill_plot,
                            custom_kernels_decode_plot,
                        ) = create_custom_kernels_plots(llm_perf_df)

                ####################### CONTROL CALLBACK #######################
                # Wire the filter button: control-panel widgets are the
                # inputs, the table and plots created above are the outputs.
                create_control_callback(
                    filter_button,
                    # inputs
                    machine_textbox,
                    search_bar,
                    score_slider,
                    memory_slider,
                    backend_checkboxes,
                    datatype_checkboxes,
                    optimization_checkboxes,
                    quantization_checkboxes,
                    # outputs
                    leaderboard_table,
                    lat_score_mem_plot,
                    bt_prefill_plot,
                    bt_decode_plot,
                    fa2_prefill_plot,
                    fa2_decode_plot,
                    custom_kernels_prefill_plot,
                    custom_kernels_decode_plot,
                )

        ####################### ABOUT TAB #######################
        with gr.TabItem("About π", id=about_tab_id):
            gr.HTML(ABOUT, elem_classes="descriptive-text")
            gr.Markdown(EXAMPLE_CONFIG, elem_classes="descriptive-text")

    ####################### CITATION #######################
    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )
if __name__ == "__main__":
    # Script entry point: turn on the request queue, then launch the app.
    queued_demo = demo.queue()
    queued_demo.launch()