Spaces:
Sleeping
Sleeping
| import re | |
| import gradio as gr | |
| import pandas as pd | |
# Peak BF16 (FP32-accumulate) throughput per card, in TFLOPS, *without*
# structured sparsity. Keys are internal identifiers; see prettify_name()
# for the display form.
GPU_TFLOPS_NONSPARSE = {
    "rtx_3070": 40.6,
    "rtx_3070_ti": 43.5,
    "rtx_3080_mobile": 30,
    "rtx_3080": 59.5,
    "rtx_3090": 71,
    "rtx_4070": 58.3,
    "rtx_4070_ti": 80.2,
    "rtx_4080": 97.5,
    "rtx_4090": 165.2,
    "rtx_5070": 61.7,
    "rtx_5070_ti": 87.9,
    "rtx_5080": 112.6,
    "rtx_5090": 209.5,
    "rtx_a6000": 154.8,
    "rtx_6000_ada": 364,
    "rtx_6000_blackwell_max_q": 438.9,
    "rtx_6000_blackwell": 503.8,
    "a100": 312,
    "h100_sxm5": 1000,
    "h100_pcie": 800,
}
# === Categorization rules ===
# Buckets are derived from substrings of the internal key names, so new GPUs
# that follow the same naming scheme are classified automatically. A key may
# land in more than one bucket if it matches several rules.
_CONSUMER_MODELS = ("3070", "3080", "3090", "4070", "4080", "4090", "5070", "5080", "5090")

categories = {
    "consumer_rtx": [],      # consumer RTX cards (30xx/40xx/50xx series, including mobile)
    "workstation_6000": [],  # workstation 6000 family (A6000, Ada, Blackwell variants)
    "datacenter": [],        # datacenter accelerators
}
for _key in GPU_TFLOPS_NONSPARSE:
    if any(model in _key for model in _CONSUMER_MODELS):
        categories["consumer_rtx"].append(_key)
    if "6000" in _key:
        categories["workstation_6000"].append(_key)
    if "a100" in _key or "h100" in _key:
        categories["datacenter"].append(_key)
# === Formatting function ===
def prettify_name(key: str) -> str:
    """Convert an internal GPU key (e.g. ``h100_sxm5``) to a display title.

    Underscores become spaces, model identifiers (RTX/A###/H###) stay
    uppercase, and known marketing suffixes get their official spelling
    (Ti, Max-Q, PCIe, SXM5, Ada).
    """
    name = key.replace("_", " ").upper()
    name = name.replace("TI", "Ti")
    # Title-case each word except model identifiers, which keep their
    # uppercase form (RTX 4090, A100, H100, A6000, ...).
    name = " ".join(
        word if word.startswith(("RTX", "A", "H")) else word.capitalize()
        for word in name.split()
    )
    name = name.replace("Max Q", "Max-Q")
    name = name.replace("Pcie", "PCIe")
    # Fix: original code replaced "Smx5" (a typo that never matched), so
    # h100_sxm5 was displayed as "H100 Sxm5" instead of "H100 SXM5".
    name = name.replace("Sxm5", "SXM5")
    # "ADA" starts with "A" and so skips capitalization above; the card's
    # official name is "RTX 6000 Ada".
    name = name.replace("ADA", "Ada")
    return name
def make_df(filtered_dict: dict) -> pd.DataFrame:
    """Build a two-column (GPU, TFLOPS) table, sorted fastest-first.

    Keys are run through prettify_name() for display; the row index is
    reset after sorting so it reads 0..n-1.
    """
    rows = [(prettify_name(key), tflops) for key, tflops in filtered_dict.items()]
    frame = pd.DataFrame(rows, columns=["GPU", "TFLOPS (non-sparse)"])
    frame = frame.sort_values("TFLOPS (non-sparse)", ascending=False, ignore_index=True)
    return frame
def filter_table(hide_consumer: bool, hide_workstation: bool, hide_datacenter: bool) -> pd.DataFrame:
    """Return the GPU table with the checked categories removed.

    Each flag corresponds to one bucket in ``categories``; a GPU is dropped
    if it belongs to any hidden bucket.
    """
    hidden = set()
    if hide_consumer:
        hidden.update(categories["consumer_rtx"])
    if hide_workstation:
        hidden.update(categories["workstation_6000"])
    if hide_datacenter:
        hidden.update(categories["datacenter"])
    remaining = {k: v for k, v in GPU_TFLOPS_NONSPARSE.items() if k not in hidden}
    return make_df(remaining)
# Unfiltered table shown on first render (no category hidden).
DEFAULT_DF = filter_table(False, False, False)
# === Gradio UI ===
# Three checkboxes toggle category visibility; any change re-runs
# filter_table with the current state of all three and rewrites the table.
with gr.Blocks() as demo:
    gr.Markdown("# BF16 GPU TFLOPS Viewer\nToggle categories to hide/show entries.\nWhen calculating the 'usable' TFLOPs of a particular card, these are what we call non-sparse TFLOPs. Below contains data gathered for the most commonly used CUDA cards and their BF16 with FP32 accum (what we use in PyTorch) values")
    with gr.Row():
        # All filters start unchecked, matching DEFAULT_DF (nothing hidden).
        hide_consumer = gr.Checkbox(label="Hide Consumer RTX", value=False)
        hide_workstation = gr.Checkbox(label="Hide Professional Workstation (6000)", value=False)
        hide_datacenter = gr.Checkbox(label="Hide Datacenter Cards", value=False)
    table = gr.Dataframe(
        value=DEFAULT_DF,
        headers=["GPU", "TFLOPS (non-sparse)"],
        datatype=["str", "number"],
        interactive=False,  # display-only; users cannot edit cells
        wrap=True,
        max_height=1000
    )
    # Wire every checkbox to the same handler; the handler always receives
    # all three checkbox values, so one listener per checkbox suffices.
    for cb in (hide_consumer, hide_workstation, hide_datacenter):
        cb.change(
            fn=filter_table,
            inputs=[hide_consumer, hide_workstation, hide_datacenter],
            outputs=table
        )
    # Recompute on page load so the table reflects checkbox state even after
    # a browser refresh (value=DEFAULT_DF only covers the initial render).
    demo.load(fn=filter_table, inputs=[hide_consumer, hide_workstation, hide_datacenter], outputs=table)
if __name__ == "__main__":
    demo.launch()