import re
import gradio as gr
import pandas as pd

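# Dense (non-sparse) BF16 tensor TFLOPS with FP32 accumulation (the setup PyTorch
# uses), as described in the Markdown blurb shown in the UI below.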
GPU_TFLOPS_NONSPARSE = dict(
    rtx_3070=40.6,
    rtx_3070_ti=43.5,
    rtx_3080_mobile=30,
    rtx_3080=59.5,
    rtx_3090=71,
    rtx_4070=58.3,
    rtx_4070_ti=80.2,
    rtx_4080=97.5,
    rtx_4090=165.2,
    rtx_5070=61.7,
    rtx_5070_ti=87.9,
    rtx_5080=112.6,
    rtx_5090=209.5,

    rtx_a6000=154.8,
    rtx_6000_ada=364,
    rtx_6000_blackwell_max_q=438.9,
    rtx_6000_blackwell=503.8,

    a100=312,
    h100_sxm5=1000,
    h100_pcie=800,
)

# === Categorization rules ===
categories = {
    # consumer RTX cards (30xx/40xx/50xx series, including mobile)
    "consumer_rtx": [
        k for k in GPU_TFLOPS_NONSPARSE
        if any(x in k for x in ["3070", "3080", "3090", "4070", "4080", "4090", "5070", "5080", "5090"])
    ],
    # workstation 6000 family (A6000, Ada, Blackwell variants)
    "workstation_6000": [k for k in GPU_TFLOPS_NONSPARSE if "6000" in k],
    # datacenter accelerators
    "datacenter": [k for k in GPU_TFLOPS_NONSPARSE if any(x in k for x in ["a100", "h100"])],
}
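
# For reference, with the table above these rules resolve to:
#   consumer_rtx     -> the thirteen GeForce RTX 30xx/40xx/50xx entries (rtx_3070 ... rtx_5090)
#   workstation_6000 -> rtx_a6000, rtx_6000_ada, rtx_6000_blackwell_max_q, rtx_6000_blackwell
#   datacenter       -> a100, h100_sxm5, h100_pcie
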
# === Formatting function ===
def prettify_name(key: str) -> str:
    """Convert internal GPU key names (e.g. "rtx_4070_ti") to human-friendly titles."""
    name = key.replace("_", " ").upper()
    # Keep model identifiers (RTX, A100, H100, A6000, ...) uppercase; title-case everything else.
    name = " ".join(
        word if word == "RTX" or re.fullmatch(r"[AH]\d+", word) else word.capitalize()
        for word in name.split()
    )
    name = name.replace("Max Q", "Max-Q")
    name = name.replace("Pcie", "PCIe")
    name = name.replace("Sxm5", "SXM5")
    return name
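
# A few illustrative examples of the mapping, given the keys defined above:
#   prettify_name("rtx_4070_ti")              -> "RTX 4070 Ti"
#   prettify_name("rtx_6000_blackwell_max_q") -> "RTX 6000 Blackwell Max-Q"
#   prettify_name("h100_sxm5")                -> "H100 SXM5"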

def make_df(filtered_dict: dict) -> pd.DataFrame:
    df = pd.DataFrame(
        [(prettify_name(k), v) for k, v in filtered_dict.items()],
        columns=["GPU", "TFLOPS (non-sparse)"]
    )
    return df.sort_values(by="TFLOPS (non-sparse)", ascending=False, ignore_index=True)

def filter_table(hide_consumer: bool, hide_workstation: bool, hide_datacenter: bool) -> pd.DataFrame:
    data = GPU_TFLOPS_NONSPARSE.copy()

    if hide_consumer:
        for key in categories["consumer_rtx"]:
            data.pop(key, None)
    if hide_workstation:
        for key in categories["workstation_6000"]:
            data.pop(key, None)
    if hide_datacenter:
        for key in categories["datacenter"]:
            data.pop(key, None)

    return make_df(data)
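
# For example, filter_table(True, True, False) leaves only the datacenter rows,
# while filter_table(False, False, False) returns the full table (the default below).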

DEFAULT_DF = filter_table(False, False, False)

with gr.Blocks() as demo:
    gr.Markdown("# BF16 GPU TFLOPS Viewer\nToggle categories to hide/show entries.\nWhen calculating the 'usable' TFLOPs of a particular card, these are what we call non-sparse TFLOPs. Below contains data gathered for the most commonly used CUDA cards and their BF16 with FP32 accum (what we use in PyTorch) values")

    with gr.Row():
        hide_consumer = gr.Checkbox(label="Hide Consumer RTX", value=False)
        hide_workstation = gr.Checkbox(label="Hide Professional Workstation (6000)", value=False)
        hide_datacenter = gr.Checkbox(label="Hide Datacenter Cards", value=False)

    table = gr.Dataframe(
        value=DEFAULT_DF,
        headers=["GPU", "TFLOPS (non-sparse)"],
        datatype=["str", "number"],
        interactive=False,
        wrap=True,
        max_height=1000
    )

    for cb in (hide_consumer, hide_workstation, hide_datacenter):
        cb.change(
            fn=filter_table,
            inputs=[hide_consumer, hide_workstation, hide_datacenter],
            outputs=table
        )

    demo.load(fn=filter_table, inputs=[hide_consumer, hide_workstation, hide_datacenter], outputs=table)

if __name__ == "__main__":
    demo.launch()