Spaces:
Running
Running
Ludwig Stumpp
committed on
Commit
·
9770a07
1
Parent(s):
f3fd684
Shown values in categorical filter now sorted
Browse files
- README.md +3 -3
- streamlit_app.py +5 -3
README.md
CHANGED
|
@@ -24,8 +24,8 @@ https://llm-leaderboard.streamlit.app/
|
|
| 24 |
| [dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b) | Databricks | yes | [944](https://lmsys.org/blog/2023-05-03-arena/) | | | | | | | | | | | | |
|
| 25 |
| [eleuther-pythia-7b](https://huggingface.co/EleutherAI/pythia-6.9b) | EleutherAI | yes | | | [0.667](https://www.mosaicml.com/blog/mpt-7b) | | | [0.667](https://www.mosaicml.com/blog/mpt-7b) | | [0.265](https://www.mosaicml.com/blog/mpt-7b) | | [0.198](https://www.mosaicml.com/blog/mpt-7b) | | | |
|
| 26 |
| [eleuther-pythia-12b](https://huggingface.co/EleutherAI/pythia-12b) | EleutherAI | yes | | | [0.704](https://www.mosaicml.com/blog/mpt-7b) | | | [0.704](https://www.mosaicml.com/blog/mpt-7b) | | [0.253](https://www.mosaicml.com/blog/mpt-7b) | | [0.233](https://www.mosaicml.com/blog/mpt-7b) | | | |
|
| 27 |
-
| [fastchat-t5-3b](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0) |
|
| 28 |
-
| [gal-120b](https://arxiv.org/abs/2211.09085v1) |
|
| 29 |
| [gpt-3-7b / curie](https://arxiv.org/abs/2005.14165) | OpenAI | yes | | [0.682](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | | | | | [0.243](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | | |
|
| 30 |
| [gpt-3-175b / davinci](https://arxiv.org/abs/2005.14165) | OpenAI | yes | | [0.793](https://arxiv.org/abs/2005.14165) | [0.789](https://arxiv.org/abs/2005.14165) | | | | | | [0.439](https://arxiv.org/abs/2005.14165) | | | | |
|
| 31 |
| [gpt-3.5-175b / text-davinci-003](https://arxiv.org/abs/2303.08774v3) | OpenAI | yes | | [0.822](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | [0.481](https://arxiv.org/abs/2303.08774v3) | [0.762](https://arxiv.org/abs/2303.08774v3) | | | [0.569](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | | |
|
|
@@ -57,7 +57,7 @@ https://llm-leaderboard.streamlit.app/
|
|
| 57 |
| [starcoder-base-16b](https://huggingface.co/bigcode/starcoderbase) | BigCode | yes | | | | | [0.304](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) | | | | | | | | |
|
| 58 |
| [starcoder-16b](https://huggingface.co/bigcode/starcoder) | BigCode | yes | | | | | [0.336](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) | | | | | | | | |
|
| 59 |
| [starcoder-16b (prompted)](https://huggingface.co/bigcode/starcoder) | BigCode | yes | | | | | [0.408](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) | | | | | | | | |
|
| 60 |
-
| [vicuna-13b](https://huggingface.co/lmsys/vicuna-13b-delta-v0) |
|
| 61 |
|
| 62 |
## Benchmarks
|
| 63 |
|
|
|
|
| 24 |
| [dolly-v2-12b](https://huggingface.co/databricks/dolly-v2-12b) | Databricks | yes | [944](https://lmsys.org/blog/2023-05-03-arena/) | | | | | | | | | | | | |
|
| 25 |
| [eleuther-pythia-7b](https://huggingface.co/EleutherAI/pythia-6.9b) | EleutherAI | yes | | | [0.667](https://www.mosaicml.com/blog/mpt-7b) | | | [0.667](https://www.mosaicml.com/blog/mpt-7b) | | [0.265](https://www.mosaicml.com/blog/mpt-7b) | | [0.198](https://www.mosaicml.com/blog/mpt-7b) | | | |
|
| 26 |
| [eleuther-pythia-12b](https://huggingface.co/EleutherAI/pythia-12b) | EleutherAI | yes | | | [0.704](https://www.mosaicml.com/blog/mpt-7b) | | | [0.704](https://www.mosaicml.com/blog/mpt-7b) | | [0.253](https://www.mosaicml.com/blog/mpt-7b) | | [0.233](https://www.mosaicml.com/blog/mpt-7b) | | | |
|
| 27 |
+
| [fastchat-t5-3b](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0) | Lmsys.org | yes | [951](https://lmsys.org/blog/2023-05-03-arena/) | | | | | | | | | | | | |
|
| 28 |
+
| [gal-120b](https://arxiv.org/abs/2211.09085v1) | Lmsys.org | no | | | | | | | | [0.526](https://paperswithcode.com/paper/galactica-a-large-language-model-for-science-1) | | | | | |
|
| 29 |
| [gpt-3-7b / curie](https://arxiv.org/abs/2005.14165) | OpenAI | yes | | [0.682](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | | | | | [0.243](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | | |
|
| 30 |
| [gpt-3-175b / davinci](https://arxiv.org/abs/2005.14165) | OpenAI | yes | | [0.793](https://arxiv.org/abs/2005.14165) | [0.789](https://arxiv.org/abs/2005.14165) | | | | | | [0.439](https://arxiv.org/abs/2005.14165) | | | | |
|
| 31 |
| [gpt-3.5-175b / text-davinci-003](https://arxiv.org/abs/2303.08774v3) | OpenAI | yes | | [0.822](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | [0.481](https://arxiv.org/abs/2303.08774v3) | [0.762](https://arxiv.org/abs/2303.08774v3) | | | [0.569](https://crfm.stanford.edu/helm/latest/?group=core_scenarios) | | | | |
|
|
|
|
| 57 |
| [starcoder-base-16b](https://huggingface.co/bigcode/starcoderbase) | BigCode | yes | | | | | [0.304](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) | | | | | | | | |
|
| 58 |
| [starcoder-16b](https://huggingface.co/bigcode/starcoder) | BigCode | yes | | | | | [0.336](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) | | | | | | | | |
|
| 59 |
| [starcoder-16b (prompted)](https://huggingface.co/bigcode/starcoder) | BigCode | yes | | | | | [0.408](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) | | | | | | | | |
|
| 60 |
+
| [vicuna-13b](https://huggingface.co/lmsys/vicuna-13b-delta-v0) | Lmsys.org | no | [1169](https://lmsys.org/blog/2023-05-03-arena/) | | | | | | | | | | | | |
|
| 61 |
|
| 62 |
## Benchmarks
|
| 63 |
|
streamlit_app.py
CHANGED
|
@@ -110,11 +110,13 @@ def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[s
|
|
| 110 |
modification_container = st.container()
|
| 111 |
|
| 112 |
with modification_container:
|
| 113 |
-
to_filter_index = st.multiselect("Filter by model:", df.index)
|
| 114 |
if to_filter_index:
|
| 115 |
df = pd.DataFrame(df.loc[to_filter_index])
|
| 116 |
|
| 117 |
-
to_filter_columns = st.multiselect(
|
|
|
|
|
|
|
| 118 |
if to_filter_columns:
|
| 119 |
df = pd.DataFrame(df[ignore_columns + to_filter_columns])
|
| 120 |
|
|
@@ -177,7 +179,7 @@ def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
|
|
| 177 |
else:
|
| 178 |
selected_values = right.multiselect(
|
| 179 |
f"Values for {column}:",
|
| 180 |
-
df[column].unique(),
|
| 181 |
)
|
| 182 |
|
| 183 |
if selected_values:
|
|
|
|
| 110 |
modification_container = st.container()
|
| 111 |
|
| 112 |
with modification_container:
|
| 113 |
+
to_filter_index = st.multiselect("Filter by model:", sorted(df.index))
|
| 114 |
if to_filter_index:
|
| 115 |
df = pd.DataFrame(df.loc[to_filter_index])
|
| 116 |
|
| 117 |
+
to_filter_columns = st.multiselect(
|
| 118 |
+
"Filter by benchmark:", sorted([c for c in df.columns if c not in ignore_columns])
|
| 119 |
+
)
|
| 120 |
if to_filter_columns:
|
| 121 |
df = pd.DataFrame(df[ignore_columns + to_filter_columns])
|
| 122 |
|
|
|
|
| 179 |
else:
|
| 180 |
selected_values = right.multiselect(
|
| 181 |
f"Values for {column}:",
|
| 182 |
+
sorted(df[column].unique()),
|
| 183 |
)
|
| 184 |
|
| 185 |
if selected_values:
|