Spaces:
Runtime error
Runtime error
Commit
·
0d4e8d1
1
Parent(s):
fced179
add
Browse files- results/DeepSeek-R1.json +3 -1
- results/Llama-3.1-70B-Instruct.json +3 -1
- results/Llama-3.1-8B-Instruct.json +3 -1
- results/Llama-3.3-70B-Instruct.json +3 -1
- results/Mistral-7B-Instruct-v0.3.json +3 -1
- results/Mistral-Large-Instruct-2411.json +3 -1
- results/Mistral-Small-Instruct-2409.json +3 -1
- results/QwQ-32B-Preview.json +3 -1
- results/Qwen2.5-32B-Instruct.json +3 -1
- results/Qwen2.5-72B-Instruct.json +3 -1
- results/Qwen2.5-7B-Instruct.json +3 -1
- src/display/formatting.py +2 -3
- src/leaderboard/read_evals.py +12 -7
results/DeepSeek-R1.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "deepseek-ai/DeepSeek-R1"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "deepseek-ai/DeepSeek-R1",
|
| 4 |
+
"link": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
| 5 |
+
"Params": "671B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Llama-3.1-70B-Instruct.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "meta-llama/
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "meta-llama/Llama-3.1-70B-Instruct",
|
| 4 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
|
| 5 |
+
"Params": "70B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Llama-3.1-8B-Instruct.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "meta-llama/
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "meta-llama/Llama-3.1-8B-Instruct",
|
| 4 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct",
|
| 5 |
+
"Params": "8B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Llama-3.3-70B-Instruct.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "meta-llama/Llama-3.3-70B-Instruct"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "meta-llama/Llama-3.3-70B-Instruct",
|
| 4 |
+
"link": "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
|
| 5 |
+
"Params": "70B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Mistral-7B-Instruct-v0.3.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "mistralai/Mistral-7B-Instruct-v0.3"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "mistralai/Mistral-7B-Instruct-v0.3",
|
| 4 |
+
"link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
|
| 5 |
+
"Params": "7B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Mistral-Large-Instruct-2411.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "mistralai/Mistral-Large-Instruct-2411"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "mistralai/Mistral-Large-Instruct-2411",
|
| 4 |
+
"link": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2411",
|
| 5 |
+
"Params": "123B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Mistral-Small-Instruct-2409.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "mistralai/Mistral-Small-Instruct-2409"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "mistralai/Mistral-Small-Instruct-2409",
|
| 4 |
+
"link": "https://huggingface.co/mistralai/Mistral-Small-Instruct-2409",
|
| 5 |
+
"Params": "22B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/QwQ-32B-Preview.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "Qwen/QwQ-32B-Preview"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "Qwen/QwQ-32B-Preview",
|
| 4 |
+
"link": "https://huggingface.co/Qwen/QwQ-32B-Preview",
|
| 5 |
+
"Params": "32B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Qwen2.5-32B-Instruct.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "Qwen/Qwen2.5-32B-Instruct"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "Qwen/Qwen2.5-32B-Instruct",
|
| 4 |
+
"link": "https://huggingface.co/Qwen/Qwen2.5-32B-Instruct",
|
| 5 |
+
"Params": "32B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Qwen2.5-72B-Instruct.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "Qwen/Qwen2.5-72B-Instruct",
|
| 4 |
+
"link": "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
|
| 5 |
+
"Params": "72B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
results/Qwen2.5-7B-Instruct.json
CHANGED
|
@@ -1,6 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
-
"model_name": "Qwen/Qwen2.5-7B-Instruct"
|
|
|
|
|
|
|
| 4 |
},
|
| 5 |
"results": {
|
| 6 |
"Overall": {
|
|
|
|
| 1 |
{
|
| 2 |
"config": {
|
| 3 |
+
"model_name": "Qwen/Qwen2.5-7B-Instruct",
|
| 4 |
+
"link": "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
|
| 5 |
+
"Params": "7B"
|
| 6 |
},
|
| 7 |
"results": {
|
| 8 |
"Overall": {
|
src/display/formatting.py
CHANGED
|
@@ -2,9 +2,8 @@ def model_hyperlink(link, model_name):
|
|
| 2 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
| 3 |
|
| 4 |
|
| 5 |
-
def make_clickable_model(model_name,
|
| 6 |
-
if
|
| 7 |
-
link = f"https://huggingface.co/{model_name}"
|
| 8 |
return model_hyperlink(link, model_name)
|
| 9 |
else:
|
| 10 |
return f'<span>{model_name}</span>'
|
|
|
|
| 2 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
| 3 |
|
| 4 |
|
| 5 |
+
def make_clickable_model(model_name, link=''):
    """Return an HTML snippet that displays ``model_name``.

    Args:
        model_name: Text shown for the model.
        link: Optional URL. When truthy, the name is wrapped in an anchor
            built by ``model_hyperlink``.

    Returns:
        The anchor markup from ``model_hyperlink(link, model_name)`` when
        ``link`` is truthy, otherwise the name inside a plain ``<span>``.
    """
    if not link:
        # No URL supplied (empty string or None): render the name as plain text.
        return f'<span>{model_name}</span>'
    return model_hyperlink(link, model_name)
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -28,9 +28,10 @@ class EvalResult:
|
|
| 28 |
# architecture: str = "Unknown"
|
| 29 |
# license: str = "?"
|
| 30 |
# likes: int = 0
|
| 31 |
-
num_params:
|
| 32 |
# date: str = "" # submission date of request file
|
| 33 |
-
still_on_hub: bool = False
|
|
|
|
| 34 |
|
| 35 |
@classmethod
|
| 36 |
def init_from_json_file(self, json_filepath):
|
|
@@ -46,6 +47,8 @@ class EvalResult:
|
|
| 46 |
# Get model and org
|
| 47 |
org_and_model = config.get("model_name", config.get("model_args", None))
|
| 48 |
org_and_model = org_and_model.split("/", 1)
|
|
|
|
|
|
|
| 49 |
|
| 50 |
if len(org_and_model) == 1:
|
| 51 |
org = None
|
|
@@ -59,9 +62,9 @@ class EvalResult:
|
|
| 59 |
result_key = f"{org}_{model}"
|
| 60 |
full_model = "/".join(org_and_model)
|
| 61 |
|
| 62 |
-
still_on_hub, _, model_config = is_model_on_hub(
|
| 63 |
-
|
| 64 |
-
)
|
| 65 |
# architecture = "?"
|
| 66 |
# if model_config is not None:
|
| 67 |
# architectures = getattr(model_config, "architectures", None)
|
|
@@ -87,9 +90,11 @@ class EvalResult:
|
|
| 87 |
org=org,
|
| 88 |
model=model,
|
| 89 |
results=results,
|
|
|
|
|
|
|
| 90 |
# precision=precision,
|
| 91 |
# revision= config.get("model_sha", ""),
|
| 92 |
-
still_on_hub=still_on_hub,
|
| 93 |
# architecture=architecture
|
| 94 |
)
|
| 95 |
|
|
@@ -121,7 +126,7 @@ class EvalResult:
|
|
| 121 |
# AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 122 |
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 123 |
# AutoEvalColumn.architecture.name: self.architecture,
|
| 124 |
-
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.
|
| 125 |
# AutoEvalColumn.revision.name: self.revision,
|
| 126 |
AutoEvalColumn.average.name: average,
|
| 127 |
# AutoEvalColumn.license.name: self.license,
|
|
|
|
| 28 |
# architecture: str = "Unknown"
|
| 29 |
# license: str = "?"
|
| 30 |
# likes: int = 0
|
| 31 |
+
num_params: str = '-'
|
| 32 |
# date: str = "" # submission date of request file
|
| 33 |
+
# still_on_hub: bool = False
|
| 34 |
+
link: str = ''
|
| 35 |
|
| 36 |
@classmethod
|
| 37 |
def init_from_json_file(self, json_filepath):
|
|
|
|
| 47 |
# Get model and org
|
| 48 |
org_and_model = config.get("model_name", config.get("model_args", None))
|
| 49 |
org_and_model = org_and_model.split("/", 1)
|
| 50 |
+
link = config.get("link", '')
|
| 51 |
+
# The result files store the size under "Params" (capital P); JSON keys are
# case-sensitive, so check that spelling first and keep "params" as a fallback.
params = config.get("Params", config.get("params", '-'))
|
| 52 |
|
| 53 |
if len(org_and_model) == 1:
|
| 54 |
org = None
|
|
|
|
| 62 |
result_key = f"{org}_{model}"
|
| 63 |
full_model = "/".join(org_and_model)
|
| 64 |
|
| 65 |
+
# still_on_hub, _, model_config = is_model_on_hub(
|
| 66 |
+
# full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
|
| 67 |
+
# )
|
| 68 |
# architecture = "?"
|
| 69 |
# if model_config is not None:
|
| 70 |
# architectures = getattr(model_config, "architectures", None)
|
|
|
|
| 90 |
org=org,
|
| 91 |
model=model,
|
| 92 |
results=results,
|
| 93 |
+
link=link,
|
| 94 |
+
num_params=params,
|
| 95 |
# precision=precision,
|
| 96 |
# revision= config.get("model_sha", ""),
|
| 97 |
+
# still_on_hub=still_on_hub,
|
| 98 |
# architecture=architecture
|
| 99 |
)
|
| 100 |
|
|
|
|
| 126 |
# AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
| 127 |
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
| 128 |
# AutoEvalColumn.architecture.name: self.architecture,
|
| 129 |
+
AutoEvalColumn.model.name: make_clickable_model(self.full_model, self.link),
|
| 130 |
# AutoEvalColumn.revision.name: self.revision,
|
| 131 |
AutoEvalColumn.average.name: average,
|
| 132 |
# AutoEvalColumn.license.name: self.license,
|