Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Clémentine
committed on
Commit
·
f04f90e
1
Parent(s):
4b67a33
better checkboxes, better filtering
Browse files
- app.py +19 -40
- src/scripts/update_all_request_files.py +5 -26
- src/submission/check_validity.py +33 -4
- src/submission/submit.py +3 -25
app.py
CHANGED
|
@@ -99,13 +99,10 @@ def update_table(
|
|
| 99 |
type_query: list,
|
| 100 |
precision_query: str,
|
| 101 |
size_query: list,
|
| 102 |
-
|
| 103 |
-
show_merges: bool,
|
| 104 |
-
show_moe: bool,
|
| 105 |
-
show_flagged: bool,
|
| 106 |
query: str,
|
| 107 |
):
|
| 108 |
-
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query,
|
| 109 |
filtered_df = filter_queries(query, filtered_df)
|
| 110 |
df = select_columns(filtered_df, columns)
|
| 111 |
return df
|
|
@@ -153,21 +150,21 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
|
|
| 153 |
|
| 154 |
|
| 155 |
def filter_models(
|
| 156 |
-
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list,
|
| 157 |
) -> pd.DataFrame:
|
| 158 |
# Show all models
|
| 159 |
-
if
|
| 160 |
-
filtered_df = df
|
| 161 |
-
else: # Show only still on the hub models
|
| 162 |
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
|
|
|
|
|
|
| 163 |
|
| 164 |
-
if
|
| 165 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
|
| 166 |
|
| 167 |
-
if
|
| 168 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
|
| 169 |
|
| 170 |
-
if
|
| 171 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
|
| 172 |
|
| 173 |
type_emoji = [t[0] for t in type_query]
|
|
@@ -186,10 +183,7 @@ leaderboard_df = filter_models(
|
|
| 186 |
type_query=[t.to_str(" : ") for t in ModelType],
|
| 187 |
size_query=list(NUMERIC_INTERVALS.keys()),
|
| 188 |
precision_query=[i.value.name for i in Precision],
|
| 189 |
-
|
| 190 |
-
show_merges=False,
|
| 191 |
-
show_moe=True,
|
| 192 |
-
show_flagged=False
|
| 193 |
)
|
| 194 |
|
| 195 |
demo = gr.Blocks(css=custom_css)
|
|
@@ -224,17 +218,11 @@ with demo:
|
|
| 224 |
interactive=True,
|
| 225 |
)
|
| 226 |
with gr.Row():
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
)
|
| 233 |
-
moe_models_visibility = gr.Checkbox(
|
| 234 |
-
value=True, label="Show MoE", interactive=True
|
| 235 |
-
)
|
| 236 |
-
flagged_models_visibility = gr.Checkbox(
|
| 237 |
-
value=False, label="Show flagged models", interactive=True
|
| 238 |
)
|
| 239 |
with gr.Column(min_width=320):
|
| 240 |
#with gr.Box(elem_id="box-filter"):
|
|
@@ -289,10 +277,7 @@ with demo:
|
|
| 289 |
filter_columns_type,
|
| 290 |
filter_columns_precision,
|
| 291 |
filter_columns_size,
|
| 292 |
-
|
| 293 |
-
merged_models_visibility,
|
| 294 |
-
moe_models_visibility,
|
| 295 |
-
flagged_models_visibility,
|
| 296 |
search_bar,
|
| 297 |
],
|
| 298 |
leaderboard_table,
|
|
@@ -308,10 +293,7 @@ with demo:
|
|
| 308 |
filter_columns_type,
|
| 309 |
filter_columns_precision,
|
| 310 |
filter_columns_size,
|
| 311 |
-
|
| 312 |
-
merged_models_visibility,
|
| 313 |
-
moe_models_visibility,
|
| 314 |
-
flagged_models_visibility,
|
| 315 |
search_bar,
|
| 316 |
],
|
| 317 |
leaderboard_table,
|
|
@@ -319,7 +301,7 @@ with demo:
|
|
| 319 |
# Check query parameter once at startup and update search bar + hidden component
|
| 320 |
demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
|
| 321 |
|
| 322 |
-
for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size,
|
| 323 |
selector.change(
|
| 324 |
update_table,
|
| 325 |
[
|
|
@@ -328,10 +310,7 @@ with demo:
|
|
| 328 |
filter_columns_type,
|
| 329 |
filter_columns_precision,
|
| 330 |
filter_columns_size,
|
| 331 |
-
|
| 332 |
-
merged_models_visibility,
|
| 333 |
-
moe_models_visibility,
|
| 334 |
-
flagged_models_visibility,
|
| 335 |
search_bar,
|
| 336 |
],
|
| 337 |
leaderboard_table,
|
|
|
|
| 99 |
type_query: list,
|
| 100 |
precision_query: str,
|
| 101 |
size_query: list,
|
| 102 |
+
hide_models: list,
|
|
|
|
|
|
|
|
|
|
| 103 |
query: str,
|
| 104 |
):
|
| 105 |
+
filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, hide_models)
|
| 106 |
filtered_df = filter_queries(query, filtered_df)
|
| 107 |
df = select_columns(filtered_df, columns)
|
| 108 |
return df
|
|
|
|
| 150 |
|
| 151 |
|
| 152 |
def filter_models(
|
| 153 |
+
df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list
|
| 154 |
) -> pd.DataFrame:
|
| 155 |
# Show all models
|
| 156 |
+
if "Private or deleted" in hide_models:
|
|
|
|
|
|
|
| 157 |
filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
|
| 158 |
+
else:
|
| 159 |
+
filtered_df = df
|
| 160 |
|
| 161 |
+
if "Merges and moerges" in hide_models:
|
| 162 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
|
| 163 |
|
| 164 |
+
if "MoE" in hide_models:
|
| 165 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
|
| 166 |
|
| 167 |
+
if "Flagged" in hide_models:
|
| 168 |
filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
|
| 169 |
|
| 170 |
type_emoji = [t[0] for t in type_query]
|
|
|
|
| 183 |
type_query=[t.to_str(" : ") for t in ModelType],
|
| 184 |
size_query=list(NUMERIC_INTERVALS.keys()),
|
| 185 |
precision_query=[i.value.name for i in Precision],
|
| 186 |
+
hide_models=[True, True, True, False], # Deleted, merges, flagged, MoEs
|
|
|
|
|
|
|
|
|
|
| 187 |
)
|
| 188 |
|
| 189 |
demo = gr.Blocks(css=custom_css)
|
|
|
|
| 218 |
interactive=True,
|
| 219 |
)
|
| 220 |
with gr.Row():
|
| 221 |
+
hide_models = gr.CheckboxGroup(
|
| 222 |
+
label="Hide models",
|
| 223 |
+
choices = ["Private or deleted", "Merges and moerges", "Flagged", "MoE"],
|
| 224 |
+
value=["Private or deleted", "Merges and moerges", "Flagged"],
|
| 225 |
+
interactive=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
)
|
| 227 |
with gr.Column(min_width=320):
|
| 228 |
#with gr.Box(elem_id="box-filter"):
|
|
|
|
| 277 |
filter_columns_type,
|
| 278 |
filter_columns_precision,
|
| 279 |
filter_columns_size,
|
| 280 |
+
hide_models,
|
|
|
|
|
|
|
|
|
|
| 281 |
search_bar,
|
| 282 |
],
|
| 283 |
leaderboard_table,
|
|
|
|
| 293 |
filter_columns_type,
|
| 294 |
filter_columns_precision,
|
| 295 |
filter_columns_size,
|
| 296 |
+
hide_models,
|
|
|
|
|
|
|
|
|
|
| 297 |
search_bar,
|
| 298 |
],
|
| 299 |
leaderboard_table,
|
|
|
|
| 301 |
# Check query parameter once at startup and update search bar + hidden component
|
| 302 |
demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
|
| 303 |
|
| 304 |
+
for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models]:
|
| 305 |
selector.change(
|
| 306 |
update_table,
|
| 307 |
[
|
|
|
|
| 310 |
filter_columns_type,
|
| 311 |
filter_columns_precision,
|
| 312 |
filter_columns_size,
|
| 313 |
+
hide_models,
|
|
|
|
|
|
|
|
|
|
| 314 |
search_bar,
|
| 315 |
],
|
| 316 |
leaderboard_table,
|
src/scripts/update_all_request_files.py
CHANGED
|
@@ -3,7 +3,7 @@ from huggingface_hub import ModelCard
|
|
| 3 |
|
| 4 |
import json
|
| 5 |
import time
|
| 6 |
-
from src.submission.check_validity import is_model_on_hub, check_model_card
|
| 7 |
from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
|
| 8 |
|
| 9 |
def update_models(file_path, models):
|
|
@@ -35,37 +35,16 @@ def update_models(file_path, models):
|
|
| 35 |
# If the model doesn't have a model card or a license, we consider it's deleted
|
| 36 |
if still_on_hub:
|
| 37 |
try:
|
| 38 |
-
|
|
|
|
| 39 |
still_on_hub = False
|
| 40 |
except Exception:
|
|
|
|
| 41 |
still_on_hub = False
|
| 42 |
data['still_on_hub'] = still_on_hub
|
| 43 |
|
| 44 |
-
# Check if the model is a merge
|
| 45 |
-
is_merge_from_metadata = False
|
| 46 |
-
is_moe_from_metadata = False
|
| 47 |
if still_on_hub:
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
# Storing the model metadata
|
| 51 |
-
tags = []
|
| 52 |
-
if model_card.data.tags:
|
| 53 |
-
is_merge_from_metadata = "merge" in model_card.data.tags
|
| 54 |
-
is_moe_from_metadata = "moe" in model_card.data.tags
|
| 55 |
-
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
| 56 |
-
# If the model is a merge but not saying it in the metadata, we flag it
|
| 57 |
-
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
| 58 |
-
if is_merge_from_model_card or is_merge_from_metadata:
|
| 59 |
-
tags.append("merge")
|
| 60 |
-
if not is_merge_from_metadata:
|
| 61 |
-
tags.append("flagged:undisclosed_merge")
|
| 62 |
-
moe_keywords = ["moe", "mixture of experts", "mixtral"]
|
| 63 |
-
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
|
| 64 |
-
is_moe_from_name = "moe" in model_id.lower().replace("/", "-").replace("_", "-").split("-")
|
| 65 |
-
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
| 66 |
-
tags.append("moe")
|
| 67 |
-
if not is_moe_from_metadata:
|
| 68 |
-
tags.append("flagged:undisclosed_moe")
|
| 69 |
|
| 70 |
data["tags"] = tags
|
| 71 |
|
|
|
|
| 3 |
|
| 4 |
import json
|
| 5 |
import time
|
| 6 |
+
from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_tags
|
| 7 |
from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API, H4_TOKEN
|
| 8 |
|
| 9 |
def update_models(file_path, models):
|
|
|
|
| 35 |
# If the model doesn't have a model card or a license, we consider it's deleted
|
| 36 |
if still_on_hub:
|
| 37 |
try:
|
| 38 |
+
status, msg, model_card = check_model_card(model_id)
|
| 39 |
+
if status is False:
|
| 40 |
still_on_hub = False
|
| 41 |
except Exception:
|
| 42 |
+
model_card = None
|
| 43 |
still_on_hub = False
|
| 44 |
data['still_on_hub'] = still_on_hub
|
| 45 |
|
|
|
|
|
|
|
|
|
|
| 46 |
if still_on_hub:
|
| 47 |
+
tags = get_model_tags(model_card, model_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
data["tags"] = tags
|
| 50 |
|
src/submission/check_validity.py
CHANGED
|
@@ -19,7 +19,7 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
| 19 |
try:
|
| 20 |
card = ModelCard.load(repo_id)
|
| 21 |
except huggingface_hub.utils.EntryNotFoundError:
|
| 22 |
-
return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
|
| 23 |
|
| 24 |
# Enforce license metadata
|
| 25 |
if card.data.license is None:
|
|
@@ -27,13 +27,13 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
|
|
| 27 |
return False, (
|
| 28 |
"License not found. Please add a license to your model card using the `license` metadata or a"
|
| 29 |
" `license_name`/`license_link` pair."
|
| 30 |
-
)
|
| 31 |
|
| 32 |
# Enforce card content
|
| 33 |
if len(card.text) < 200:
|
| 34 |
-
return False, "Please add a description to your model card, it is too short."
|
| 35 |
|
| 36 |
-
return True, ""
|
| 37 |
|
| 38 |
|
| 39 |
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
|
|
@@ -133,3 +133,32 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
|
|
| 133 |
users_to_submission_dates[organisation].append(info["submitted_time"])
|
| 134 |
|
| 135 |
return set(file_names), users_to_submission_dates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
try:
|
| 20 |
card = ModelCard.load(repo_id)
|
| 21 |
except huggingface_hub.utils.EntryNotFoundError:
|
| 22 |
+
return False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
|
| 23 |
|
| 24 |
# Enforce license metadata
|
| 25 |
if card.data.license is None:
|
|
|
|
| 27 |
return False, (
|
| 28 |
"License not found. Please add a license to your model card using the `license` metadata or a"
|
| 29 |
" `license_name`/`license_link` pair."
|
| 30 |
+
), None
|
| 31 |
|
| 32 |
# Enforce card content
|
| 33 |
if len(card.text) < 200:
|
| 34 |
+
return False, "Please add a description to your model card, it is too short.", None
|
| 35 |
|
| 36 |
+
return True, "", card
|
| 37 |
|
| 38 |
|
| 39 |
def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str, AutoConfig]:
|
|
|
|
| 133 |
users_to_submission_dates[organisation].append(info["submitted_time"])
|
| 134 |
|
| 135 |
return set(file_names), users_to_submission_dates
|
| 136 |
+
|
| 137 |
+
def get_model_tags(model_card, model: str):
|
| 138 |
+
is_merge_from_metadata = False
|
| 139 |
+
is_moe_from_metadata = False
|
| 140 |
+
|
| 141 |
+
tags = []
|
| 142 |
+
if model_card is None:
|
| 143 |
+
return tags
|
| 144 |
+
if model_card.data.tags:
|
| 145 |
+
is_merge_from_metadata = "merge" in model_card.data.tags
|
| 146 |
+
is_moe_from_metadata = "moe" in model_card.data.tags
|
| 147 |
+
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
| 148 |
+
# If the model is a merge but not saying it in the metadata, we flag it
|
| 149 |
+
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
| 150 |
+
if is_merge_from_model_card or is_merge_from_metadata:
|
| 151 |
+
tags.append("merge")
|
| 152 |
+
if not is_merge_from_metadata:
|
| 153 |
+
tags.append("flagged:undisclosed_merge")
|
| 154 |
+
moe_keywords = ["moe", "mixture of experts", "mixtral"]
|
| 155 |
+
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
|
| 156 |
+
is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
|
| 157 |
+
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
| 158 |
+
tags.append("moe")
|
| 159 |
+
# We no longer tag undisclosed MoEs
|
| 160 |
+
#if not is_moe_from_metadata:
|
| 161 |
+
# tags.append("flagged:undisclosed_moe")
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
return tags
|
src/submission/submit.py
CHANGED
|
@@ -13,6 +13,7 @@ from src.submission.check_validity import (
|
|
| 13 |
get_model_size,
|
| 14 |
is_model_on_hub,
|
| 15 |
user_submission_permission,
|
|
|
|
| 16 |
)
|
| 17 |
|
| 18 |
REQUESTED_MODELS = None
|
|
@@ -96,34 +97,11 @@ def add_new_eval(
|
|
| 96 |
except Exception:
|
| 97 |
return styled_error("Please select a license for your model")
|
| 98 |
|
| 99 |
-
modelcard_OK, error_msg = check_model_card(model)
|
| 100 |
if not modelcard_OK:
|
| 101 |
return styled_error(error_msg)
|
| 102 |
|
| 103 |
-
|
| 104 |
-
is_moe_from_metadata = False
|
| 105 |
-
model_card = ModelCard.load(model)
|
| 106 |
-
|
| 107 |
-
# Storing the model tags
|
| 108 |
-
tags = []
|
| 109 |
-
if model_card.data.tags:
|
| 110 |
-
is_merge_from_metadata = "merge" in model_card.data.tags
|
| 111 |
-
is_moe_from_metadata = "moe" in model_card.data.tags
|
| 112 |
-
merge_keywords = ["mergekit", "merged model", "merge model", "merging"]
|
| 113 |
-
# If the model is a merge but not saying it in the metadata, we flag it
|
| 114 |
-
is_merge_from_model_card = any(keyword in model_card.text.lower() for keyword in merge_keywords)
|
| 115 |
-
if is_merge_from_model_card or is_merge_from_metadata:
|
| 116 |
-
tags.append("merge")
|
| 117 |
-
if not is_merge_from_metadata:
|
| 118 |
-
tags.append("flagged:undisclosed_merge")
|
| 119 |
-
moe_keywords = ["moe", "mixture of experts", "mixtral"]
|
| 120 |
-
is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in moe_keywords)
|
| 121 |
-
is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
|
| 122 |
-
if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
|
| 123 |
-
tags.append("moe")
|
| 124 |
-
if not is_moe_from_metadata:
|
| 125 |
-
tags.append("flagged:undisclosed_moe")
|
| 126 |
-
|
| 127 |
|
| 128 |
# Seems good, creating the eval
|
| 129 |
print("Adding new eval")
|
|
|
|
| 13 |
get_model_size,
|
| 14 |
is_model_on_hub,
|
| 15 |
user_submission_permission,
|
| 16 |
+
get_model_tags
|
| 17 |
)
|
| 18 |
|
| 19 |
REQUESTED_MODELS = None
|
|
|
|
| 97 |
except Exception:
|
| 98 |
return styled_error("Please select a license for your model")
|
| 99 |
|
| 100 |
+
modelcard_OK, error_msg, model_card = check_model_card(model)
|
| 101 |
if not modelcard_OK:
|
| 102 |
return styled_error(error_msg)
|
| 103 |
|
| 104 |
+
tags = get_model_tags(model_card, model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
# Seems good, creating the eval
|
| 107 |
print("Adding new eval")
|