Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Clémentine
committed on
Commit
·
c2e4da0
1
Parent(s):
e703fd8
push validation to public dataset
Browse files
app.py
CHANGED
|
@@ -21,6 +21,7 @@ OWNER="gaia-benchmark"
|
|
| 21 |
DATA_DATASET = f"{OWNER}/GAIA"
|
| 22 |
INTERNAL_DATA_DATASET = f"{OWNER}/GAIA_internal"
|
| 23 |
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
|
|
|
|
| 24 |
CONTACT_DATASET = f"{OWNER}/contact_info"
|
| 25 |
RESULTS_DATASET = f"{OWNER}/results_public"
|
| 26 |
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
|
|
@@ -76,6 +77,7 @@ def add_new_eval(
|
|
| 76 |
organisation: str,
|
| 77 |
mail: str,
|
| 78 |
):
|
|
|
|
| 79 |
# Very basic email parsing
|
| 80 |
_, parsed_mail = parseaddr(mail)
|
| 81 |
if not "@" in parsed_mail:
|
|
@@ -84,7 +86,7 @@ def add_new_eval(
|
|
| 84 |
print("Adding new eval")
|
| 85 |
|
| 86 |
# Check if the combination model/org already exists and prints a warning message if yes
|
| 87 |
-
if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for
|
| 88 |
return format_warning("This model has been already submitted.")
|
| 89 |
|
| 90 |
if path_to_file is None:
|
|
@@ -135,7 +137,7 @@ def add_new_eval(
|
|
| 135 |
scores[level] += score
|
| 136 |
num_questions["all"] += 1
|
| 137 |
num_questions[level] += 1
|
| 138 |
-
|
| 139 |
# Save scored file
|
| 140 |
api.upload_file(
|
| 141 |
repo_id=SUBMISSION_DATASET,
|
|
@@ -145,6 +147,16 @@ def add_new_eval(
|
|
| 145 |
token=TOKEN
|
| 146 |
)
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
# Actual submission
|
| 149 |
eval_entry = {
|
| 150 |
"model": model,
|
|
|
|
| 21 |
DATA_DATASET = f"{OWNER}/GAIA"
|
| 22 |
INTERNAL_DATA_DATASET = f"{OWNER}/GAIA_internal"
|
| 23 |
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
|
| 24 |
+
SUBMISSION_DATASET_PUBLIC = f"{OWNER}/submissions_public"
|
| 25 |
CONTACT_DATASET = f"{OWNER}/contact_info"
|
| 26 |
RESULTS_DATASET = f"{OWNER}/results_public"
|
| 27 |
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
|
|
|
|
| 77 |
organisation: str,
|
| 78 |
mail: str,
|
| 79 |
):
|
| 80 |
+
is_validation = val_or_test == "validation"
|
| 81 |
# Very basic email parsing
|
| 82 |
_, parsed_mail = parseaddr(mail)
|
| 83 |
if not "@" in parsed_mail:
|
|
|
|
| 86 |
print("Adding new eval")
|
| 87 |
|
| 88 |
# Check if the combination model/org already exists and prints a warning message if yes
|
| 89 |
+
if model.lower() in set([m.lower() for m in eval_results[val_or_test]["model"]]) and organisation.lower() in set([o.lower() for o in eval_results[val_or_test]["organisation"]]):
|
| 90 |
return format_warning("This model has been already submitted.")
|
| 91 |
|
| 92 |
if path_to_file is None:
|
|
|
|
| 137 |
scores[level] += score
|
| 138 |
num_questions["all"] += 1
|
| 139 |
num_questions[level] += 1
|
| 140 |
+
|
| 141 |
# Save scored file
|
| 142 |
api.upload_file(
|
| 143 |
repo_id=SUBMISSION_DATASET,
|
|
|
|
| 147 |
token=TOKEN
|
| 148 |
)
|
| 149 |
|
| 150 |
+
# Save scored file to the public dataset (validation split only)
|
| 151 |
+
if is_validation:
|
| 152 |
+
api.upload_file(
|
| 153 |
+
repo_id=SUBMISSION_DATASET_PUBLIC,
|
| 154 |
+
path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
|
| 155 |
+
path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}_scored_{datetime.datetime.today()}.jsonl",
|
| 156 |
+
repo_type="dataset",
|
| 157 |
+
token=TOKEN
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
# Actual submission
|
| 161 |
eval_entry = {
|
| 162 |
"model": model,
|