Clémentine committed

Commit df66f6e
1 parent: bb17be3

refacto style + rate limit
Files changed:

- app.py  +30 -22
- scripts/create_request_file.py  +4 -3
- src/display/formatting.py  +1 -0
- src/display/utils.py  +2 -1
- src/envs.py  +2 -0
- src/leaderboard/read_evals.py  +9 -7
- src/populate.py  +2 -2
- src/submission/check_validity.py  +9 -4
- src/submission/submit.py  +8 -8
- src/tools/collections.py  +3 -3
- src/tools/plots.py  +5 -3
    	
app.py  CHANGED

@@ -6,18 +6,6 @@ import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 
-from src.display.utils import (
-    COLS,
-    TYPES,
-    BENCHMARK_COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    NUMERIC_INTERVALS,
-    fields,
-)
-from src.display.css_html_js import custom_css, get_window_url_params
 from src.display.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
@@ -26,17 +14,29 @@ from src.display.about import (
     LLM_BENCHMARKS_TEXT,
     TITLE,
 )
+from src.display.css_html_js import custom_css, get_window_url_params
+from src.display.utils import (
+    BENCHMARK_COLS,
+    COLS,
+    EVAL_COLS,
+    EVAL_TYPES,
+    NUMERIC_INTERVALS,
+    TYPES,
+    AutoEvalColumn,
+    ModelType,
+    fields,
+)
+from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
+from src.populate import get_evaluation_queue_df, get_leaderboard_df
+from src.submission.submit import add_new_eval
+from src.tools.collections import update_collections
 from src.tools.plots import (
+    HUMAN_BASELINES,
     create_metric_plot_obj,
-    create_scores_df,
     create_plot_df,
+    create_scores_df,
     join_model_info_with_results,
-    HUMAN_BASELINES,
 )
-from src.tools.collections import update_collections
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.envs import H4_TOKEN, QUEUE_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, RESULTS_REPO, API, REPO_ID, IS_PUBLIC
-from src.submission.submit import add_new_eval
 
 
 def restart_space():
@@ -61,9 +61,9 @@ original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
 update_collections(original_df.copy())
 leaderboard_df = original_df.copy()
 
-#models = original_df["model_name_for_query"].tolist()  # needed for model backlinks in their to the leaderboard
+# models = original_df["model_name_for_query"].tolist()  # needed for model backlinks in their to the leaderboard
 # plot_df = create_plot_df(create_scores_df(join_model_info_with_results(original_df)))
-#to_be_dumped = f"models = {repr(models)}\n"
+# to_be_dumped = f"models = {repr(models)}\n"
 
 (
     finished_eval_queue_df,
@@ -173,8 +173,16 @@ with demo:
                     )
                 with gr.Row():
                     shown_columns = gr.CheckboxGroup(
-                        choices=[ …
-                        …
+                        choices=[
+                            c.name
+                            for c in fields(AutoEvalColumn)
+                            if not c.hidden and not c.never_hidden and not c.dummy
+                        ],
+                        value=[
+                            c.name
+                            for c in fields(AutoEvalColumn)
+                            if c.displayed_by_default and not c.hidden and not c.never_hidden
+                        ],
                         label="Select columns to show",
                         elem_id="column-select",
                         interactive=True,
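The substantive app.py change, beyond import sorting, is that the column selector's choices and default values are now derived from the AutoEvalColumn metadata instead of a hard-coded list. A minimal, self-contained sketch of that filtering logic (the Column dataclass and the sample columns below are illustrative stand-ins, not the real definitions in src/display/utils.py):

    from dataclasses import dataclass

    # Hypothetical stand-in for the column metadata carried by AutoEvalColumn;
    # only the flags used by the CheckboxGroup filters are modelled here.
    @dataclass
    class Column:
        name: str
        displayed_by_default: bool = True
        hidden: bool = False
        never_hidden: bool = False
        dummy: bool = False

    columns = [
        Column("Model", never_hidden=True),                        # pinned elsewhere, so not offered here
        Column("Average ⬆️"),
        Column("Hub License", displayed_by_default=False),
        Column("model_name_for_query", hidden=True, dummy=True),   # internal helper column
    ]

    # Same filters as the new CheckboxGroup arguments:
    choices = [c.name for c in columns if not c.hidden and not c.never_hidden and not c.dummy]
    value = [c.name for c in columns if c.displayed_by_default and not c.hidden and not c.never_hidden]

    print(choices)  # ['Average ⬆️', 'Hub License']
    print(value)    # ['Average ⬆️']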
    	
scripts/create_request_file.py  CHANGED

@@ -1,11 +1,12 @@
-from datetime import datetime, timezone
 import json
 import os
+import pprint
 import re
+from datetime import datetime, timezone
+
 import click
-from huggingface_hub import HfApi, snapshot_download
 from colorama import Fore
-import …
+from huggingface_hub import HfApi, snapshot_download
 
 EVAL_REQUESTS_PATH = "eval-queue"
 QUEUE_REPO = "open-llm-leaderboard/requests"
    	
src/display/formatting.py  CHANGED

@@ -1,4 +1,5 @@
 import os
+
 from huggingface_hub import HfApi
 
 API = HfApi()
    	
src/display/utils.py  CHANGED

@@ -1,7 +1,8 @@
 from dataclasses import dataclass
-import pandas as pd
 from enum import Enum
 
+import pandas as pd
+
 
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
    	
src/envs.py  CHANGED

@@ -1,4 +1,5 @@
 import os
+
 from huggingface_hub import HfApi
 
 # clone / pull the lmeh eval data
@@ -24,5 +25,6 @@ PATH_TO_COLLECTION = "open-llm-leaderboard/llm-leaderboard-best-models-652d6c796…
 # Rate limit variables
 RATE_LIMIT_PERIOD = 7
 RATE_LIMIT_QUOTA = 5
+HAS_HIGHER_RATE_LIMIT = ["TheBloke"]
 
 API = HfApi(token=H4_TOKEN)
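RATE_LIMIT_PERIOD and RATE_LIMIT_QUOTA already bounded submissions to 5 per 7-day window; the new HAS_HIGHER_RATE_LIMIT list names organisations that get a larger quota (doubled in src/submission/check_validity.py, shown below). A rough sketch of how a period/quota pair bounds submissions, assuming the window is computed with a timedelta as in user_submission_permission (the submission dates below are made up):

    from datetime import datetime, timedelta, timezone

    RATE_LIMIT_PERIOD = 7   # days, mirroring src/envs.py
    RATE_LIMIT_QUOTA = 5    # submissions allowed inside the window

    # Only submissions newer than the cutoff count against the quota.
    time_limit = datetime.now(timezone.utc) - timedelta(days=RATE_LIMIT_PERIOD)
    submission_dates = [
        datetime(2023, 10, 1, tzinfo=timezone.utc),       # hypothetical old submission
        datetime.now(timezone.utc) - timedelta(days=2),   # hypothetical recent submission
    ]
    recent = [d for d in submission_dates if d > time_limit]
    print(len(recent) > RATE_LIMIT_QUOTA)  # False: one recent submission, still under quota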
    	
src/leaderboard/read_evals.py  CHANGED

@@ -1,15 +1,15 @@
+import glob
 import json
-import os
 import math
-import …
+import os
 from dataclasses import dataclass
 from typing import Dict, List, Tuple
 
 import dateutil
 import numpy as np
 
-from src.display.utils import AutoEvalColumn, ModelType, Tasks
 from src.display.formatting import make_clickable_model
+from src.display.utils import AutoEvalColumn, ModelType, Tasks
 from src.submission.check_validity import is_model_on_hub
 
 
@@ -56,7 +56,9 @@ class EvalResult:
             model = org_and_model[1]
             result_key = f"{org}_{model}_{precision}"
 
-        still_on_hub = is_model_on_hub(…
+        still_on_hub = is_model_on_hub(
+            "/".join(org_and_model), config.get("model_sha", "main"), trust_remote_code=True
+        )[0]
 
         # Extract results available in this file (some results are split in several files)
         results = {}
@@ -73,8 +75,8 @@ class EvalResult:
                 continue
 
             # Some truthfulQA values are NaNs
-            if task.benchmark == "truthfulqa:mc" and …
-                if math.isnan(float(data["results"][…
+            if task.benchmark == "truthfulqa:mc" and "harness|truthfulqa:mc|0" in data["results"]:
+                if math.isnan(float(data["results"]["harness|truthfulqa:mc|0"][task.metric])):
                     results[task.benchmark] = 0.0
                     continue
 
@@ -191,7 +193,7 @@ def get_eval_results(results_path: str) -> List[EvalResult]:
     for v in eval_results.values():
         try:
             results.append(v.to_dict())
-        except KeyError: …
+        except KeyError:  # not all eval values present
             continue
 
     return results
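Besides the import reordering, two read_evals.py changes affect behaviour: is_model_on_hub is now called with the recorded model_sha and trust_remote_code=True, and the truthfulQA NaN guard first checks that the result key is present before indexing into it. A small sketch of that guard, with a made-up data payload and an assumed metric field name:

    import math

    # Hypothetical partial results file: the truthfulqa key may be absent when a
    # file only covers some benchmarks, so membership is checked before indexing.
    data = {"results": {"harness|truthfulqa:mc|0": {"mc2": float("nan")}}}
    results = {}

    benchmark, metric = "truthfulqa:mc", "mc2"  # metric name assumed for illustration
    key = f"harness|{benchmark}|0"

    if benchmark == "truthfulqa:mc" and key in data["results"]:
        if math.isnan(float(data["results"][key][metric])):
            results[benchmark] = 0.0  # NaN scores are zeroed instead of propagated

    print(results)  # {'truthfulqa:mc': 0.0}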
    	
src/populate.py  CHANGED

@@ -3,10 +3,10 @@ import os
 
 import pandas as pd
 
+from src.display.formatting import has_no_nan_values, make_clickable_model
+from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
 from src.leaderboard.filter_models import filter_models
 from src.leaderboard.read_evals import get_eval_results
-from src.display.formatting import make_clickable_model, has_no_nan_values
-from src.display.utils import AutoEvalColumn, EvalQueueColumn, baseline_row
 
 
 def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
    	
src/submission/check_validity.py  CHANGED

@@ -1,13 +1,15 @@
-import huggingface_hub
-import os
 import json
+import os
 import re
 from collections import defaultdict
-from …
+from datetime import datetime, timedelta, timezone
+
+import huggingface_hub
 from huggingface_hub import ModelCard
+from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 
-from …
+from src.envs import HAS_HIGHER_RATE_LIMIT
 
 
 # ht to @Wauplin, thank you for the snippet!
@@ -76,6 +78,9 @@ def user_submission_permission(submission_name, users_to_submission_dates, rate_…
     submissions_after_timelimit = [d for d in submission_dates if d > time_limit]
 
     num_models_submitted_in_period = len(submissions_after_timelimit)
+    if org_or_user in HAS_HIGHER_RATE_LIMIT:
+        rate_limit_quota = 2 * rate_limit_quota
+
     if num_models_submitted_in_period > rate_limit_quota:
         error_msg = f"Organisation or user `{org_or_user}`"
         error_msg += f"already has {num_models_submitted_in_period} model requests submitted to the leaderboard "
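The rate-limit half of the commit lives here: after counting a user's submissions inside the window, the quota is doubled for organisations listed in HAS_HIGHER_RATE_LIMIT (currently ["TheBloke"] in src/envs.py). A minimal sketch of that allowlist rule, factored into a hypothetical helper for clarity (the real code adjusts rate_limit_quota inline inside user_submission_permission):

    HAS_HIGHER_RATE_LIMIT = ["TheBloke"]  # mirrors src/envs.py

    def effective_quota(org_or_user: str, rate_limit_quota: int = 5) -> int:
        # Hypothetical helper: allowlisted orgs get twice the normal quota.
        if org_or_user in HAS_HIGHER_RATE_LIMIT:
            rate_limit_quota = 2 * rate_limit_quota
        return rate_limit_quota

    print(effective_quota("TheBloke"))      # 10
    print(effective_quota("someone-else"))  # 5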
    	
src/submission/submit.py  CHANGED

@@ -1,17 +1,17 @@
-import …
-…
+import json
+import os
 from datetime import datetime, timezone
 
-from src.display.formatting import styled_error, …
+from src.display.formatting import styled_error, styled_message, styled_warning
+from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO, RATE_LIMIT_PERIOD, RATE_LIMIT_QUOTA
 from src.leaderboard.filter_models import DO_NOT_SUBMIT_MODELS
 from src.submission.check_validity import (
-    user_submission_permission,
-    is_model_on_hub,
-    get_model_size,
-    check_model_card,
     already_submitted_models,
+    check_model_card,
+    get_model_size,
+    is_model_on_hub,
+    user_submission_permission,
 )
-from src.envs import RATE_LIMIT_QUOTA, RATE_LIMIT_PERIOD, H4_TOKEN, EVAL_REQUESTS_PATH, API, QUEUE_REPO
 
 requested_models, users_to_submission_dates = already_submitted_models(EVAL_REQUESTS_PATH)
 
    	
src/tools/collections.py  CHANGED

@@ -1,11 +1,11 @@
 import os
+
 import pandas as pd
-from …
-from huggingface_hub import get_collection, add_collection_item, update_collection_item, delete_collection_item
+from huggingface_hub import add_collection_item, delete_collection_item, get_collection, update_collection_item
 from huggingface_hub.utils._errors import HfHubHTTPError
+from pandas import DataFrame
 
 from src.display.utils import AutoEvalColumn, ModelType
-…
 from src.envs import H4_TOKEN, PATH_TO_COLLECTION
 
 # Specific intervals for the collections
    	
src/tools/plots.py  CHANGED

@@ -1,9 +1,11 @@
+import pickle
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Tuple
+
 import pandas as pd
 import plotly.express as px
 from plotly.graph_objs import Figure
-…
-from datetime import datetime, timezone
-from typing import List, Dict, Tuple, Any
+
 from src.leaderboard.filter_models import FLAGGED_MODELS
 
 # Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
 
			
