Spaces: Running on Zero
import glob
import sys
from typing import Optional

import pandas as pd
from huggingface_hub import hf_hub_download, upload_file
from huggingface_hub.utils import EntryNotFoundError

sys.path.append(".")
from utils import BASE_PATH, FINAL_CSV_FILE, GITHUB_SHA, REPO_ID, collate_csv  # noqa: E402


def has_previous_benchmark() -> Optional[str]:
    """Return the local path of the previously uploaded benchmark CSV, or None if none exists yet."""
    csv_path = None
    try:
        csv_path = hf_hub_download(repo_id=REPO_ID, repo_type="dataset", filename=FINAL_CSV_FILE)
    except EntryNotFoundError:
        csv_path = None
    return csv_path


def filter_float(value):
    """Strip a previously appended "(+x.xx%)" suffix and return the underlying float."""
    if isinstance(value, str):
        return float(value.split()[0])
    return value


def push_to_hf_dataset():
    """Collate the per-benchmark CSVs, annotate changes vs. the previous run, and upload the result."""
    all_csvs = sorted(glob.glob(f"{BASE_PATH}/*.csv"))
    collate_csv(all_csvs, FINAL_CSV_FILE)

    # If there's an existing benchmark file, we should report the changes.
    csv_path = has_previous_benchmark()
    if csv_path is not None:
        current_results = pd.read_csv(FINAL_CSV_FILE)
        previous_results = pd.read_csv(csv_path)

        # Compare only the numeric metric columns; configuration columns are left untouched.
        numeric_columns = current_results.select_dtypes(include=["float64", "int64"]).columns
        numeric_columns = [
            c for c in numeric_columns if c not in ["batch_size", "num_inference_steps", "actual_gpu_memory (gbs)"]
        ]

        for column in numeric_columns:
            # Previous values may carry a "(+x.xx%)" suffix from an earlier run; strip it first.
            previous_results[column] = previous_results[column].map(filter_float)

            # Calculate the percentage change.
            current_results[column] = current_results[column].astype(float)
            previous_results[column] = previous_results[column].astype(float)
            percent_change = ((current_results[column] - previous_results[column]) / previous_results[column]) * 100

            # Format the values with a '+' or '-' sign and append them to the original values.
            current_results[column] = current_results[column].map(str) + percent_change.map(
                lambda x: f" ({'+' if x > 0 else ''}{x:.2f}%)"
            )
            # There might be newly added rows, so filter out the NaNs.
            current_results[column] = current_results[column].map(lambda x: x.replace(" (nan%)", ""))

        # Overwrite the current result file.
        current_results.to_csv(FINAL_CSV_FILE, index=False)

    commit_message = f"upload from sha: {GITHUB_SHA}" if GITHUB_SHA is not None else "upload benchmark results"
    upload_file(
        repo_id=REPO_ID,
        path_in_repo=FINAL_CSV_FILE,
        path_or_fileobj=FINAL_CSV_FILE,
        repo_type="dataset",
        commit_message=commit_message,
    )


if __name__ == "__main__":
    push_to_hf_dataset()
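
The annotation loop above turns each metric into "current (change vs. previous)". For example, if the previous run recorded 2.00 for a column and the new run records 2.10, the uploaded value becomes "2.1 (+5.00%)".

The collate_csv helper is imported from the local utils module, which is not shown here. A minimal sketch of what such a helper could look like, assuming it simply concatenates the per-benchmark CSVs row-wise into one file (the real implementation in utils may differ):

    import pandas as pd


    def collate_csv(csv_files, output_file):
        # Hypothetical sketch, not the actual utils implementation: read every
        # per-benchmark CSV and concatenate the rows into one combined CSV.
        frames = [pd.read_csv(path) for path in csv_files]
        combined = pd.concat(frames, ignore_index=True)
        combined.to_csv(output_file, index=False)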