| """ | |
| Handle submissions to the GuardBench leaderboard. | |
| """ | |
| import json | |
| import os | |
| import tempfile | |
| from datetime import datetime | |
| from typing import Dict, List, Tuple | |
| import shutil | |
| from huggingface_hub import HfApi | |
| from datasets import load_dataset | |
| import subprocess | |
| from src.display.formatting import styled_error, styled_message | |
| from src.envs import RESULTS_DATASET_ID, TOKEN | |
| from src.leaderboard.processor import process_jsonl_submission | |
| from guardbench.evaluator import Evaluator | |
| from guardbench.context import GuardbenchContext | |
| from guardbench.models_config import ModelType | |
def validate_submission(file_path: str) -> Tuple[bool, str]:
    """
    Validate a submission file.
    """
    try:
        entries, message = process_jsonl_submission(file_path)
        if not entries:
            return False, message
        return True, "Submission is valid"
    except Exception as e:
        return False, f"Error validating submission: {e}"

def submit_entry_to_hub(entry: Dict, model_name: str, version="v0") -> Tuple[bool, str]:
    """
    Submit a model's evaluation entry to the HuggingFace dataset.
    """
    try:
        # Create a filesystem-safe model name for the entry path
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        entry_path = f"entries/entry_{model_name_safe}_{version}.json"

        # Write the entry to a temporary file before uploading
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as temp_file:
            json.dump(entry, temp_file, indent=2)
            temp_path = temp_file.name

        # Upload the entry file to the results dataset
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=entry_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add evaluation entry for {model_name} (version {version})",
        )
        os.unlink(temp_path)

        return True, f"Successfully uploaded evaluation entry for {model_name}"
    except Exception as e:
        return False, f"Error submitting entry to dataset: {e}"

def submit_leaderboard_to_hub(entries: List[Dict], version="v0") -> Tuple[bool, str]:
    """
    Submit the updated leaderboard to the HuggingFace dataset.
    """
    try:
        # Assemble the leaderboard payload
        leaderboard_data = {
            "entries": entries,
            "last_updated": datetime.now().isoformat(),
            "version": version,
        }

        # Write to a temporary file before uploading
        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as temp_file:
            json.dump(leaderboard_data, temp_file, indent=2)
            temp_path = temp_file.name

        # Upload the leaderboard file
        api = HfApi(token=TOKEN)
        api.upload_file(
            path_or_fileobj=temp_path,
            path_in_repo=f"leaderboards/leaderboard_{version}.json",
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Update leaderboard for version {version}",
        )
        os.unlink(temp_path)

        return True, "Leaderboard updated successfully"
    except Exception as e:
        return False, f"Error updating leaderboard: {e}"

def process_submission(file_path: str, metadata: Dict, version="v0") -> str:
    """
    Process a submission to the GuardBench leaderboard.
    """
    target_file = None
    try:
        # Validate submission
        is_valid, validation_message = validate_submission(file_path)
        if not is_valid:
            return styled_error(validation_message)

        # Locate the GuardBench results directory inside the submodule
        guardbench_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
            "guard-bench-submodule",
        )
        results_dir = os.path.join(guardbench_dir, "results")
        bench_results_dir = os.path.join(results_dir, "guardbench_dataset_1k_public")
        os.makedirs(bench_results_dir, exist_ok=True)

        # Build the target path for the copy in the GuardBench results directory
        model_name = metadata.get("model_name", "unknown")
        model_name_safe = model_name.replace("/", "_").replace(" ", "_")
        guard_model_type = metadata.get("guard_model_type", "unknown")
        target_file = os.path.join(bench_results_dir, f"{model_name_safe}.jsonl")

        # Upload the raw submission file
        api = HfApi(token=TOKEN)
        submission_path = f"submissions_{version}/{model_name_safe}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.jsonl"
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=submission_path,
            repo_id=RESULTS_DATASET_ID,
            repo_type="dataset",
            commit_message=f"Add raw submission for {model_name}",
        )

        # Copy the submission into the GuardBench results directory
        shutil.copy2(file_path, target_file)

        try:
            # Initialize the GuardBench context
            ctx = GuardbenchContext()
            ctx.results_dir = results_dir
            ctx.bench_name = "guardbench_dataset_1k_public"
            ctx.load_dataset("whitecircle-ai/guardbench_dataset_1k_public")
            ctx.is_initialized = True

            # Run evaluation on the copied submission
            evaluator = Evaluator(ctx, force=True, using_cached=True)
            evaluator.evaluate_model(model_name_safe, str(guard_model_type).lower())

            # Read the freshly written results and pick out this model's entry
            with open(os.path.join(results_dir, ctx.bench_name, "leaderboard.json"), "r") as f:
                results_data = json.load(f)
            model_entry = next(
                (entry for entry in results_data.get("entries", [])
                 if entry.get("model_name") == model_name_safe),
                None,
            )
            if not model_entry:
                return styled_error("No evaluation results found")

            # Add submission metadata to the entry
            model_entry.update({
                "model_name": metadata.get("model_name"),  # Use the original model name
                "model_type": metadata.get("model_type"),
                "guard_model_type": str(metadata.get("guard_model_type")).lower(),
                "base_model": metadata.get("base_model"),
                "revision": metadata.get("revision"),
                "precision": metadata.get("precision"),
                "weight_type": metadata.get("weight_type"),
                "version": version,
                "submission_date": datetime.now().isoformat(),
            })

            # Submit the entry to the entries folder
            success, message = submit_entry_to_hub(model_entry, model_name, version)
            if not success:
                return styled_error(message)

            # Collect all entries for this version from the results dataset
            files = api.list_repo_files(repo_id=RESULTS_DATASET_ID, repo_type="dataset")
            entry_files = [f for f in files if f.startswith("entries/") and f.endswith(f"_{version}.json")]

            all_entries = []
            for entry_file in entry_files:
                try:
                    entry_path = api.hf_hub_download(
                        repo_id=RESULTS_DATASET_ID,
                        filename=entry_file,
                        repo_type="dataset",
                    )
                    with open(entry_path, "r") as f:
                        all_entries.append(json.load(f))
                except Exception as e:
                    print(f"Error loading entry {entry_file}: {e}")

            # Rebuild the leaderboard from all entries
            success, message = submit_leaderboard_to_hub(all_entries, version)
            if not success:
                return styled_error(message)

            return styled_message("Submission successful! Model evaluated and leaderboard updated.")
        except Exception as eval_error:
            return styled_error(f"Error during evaluation: {eval_error}")
    except Exception as e:
        return styled_error(f"Error processing submission: {e}")
    finally:
        # Clean up local copies of the submission
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
            if target_file and os.path.exists(target_file):
                os.remove(target_file)
        except OSError:
            pass
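

# Illustrative invocation (hypothetical path and metadata values; the keys mirror
# those read in process_submission above):
#
#   result_html = process_submission(
#       "/tmp/example_submission.jsonl",
#       {
#           "model_name": "org/example-guard-model",
#           "model_type": "open_weights",
#           "guard_model_type": "llm",
#           "base_model": "org/example-base",
#           "revision": "main",
#           "precision": "bfloat16",
#           "weight_type": "Original",
#       },
#       version="v0",
#   )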