Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from sklearn.metrics import roc_auc_score | |
def expected_calibration_error(confs, corrects, n_bins: int = 10):
    """Compute the expected calibration error over equal-width bins.

    The interval [0, 1] is split into ``n_bins`` equal-width bins. For each
    non-empty bin the absolute gap between the mean of ``corrects`` and the
    mean of ``confs`` is weighted by the fraction of all samples that fall
    in that bin, and the weighted gaps are summed.

    Args:
        confs: Array-like of confidence values; converted to float.
        corrects: Array-like of correctness indicators; converted to int.
        n_bins: Number of equal-width bins spanning [0, 1].

    Returns:
        The calibration error as a built-in float, or None when ``confs``
        is empty.
    """
    confidence = np.array(confs, dtype=float)
    hits = np.array(corrects, dtype=int)
    total = len(confidence)
    if total == 0:
        return None

    edges = np.linspace(0.0, 1.0, n_bins + 1)
    last_bin = n_bins - 1
    weighted_gap = 0.0
    for idx, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
        # Bins are half-open [lower, upper) except the final one, which is
        # closed so that a confidence of exactly 1.0 is still counted.
        if idx == last_bin:
            under_upper = confidence <= upper
        else:
            under_upper = confidence < upper
        in_bin = (confidence >= lower) & under_upper
        if not in_bin.any():
            continue
        bin_accuracy = hits[in_bin].mean()
        bin_confidence = confidence[in_bin].mean()
        # Samples outside [0, 1] match no bin but still count in ``total``.
        weighted_gap += (in_bin.sum() / total) * abs(bin_accuracy - bin_confidence)
    return float(weighted_gap)
def auc_nrp(hidden_scores, future_corrections):
    """Compute ROC AUC of ``hidden_scores`` against ``future_corrections``.

    Args:
        hidden_scores: Array-like of scores passed to ``roc_auc_score`` as
            the predicted scores.
        future_corrections: Array-like of labels; cast to int before use.

    Returns:
        The AUC as a built-in float, or None when there are no scores or
        when the int-cast labels contain fewer than two distinct classes
        (AUC is undefined in that case).
    """
    if len(hidden_scores) == 0:
        return None

    # Count classes AFTER the int cast: values that differ before the cast
    # (e.g. 0.2 and 0.7) can collapse to a single class, which would make
    # roc_auc_score raise instead of this function returning None.
    labels = np.asarray(future_corrections).astype(int)
    if np.unique(labels).size < 2:
        return None

    return float(roc_auc_score(labels, np.asarray(hidden_scores)))
def stability_duration(dwell_steps):
    """Return the arithmetic mean of ``dwell_steps`` (0.0 when empty).

    Args:
        dwell_steps: Sequence or NumPy array of numeric values. None and
            other falsy unsized values are treated as empty.

    Returns:
        The mean as a built-in float, or 0.0 when there is nothing to
        average.
    """
    # Check emptiness by length instead of truthiness: ``not ndarray`` is
    # ambiguous for multi-element arrays and raises ValueError.
    try:
        is_empty = len(dwell_steps) == 0
    except TypeError:
        # Unsized inputs (None, plain scalars) keep the truthiness rule.
        is_empty = not dwell_steps
    if is_empty:
        return 0.0
    return float(np.mean(dwell_steps))
def counterfactual_consistency(scores):
    """Return the arithmetic mean of ``scores`` (0.0 when empty).

    Args:
        scores: Sequence or NumPy array of numeric values. None and other
            falsy unsized values are treated as empty.

    Returns:
        The mean as a built-in float, or 0.0 when there is nothing to
        average.
    """
    # Check emptiness by length instead of truthiness: ``not ndarray`` is
    # ambiguous for multi-element arrays and raises ValueError.
    try:
        is_empty = len(scores) == 0
    except TypeError:
        # Unsized inputs (None, plain scalars) keep the truthiness rule.
        is_empty = not scores
    if is_empty:
        return 0.0
    return float(np.mean(scores))