Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.metrics import roc_auc_score | |
def compute_roc_auc_from_csv(preds_csv: str, labels_csv: str, valid_mask):
    """
    Compute the ROC AUC of every shared column and their NaN-ignoring mean.

    The predictions file holds one row per sample flagged in ``valid_mask``;
    samples that are not flagged receive a neutral score of 0.5. Label cells
    that are missing or non-numeric are excluded from the AUC of their column.

    Args:
        preds_csv: Path to the predictions CSV (one row per flagged sample).
        labels_csv: Path to the labels CSV (one row per sample).
        valid_mask: 1-D sequence of truthy/falsy flags (bools or 0/1 ints).
            Only its last ``n_label_rows`` entries are used, so it may be
            longer than the labels file.

    Returns:
        A ``(auc_array, mean_auc)`` tuple. ``auc_array`` is a float32 array
        with one AUC per column shared by both files (NaN where the AUC cannot
        be computed); ``mean_auc`` is the mean of the non-NaN entries, or NaN
        when there are none.

    Raises:
        ValueError: If ``valid_mask`` has fewer entries than the labels file
            has rows, or if the number of flagged rows differs from the number
            of prediction rows.
    """
    preds = pd.read_csv(preds_csv)
    labels = pd.read_csv(labels_csv)

    # SMILES identifier columns are text, not class scores: drop them from both.
    smiles_cols = [c for c in preds.columns if "smiles" in c.lower()]
    if smiles_cols:
        print(f"🧪 Dropping SMILES columns: {smiles_cols}")
        preds = preds.drop(columns=smiles_cols, errors="ignore")
        labels = labels.drop(columns=smiles_cols, errors="ignore")

    # Score only the columns present in both files; unparsable cells become NaN.
    shared_cols = [c for c in preds.columns if c in labels.columns]
    y_pred_clean = (
        preds[shared_cols].apply(pd.to_numeric, errors="coerce").to_numpy(dtype=float)
    )
    y_true = (
        labels[shared_cols].apply(pd.to_numeric, errors="coerce").to_numpy(dtype=float)
    )

    n_rows = y_true.shape[0]

    # Coerce to a real boolean mask: indexing with a 0/1 integer mask would be
    # interpreted by NumPy as row positions rather than as a selection mask.
    mask = np.asarray(valid_mask, dtype=bool).ravel()
    if mask.size < n_rows:
        raise ValueError(
            f"valid_mask has {mask.size} entries but labels have {n_rows} rows"
        )
    # Keep the trailing n_rows flags. An explicit start index is used because
    # ``mask[-0:]`` would return the whole mask when there are no label rows.
    mask = mask[mask.size - n_rows:]

    n_flagged = int(mask.sum())
    if n_flagged != y_pred_clean.shape[0]:
        raise ValueError(
            f"valid_mask flags {n_flagged} rows but predictions have "
            f"{y_pred_clean.shape[0]} rows"
        )

    # Re-expand predictions to the label row count; unflagged rows stay at 0.5.
    y_pred = np.full((n_rows, y_pred_clean.shape[1]), 0.5, dtype=float)
    y_pred[mask] = y_pred_clean

    y_mask = ~np.isnan(y_true)
    auc_list = []
    for col in range(y_true.shape[1]):
        observed = y_mask[:, col]
        auc = np.nan
        if observed.any():
            try:
                auc = roc_auc_score(y_true[observed, col], y_pred[observed, col])
            except ValueError:
                # Deliberate best-effort: roc_auc_score rejected this column
                # (e.g. only one class among the observed labels), so the
                # column is reported as NaN instead of aborting the whole run.
                auc = np.nan
        auc_list.append(auc)

    auc_array = np.array(auc_list, dtype=np.float32)
    if np.isnan(auc_array).all():
        # No column produced an AUC (or there are no columns): return NaN
        # directly rather than triggering nanmean's "empty slice" warning.
        mean_auc = np.float32(np.nan)
    else:
        mean_auc = np.nanmean(auc_array)
    return auc_array, mean_auc