# Sonja Topf
# changed evaluation
# e448508
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
def compute_roc_auc_from_csv(preds_csv: str, labels_csv: str, valid_mask=None) -> tuple:
    """Compute per-class ROC AUC and its NaN-ignoring mean from two CSV files.

    Columns of the predictions file whose name contains "smiles"
    (case-insensitive) are dropped from both tables. The remaining columns
    that exist in both files are scored, in the order they appear in the
    predictions file. Non-numeric cells are coerced to NaN, and NaN labels
    are treated as missing and excluded from that column's AUC.

    The predictions file is expected to hold one row per truthy entry of
    ``valid_mask``. Rows where the mask is falsy receive a constant score of
    0.5 so that predictions line up row-for-row with the labels.

    Args:
        preds_csv: Predictions CSV (anything ``pandas.read_csv`` accepts).
        labels_csv: Labels CSV (anything ``pandas.read_csv`` accepts).
        valid_mask: Boolean-like sequence marking which label rows have a
            prediction. Only its trailing ``n_label_rows`` entries are used.
            ``None`` (the default) means every label row has a prediction.

    Returns:
        A ``(auc_array, mean_auc)`` tuple. ``auc_array`` is a float32 array
        with one AUC per shared column; a column is NaN when it has no
        observed labels, fewer than two distinct label values, or when
        ``roc_auc_score`` rejects it with ``ValueError``. ``mean_auc`` is the
        mean of the non-NaN entries, or NaN when there are none.

    Raises:
        ValueError: If ``valid_mask`` is shorter than the label table, or if
            the number of truthy mask entries differs from the number of
            prediction rows.
    """
    preds = pd.read_csv(preds_csv)
    labels = pd.read_csv(labels_csv)

    smiles_cols = [c for c in preds.columns if "smiles" in c.lower()]
    if smiles_cols:
        print(f"🧪 Dropping SMILES columns: {smiles_cols}")
        preds = preds.drop(columns=smiles_cols, errors="ignore")
        labels = labels.drop(columns=smiles_cols, errors="ignore")

    # Align the two tables by column name, keeping the predictions' order.
    shared_cols = [c for c in preds.columns if c in labels.columns]
    preds = preds[shared_cols].apply(pd.to_numeric, errors="coerce")
    labels = labels[shared_cols].apply(pd.to_numeric, errors="coerce")
    y_pred_clean = preds.to_numpy(dtype=float)
    y_true = labels.to_numpy(dtype=float)
    n_rows, n_cols = y_true.shape

    if valid_mask is None:
        mask = np.ones(n_rows, dtype=bool)
    else:
        # Force a flat boolean array: a 0/1 integer mask would otherwise be
        # interpreted by NumPy as row *indices* rather than as a mask.
        mask = np.asarray(valid_mask, dtype=bool).reshape(-1)
        if mask.size < n_rows:
            raise ValueError(
                f"valid_mask has {mask.size} entries but the labels have "
                f"{n_rows} rows"
            )
        # Keep the trailing entries that correspond to the label rows.
        # With zero label rows this slice keeps the whole mask, which is
        # harmless because no column is scored in that case.
        mask = mask[-n_rows:]

    n_valid = int(mask.sum())
    if n_valid != y_pred_clean.shape[0]:
        raise ValueError(
            f"valid_mask selects {n_valid} rows but the predictions have "
            f"{y_pred_clean.shape[0]} rows"
        )

    # Re-expand predictions to the label size; masked-out rows score 0.5.
    y_pred = np.full((mask.size, y_pred_clean.shape[1]), 0.5, dtype=float)
    y_pred[mask] = y_pred_clean

    observed = ~np.isnan(y_true)
    auc_array = np.full(n_cols, np.nan, dtype=np.float32)
    for col in range(n_cols):
        rows = observed[:, col]
        targets = y_true[rows, col]
        # AUC is undefined without two distinct label values; skip the
        # column up front instead of relying on roc_auc_score to raise.
        if np.unique(targets).size < 2:
            continue
        try:
            auc_array[col] = roc_auc_score(targets, y_pred[rows, col])
        except ValueError:
            # Best effort: a column the scorer rejects stays NaN.
            continue

    # np.nanmean warns on an all-NaN (or empty) input; return NaN directly.
    if np.isnan(auc_array).all():
        mean_auc = np.float32(np.nan)
    else:
        mean_auc = np.nanmean(auc_array)
    return auc_array, mean_auc