import math
from typing import List

import numpy as np
import pandas as pd


def compute_mean(list_of_values: List[float]) -> float:
    """Return the arithmetic mean of a non-empty list of values."""
    return sum(list_of_values) / len(list_of_values)


def compute_surprisal(p: float) -> float:
    """Return the surprisal -log2(p) in bits (e.g. 1.0 for p = 0.5);
    inf for a zero or missing probability."""
    return -math.log2(p) if p and p > 0 else float("inf")


def compute_avg_surprisal(probs: pd.Series) -> float:
    """Return the mean surprisal over a series of probabilities."""
    as_surprisal = probs.apply(compute_surprisal)
    return as_surprisal.mean()


def compute_average_surprisal_difference(
    correct_form_probs: pd.Series, wrong_form_probs: pd.Series
) -> float:
    """Return the average surprisal of the wrong forms minus that of the
    correct forms; positive when the model prefers the correct forms."""
    correct_form_avg_surp = compute_avg_surprisal(correct_form_probs)
    wrong_form_avg_surp = compute_avg_surprisal(wrong_form_probs)
    return wrong_form_avg_surp - correct_form_avg_surp


def compute_normalised_surprisal_difference(
    correct_form_probs: pd.Series, wrong_form_probs: pd.Series
) -> float:
    """Return the average surprisal difference scaled by the correct forms'
    average surprisal, making scores comparable across conditions."""
    correct_form_avg_surp = compute_avg_surprisal(correct_form_probs)
    wrong_form_avg_surp = compute_avg_surprisal(wrong_form_probs)
    return (wrong_form_avg_surp - correct_form_avg_surp) / correct_form_avg_surp
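

# Illustrative sketch (not part of the original file): with toy probabilities,
# a positive difference means the wrong form is more surprising than the
# correct one, i.e. the model prefers the grammatical variant.
#
#   correct = pd.Series([0.8, 0.9])  # avg surprisal ~= 0.24 bits
#   wrong = pd.Series([0.2, 0.1])    # avg surprisal ~= 2.82 bits
#   compute_average_surprisal_difference(correct, wrong)     # ~= 2.58
#   compute_normalised_surprisal_difference(correct, wrong)  # ~= 10.9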


def compute_entropy(probs, k=None, normalise=False):
    """
    Compute the Shannon entropy (in nats) of a probability distribution.

    If k is given, only the top-k probabilities are kept and renormalised.
    If normalise is True, returns a tuple (H, 1 - H / log(n)): the second
    value is 1.0 for a fully peaked distribution, 0.0 for a uniform one.
    """
    probs = np.array(probs, dtype=np.float64)
    # Remove zeros to avoid log(0).
    probs = probs[probs > 0]
    # Keep only the top-k probabilities, if requested.
    if k is not None:
        probs = np.sort(probs)[::-1][:k]
        probs = probs / probs.sum()  # renormalise to sum to 1
    H = -np.sum(probs * np.log(probs))
    if normalise:
        n = len(probs)
        if n <= 1:
            # A single-outcome distribution is fully peaked;
            # avoid dividing by log(1) = 0.
            return H, 1.0
        return H, 1 - H / np.log(n)
    return H
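

# Illustrative sketch (not part of the original file): entropy is maximal for
# a uniform distribution and zero for a one-hot one; the normalised variant
# rescales this into a [0, 1] peakedness score.
#
#   compute_entropy([0.25, 0.25, 0.25, 0.25])                  # ln(4) ~= 1.386 nats
#   compute_entropy([0.97, 0.01, 0.01, 0.01])                  # ~= 0.168 nats
#   compute_entropy([0.25, 0.25, 0.25, 0.25], normalise=True)  # (1.386, 0.0)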


def get_predictions(df: pd.DataFrame) -> np.ndarray:
    """
    Convert probabilities to binary predictions.
    Predicts grammatical (1) if p_grammatical > p_ungrammatical, else ungrammatical (0).
    """
    predictions = (df['p_grammatical'] > df['p_ungrammatical']).astype(int)
    return predictions.to_numpy()


def calculate_accuracy(df: pd.DataFrame) -> float:
    """
    Calculate accuracy: the proportion of correct predictions.
    Assumes the grammatical form is always the correct answer (label = 1).
    """
    predictions = get_predictions(df)
    # True labels: every item should be predicted grammatical (1).
    true_labels = np.ones(len(df), dtype=int)
    correct = np.sum(predictions == true_labels)
    total = len(predictions)
    return correct / total if total > 0 else 0.0


def calculate_all_metrics(df: pd.DataFrame) -> dict:
    """Collect all evaluation metrics for a results DataFrame."""
    accuracy = calculate_accuracy(df)
    return {
        'accuracy': round(accuracy, 2),
    }
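

if __name__ == "__main__":
    # Minimal smoke test (illustrative, not part of the original upload).
    # Assumes the DataFrame layout used by get_predictions: one row per
    # minimal pair, with 'p_grammatical' and 'p_ungrammatical' columns.
    df = pd.DataFrame(
        {
            'p_grammatical': [0.8, 0.6, 0.3],
            'p_ungrammatical': [0.1, 0.5, 0.7],
        }
    )
    print(get_predictions(df))        # [1 1 0]
    print(calculate_all_metrics(df))  # {'accuracy': 0.67}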