"""Metrics for evaluating model preferences for grammatical over ungrammatical forms."""

import math
from typing import List, Optional, Tuple, Union

import numpy as np
import pandas as pd


def compute_mean(list_of_values: List[float]) -> float:
    """Return the arithmetic mean of a list of values."""
    return sum(list_of_values) / len(list_of_values)


def compute_surprisal(p: float) -> float:
    """Return the surprisal -log2(p) in bits; infinity for zero probability."""
    return -math.log2(p) if p > 0 else float("inf")


def compute_avg_surprisal(probs: pd.Series) -> float:
    """Return the mean surprisal (in bits) over a series of probabilities."""
    return probs.apply(compute_surprisal).mean()


def compute_average_surprisal_difference(
    correct_form_probs: pd.Series, wrong_form_probs: pd.Series
) -> float:
    """Return the average surprisal of the wrong forms minus that of the correct forms.

    Positive values mean the wrong form is, on average, more surprising
    (less probable) to the model than the correct form.
    """
    correct_form_avg_surp = compute_avg_surprisal(correct_form_probs)
    wrong_form_avg_surp = compute_avg_surprisal(wrong_form_probs)
    return wrong_form_avg_surp - correct_form_avg_surp


def compute_normalised_surprisal_difference(
    correct_form_probs: pd.Series, wrong_form_probs: pd.Series
) -> float:
    """Return the surprisal difference normalised by the correct forms' average surprisal."""
    correct_form_avg_surp = compute_avg_surprisal(correct_form_probs)
    wrong_form_avg_surp = compute_avg_surprisal(wrong_form_probs)
    return (wrong_form_avg_surp - correct_form_avg_surp) / correct_form_avg_surp


def compute_entropy(
    probs, k: Optional[int] = None, normalise: bool = False
) -> Union[float, Tuple[float, float]]:
    """Compute the Shannon entropy (in nats) of a probability distribution.

    If `k` is given, only the top-k probabilities are kept and renormalised
    before computing entropy. If `normalise` is True, also return
    1 - H / log(n), which is 0 for a uniform distribution over the n retained
    outcomes and approaches 1 as the distribution becomes more peaked.

    Note: this uses the natural log (nats), whereas compute_surprisal uses
    log base 2 (bits).
    """
    probs = np.asarray(probs, dtype=np.float64)
    # Remove zeros to avoid log(0).
    probs = probs[probs > 0]
    # Keep only the top-k probabilities, renormalised to sum to 1.
    if k is not None:
        probs = np.sort(probs)[::-1][:k]
        probs = probs / probs.sum()
    H = -np.sum(probs * np.log(probs))
    if normalise:
        n = len(probs)
        if n < 2:
            # log(1) = 0 would divide by zero; treat a one-outcome
            # distribution as maximally peaked.
            return H, 1.0
        return H, 1 - H / np.log(n)
    return H


def get_predictions(df: pd.DataFrame) -> np.ndarray:
    """Convert probabilities to binary predictions.

    Predicts grammatical (1) if p_grammatical > p_ungrammatical,
    else ungrammatical (0).
    """
    predictions = (df['p_grammatical'] > df['p_ungrammatical']).astype(int)
    return predictions.to_numpy()


def calculate_accuracy(df: pd.DataFrame) -> float:
    """Calculate accuracy: the proportion of correct predictions.

    Assumes the model should always prefer the grammatical form (label = 1).
    """
    predictions = get_predictions(df)
    # True labels: every item should be grammatical (1).
    true_labels = np.ones(len(df), dtype=int)
    correct = np.sum(predictions == true_labels)
    total = len(predictions)
    return correct / total if total > 0 else 0.0


def calculate_all_metrics(df: pd.DataFrame) -> dict:
    """Collect all evaluation metrics for a results dataframe."""
    accuracy = calculate_accuracy(df)
    return {
        'accuracy': round(accuracy, 2),
    }
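

# --- Usage sketch ---
# A minimal, illustrative example of how these metrics fit together. The
# column names p_grammatical / p_ungrammatical match those expected by
# get_predictions; the probability values below are made up purely for
# demonstration and do not come from any real model.
if __name__ == "__main__":
    demo = pd.DataFrame({
        'p_grammatical': [0.8, 0.6, 0.3, 0.9],
        'p_ungrammatical': [0.1, 0.5, 0.4, 0.05],
    })

    # Accuracy: 3 of the 4 rows prefer the grammatical form, so 0.75.
    print("metrics:", calculate_all_metrics(demo))

    # Surprisal-based comparison of the two forms, in bits.
    print(
        "avg surprisal difference (bits):",
        compute_average_surprisal_difference(
            demo['p_grammatical'], demo['p_ungrammatical']
        ),
    )

    # Entropy of a toy 4-outcome distribution, with and without
    # top-k truncation and normalisation.
    print("entropy (nats):", compute_entropy([0.7, 0.2, 0.05, 0.05]))
    print(
        "top-2 entropy and peakedness:",
        compute_entropy([0.7, 0.2, 0.05, 0.05], k=2, normalise=True),
    )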