"""Implement MMLU (Massive Multitask Language Understanding) log-probability scoring.

Given a prompt, a set of answer choices (A, B, C, D), and the
log-probabilities assigned by a language model to each choice token,
determine the model's selected answer by picking the choice with the highest
log-probability.
"""
import math
from typing import Dict, List, Optional
def mmlu_log_prob_scoring(
    log_probs: Dict[str, float],
    choices: Optional[List[str]] = None,
) -> Dict:
    """Select the answer choice with the highest log-probability.

    Args:
        log_probs: Mapping from choice label to the log-probability the
            model assigned to that label. Labels not listed in ``choices``
            are ignored.
        choices: Candidate labels. Their order is the tie-breaking order.
            Defaults to ``["A", "B", "C", "D"]``.

    Returns:
        A dict with three keys:

        * ``"selected_answer"``: the label with the highest log-probability;
          on a tie, the one that appears first in ``choices``.
        * ``"probabilities"``: softmax of the log-probabilities restricted to
          ``choices``. If the best log-probability is infinite (for example
          when none of the choices appears in ``log_probs``), the mass is
          split evenly among the choices that share that best value.
        * ``"log_probs"``: the log-probability used for each choice, with
          ``-inf`` for labels missing from ``log_probs``.

    Raises:
        ValueError: If ``choices`` is empty.
    """
    # Build the default per call rather than sharing one list object
    # across every invocation.
    if choices is None:
        choices = ["A", "B", "C", "D"]
    if not choices:
        raise ValueError("choices must contain at least one label")

    # Missing labels get -inf so they can never win against a scored label.
    filtered = {c: log_probs.get(c, float("-inf")) for c in choices}
    selected = max(filtered, key=filtered.get)
    max_lp = filtered[selected]

    if math.isinf(max_lp):
        # Subtracting an infinite maximum would give inf - inf = nan for the
        # best choices, poisoning the whole distribution. Use the limiting
        # distribution instead: equal shares for the choices tied at the
        # maximum, zero for the rest.
        top_count = sum(1 for lp in filtered.values() if lp == max_lp)
        share = 1.0 / top_count
        probs = {
            c: (share if lp == max_lp else 0.0) for c, lp in filtered.items()
        }
    else:
        # Shift by the maximum before exponentiating so that very negative
        # log-probabilities do not all underflow to zero.
        exp_vals = {c: math.exp(lp - max_lp) for c, lp in filtered.items()}
        total = sum(exp_vals.values())
        probs = {c: value / total for c, value in exp_vals.items()}

    return {
        "selected_answer": selected,
        "probabilities": probs,
        "log_probs": filtered,
    }