#429 · Machine Learning · Medium
⊣ Solve on deep-ml.com
Evaluate the quality of a quantized model by computing the perplexity delta (change in perplexity) between the original and the quantized model. Given token-level log probabilities from both models on the same calibration dataset, compute the perplexity of each model and the absolute and relative change between them.
import math
def perplexity_delta(
    original_log_probs: list[float],
    quantized_log_probs: list[float]
) -> dict:
    """Compare the perplexity of a quantized model against the original.

    Perplexity is computed as ``exp(-mean(log_probs))``; because
    ``math.exp`` is used, the inputs are interpreted as natural-log
    probabilities.

    Args:
        original_log_probs: Per-token log probabilities from the original
            model.
        quantized_log_probs: Per-token log probabilities from the quantized
            model; must have the same length as ``original_log_probs``.

    Returns:
        A dict with these keys (numeric values rounded to 4 decimals):

        * ``"original_perplexity"`` / ``"quantized_perplexity"``
        * ``"absolute_delta"``: quantized minus original perplexity.
        * ``"relative_delta_pct"``: absolute delta as a percentage of the
          original perplexity (``0.0`` if the original perplexity is not
          positive).
        * ``"quality"``: ``"excellent"`` (< 1 %), ``"good"`` (< 5 %),
          ``"acceptable"`` (< 10 %), otherwise ``"poor"``. The unrounded
          percentage is used, so a negative delta (quantized model has
          lower perplexity) rates as ``"excellent"``.

    Raises:
        AssertionError: If the inputs differ in length or are empty.
    """
    n = len(original_log_probs)

    # Validate with explicit raises instead of an `assert` statement:
    # asserts are stripped under `python -O`, which would let mismatched
    # inputs silently produce a wrong average and empty inputs fail with
    # an unrelated ZeroDivisionError. AssertionError is kept so existing
    # callers observe the same exception type as before.
    if n != len(quantized_log_probs):
        raise AssertionError(
            "original_log_probs and quantized_log_probs must have the "
            f"same length (got {n} and {len(quantized_log_probs)})"
        )
    if n == 0:
        raise AssertionError("log-probability lists must not be empty")

    # Perplexity = exp(-1/N * sum(log_probs))
    avg_orig = sum(original_log_probs) / n
    avg_quant = sum(quantized_log_probs) / n
    ppl_orig = math.exp(-avg_orig)
    ppl_quant = math.exp(-avg_quant)

    absolute_delta = ppl_quant - ppl_orig
    # exp() can underflow to 0.0 for extreme inputs; avoid dividing by it.
    relative_delta_pct = (
        (absolute_delta / ppl_orig) * 100 if ppl_orig > 0 else 0.0
    )

    # Quality assessment: first upper bound (in percent) that the relative
    # delta falls strictly below; anything else (including NaN) is "poor".
    quality = "poor"
    for upper_bound, label in (
        (1.0, "excellent"),
        (5.0, "good"),
        (10.0, "acceptable"),
    ):
        if relative_delta_pct < upper_bound:
            quality = label
            break

    return {
        "original_perplexity": round(ppl_orig, 4),
        "quantized_perplexity": round(ppl_quant, 4),
        "absolute_delta": round(absolute_delta, 4),
        "relative_delta_pct": round(relative_delta_pct, 4),
        "quality": quality
    }