Implement ROC (Receiver Operating Characteristic) Curve calculation from scratch. Given true binary labels and predicted probabilities, compute the true positive rate (TPR) and false positive rate (FPR) at various thresholds.
Sort predictions by descending score, sweep through thresholds, and compute TPR and FPR at each step.
def roc_curve(
y_true: list[int],
y_scores: list[float],
) -> dict:
n = len(y_true)
total_pos = sum(y_true)
total_neg = n - total_pos
if total_pos == 0 or total_neg == 0:
return {"fpr": [0.0, 1.0], "tpr": [0.0, 1.0], "thresholds": [1.0, 0.0]}
# Pair scores with labels and sort by score descending
paired = sorted(zip(y_scores, y_true), key=lambda x: -x[0])
fpr_list = [0.0]
tpr_list = [0.0]
thresholds = [paired[0][0] + 1e-9]
tp = 0
fp = 0
prev_score = None
for score, label in paired:
if score != prev_score and prev_score is not None:
fpr_list.append(fp / total_neg)
tpr_list.append(tp / total_pos)
thresholds.append(score)
if label == 1:
tp += 1
else:
fp += 1
prev_score = score
# Final point (threshold = -inf, all classified as positive)
fpr_list.append(fp / total_neg)
tpr_list.append(tp / total_pos)
thresholds.append(prev_score - 1e-9)
return {
"fpr": [round(f, 6) for f in fpr_list],
"tpr": [round(t, 6) for t in tpr_list],
"thresholds": [round(th, 6) for th in thresholds],
}