#193 · Machine Learning · Medium
⊣ Solve on deep-ml.com
Compute a confusion matrix from true labels and predicted labels, with optional normalization: by row (recall-based), by column (precision-based), or over all entries.
import numpy as np
def confusion_matrix(y_true: list, y_pred: list,
                     normalize: str = None) -> np.ndarray:
    """Compute the confusion matrix of predicted labels against true labels.

    Entry ``[i, j]`` counts the samples whose true label is class ``i`` and
    whose predicted label is class ``j``. The classes are the sorted union
    of the labels that occur in ``y_true`` and ``y_pred``.

    Args:
        y_true: 1-D sequence of ground-truth labels.
        y_pred: 1-D sequence of predicted labels, same length as ``y_true``.
        normalize: ``None`` returns raw counts. ``"true"`` divides each row
            by its sum (recall per class on the diagonal), ``"pred"``
            divides each column by its sum (precision per class on the
            diagonal), ``"all"`` divides every entry by the total count.

    Returns:
        A float array of shape ``(n_classes, n_classes)``. Rows or columns
        whose sum is zero are left as zeros rather than producing NaN.

    Raises:
        ValueError: If ``normalize`` is not one of the accepted values, if
            either input is not one-dimensional, or if the inputs differ
            in length.
    """
    if normalize not in (None, "true", "pred", "all"):
        raise ValueError(
            "normalize must be one of None, 'true', 'pred', 'all'; "
            f"got {normalize!r}"
        )

    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.ndim != 1 or y_pred.ndim != 1:
        raise ValueError("y_true and y_pred must be one-dimensional")
    # zip() would silently drop the unmatched tail, so reject mismatches.
    if y_true.shape[0] != y_pred.shape[0]:
        raise ValueError(
            "y_true and y_pred must have the same length; "
            f"got {y_true.shape[0]} and {y_pred.shape[0]}"
        )

    # A single unique() pass over both arrays yields the sorted class list
    # and, via the inverse mapping, the class index of every label.
    classes, inverse = np.unique(
        np.concatenate([y_true, y_pred]), return_inverse=True
    )
    n_samples = y_true.shape[0]
    true_idx = inverse[:n_samples]
    pred_idx = inverse[n_samples:]

    n_classes = len(classes)
    cm = np.zeros((n_classes, n_classes), dtype=float)
    # add.at accumulates correctly when the same (row, col) pair repeats;
    # plain fancy-index "+=" would count each distinct pair only once.
    np.add.at(cm, (true_idx, pred_idx), 1)

    if normalize == "true":  # normalize over true labels (rows)
        row_sums = cm.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1  # avoid 0/0 for classes never seen as true
        cm = cm / row_sums
    elif normalize == "pred":  # normalize over predicted labels (columns)
        col_sums = cm.sum(axis=0, keepdims=True)
        col_sums[col_sums == 0] = 1  # avoid 0/0 for classes never predicted
        cm = cm / col_sums
    elif normalize == "all":
        total = cm.sum()
        if total > 0:
            cm = cm / total
    return cm
# "true" divides each row by its sum (gives recall per class), "pred" divides each column by its sum (gives precision per class), "all" divides by the total count.