Handle missing values (NaN) in a dataset by implementing common imputation strategies: mean, median, and most-frequent (mode) imputation for each feature column.
import numpy as np
from collections import Counter
def impute(X: list[list[float]], strategy: str = "mean") -> list[list[float]]:
    """Return a copy of ``X`` with NaN entries replaced column by column.

    Each column's fill value is computed from that column's non-NaN
    entries only. The input is not modified.

    Args:
        X: Row-major table of numbers; missing entries are ``float("nan")``.
            The column count is taken from the first row.
        strategy: How to compute the fill value for a column:
            ``"mean"`` (arithmetic mean), ``"median"`` (average of the two
            middle values when the count is even) or ``"most_frequent"``
            (mode; ties go to the value encountered first in the column).

    Returns:
        A new list of lists with every NaN replaced. A column with no
        observed values at all is filled with ``0.0``. An empty ``X``
        yields an empty list.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    # Validate up front so a misspelled strategy is always reported, even
    # when every column happens to be all-NaN (or there are no columns).
    if strategy not in ("mean", "median", "most_frequent"):
        raise ValueError(f"Unknown strategy: {strategy}")

    # list(row) always yields an independent, mutable copy, so the caller's
    # rows are never written to regardless of the row container type.
    filled = [list(row) for row in X]
    if not filled:
        return filled

    n_cols = len(filled[0])
    for col in range(n_cols):
        observed = [row[col] for row in filled if not np.isnan(row[col])]

        if not observed:
            # Nothing to learn from: fall back to a neutral constant.
            fill = 0.0
        elif strategy == "mean":
            fill = sum(observed) / len(observed)
        elif strategy == "median":
            observed.sort()
            count = len(observed)
            # Both indices coincide for odd counts and straddle the middle
            # for even counts, so one expression covers both cases.
            fill = (observed[count // 2] + observed[(count - 1) // 2]) / 2
        else:  # "most_frequent"
            fill = Counter(observed).most_common(1)[0][0]

        for row in filled:
            if np.isnan(row[col]):
                row[col] = fill

    return filled