#19 · Machine Learning · Medium
# Solve on deep-ml.com
# Implement Principal Component Analysis (PCA) from scratch. Given a dataset, reduce its dimensionality to a specified number of principal components.
import numpy as np
def pca(data: list[list[float]], num_components: int) -> list[list[float]]:
    """Project ``data`` onto its top ``num_components`` principal components.

    Each feature is mean-centered (not rescaled), the sample covariance
    matrix of the centered data is eigendecomposed, and the centered data
    is projected onto the eigenvectors with the largest eigenvalues.

    Args:
        data: Table of numbers; rows are samples, columns are features.
        num_components: Number of principal components to keep. Must be
            between 1 and the number of features, inclusive.

    Returns:
        One row per sample, each with ``num_components`` values rounded to
        4 decimal places. Column signs are whatever ``np.linalg.eigh``
        yields; they are not normalized.

    Raises:
        ValueError: If ``data`` is not a non-empty 2-D table, or if
            ``num_components`` is outside ``1..n_features``.
    """
    X = np.array(data, dtype=float)
    if X.ndim != 2 or X.size == 0:
        raise ValueError("data must be a non-empty 2-D list of numbers")

    n_samples, n_features = X.shape
    if not 1 <= num_components <= n_features:
        raise ValueError(
            f"num_components must be between 1 and {n_features}, "
            f"got {num_components}"
        )

    # Center the data (subtract the mean of each feature).
    X_centered = X - X.mean(axis=0)

    # Sample covariance matrix. With a single sample the centered data is
    # all zeros, so clamp the divisor to avoid 0/0 producing NaNs.
    cov_matrix = (X_centered.T @ X_centered) / max(n_samples - 1, 1)

    # The covariance matrix is symmetric, so eigh is the right solver.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Order components by eigenvalue, largest first.
    order = np.argsort(eigenvalues)[::-1]
    eigenvectors = eigenvectors[:, order]

    # Keep the leading eigenvectors and project the centered data onto them.
    W = eigenvectors[:, :num_components]
    projected = X_centered @ W
    return np.round(projected, 4).tolist()