#350 · Machine Learning · Medium
# Solve on deep-ml.com
# Calculate the explained variance ratio for PCA. Given data, compute the principal components and determine what fraction of the total variance each component explains.
from typing import Dict, Optional

import numpy as np
def pca_explained_variance(
    X: np.ndarray,
    n_components: Optional[int] = None
) -> Dict:
    """Compute the variance explained by each principal component of ``X``.

    The data is mean-centered per column, the sample covariance matrix
    (divisor ``n - 1``) is eigendecomposed, and the eigenvalues are reported
    in descending order as the variance captured by each component.

    Args:
        X: 2-D array-like with one sample per row and one feature per column.
        n_components: Number of leading components to report. Defaults to
            ``min(n_samples, n_features)``. Values larger than the number of
            features are clamped to the number of features, so the reported
            ``"n_components"`` always matches the lengths of the result lists.

    Returns:
        A dict with the keys:

        * ``"explained_variance"``: eigenvalues of the kept components
          (unrounded).
        * ``"explained_variance_ratio"``: each kept eigenvalue divided by the
          total variance, rounded to 4 decimals. All zeros when the total
          variance is zero.
        * ``"cumulative_variance_ratio"``: running sum of the ratios, rounded
          to 4 decimals.
        * ``"total_variance"``: sum of all eigenvalues, rounded to 4 decimals.
        * ``"n_components"``: number of components actually reported.
        * ``"transformed_shape"``: shape of the centered data projected onto
          the kept components, as a list.

    Raises:
        ValueError: If ``X`` is not 2-D, has fewer than 2 samples, or
            ``n_components`` is negative.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-dimensional (samples x features), got ndim={X.ndim}"
        )
    n, p = X.shape
    if n < 2:
        # The sample covariance divides by (n - 1); it is undefined for n < 2.
        raise ValueError("X must contain at least 2 samples")

    if n_components is None:
        n_components = min(n, p)
    elif n_components < 0:
        # A negative value would silently slice off trailing components.
        raise ValueError("n_components must be non-negative")
    else:
        # Only p eigenvalues exist; keep the reported count truthful.
        n_components = min(n_components, p)

    # Center the data
    X_centered = X - X.mean(axis=0)

    # Sample covariance matrix (features x features)
    cov = (X_centered.T @ X_centered) / (n - 1)

    # eigh returns eigenvalues in ascending order, so reversing both outputs
    # yields the descending order wanted here.
    eigenvalues, eigenvectors = np.linalg.eigh(cov)
    eigenvalues = eigenvalues[::-1]
    eigenvectors = eigenvectors[:, ::-1]

    # A covariance matrix is positive semi-definite; negative eigenvalues are
    # round-off noise and would otherwise surface as "-0.0" in the output.
    eigenvalues = np.maximum(eigenvalues, 0.0)

    total_variance = np.sum(eigenvalues)

    explained_variance = eigenvalues[:n_components]
    if total_variance > 0:
        explained_ratio = explained_variance / total_variance
    else:
        # Constant data: nothing to explain; avoid 0/0 -> NaN.
        explained_ratio = np.zeros_like(explained_variance)
    cumulative_ratio = np.cumsum(explained_ratio)

    # Project the centered data onto the kept components
    components = eigenvectors[:, :n_components]
    X_transformed = X_centered @ components

    return {
        "explained_variance": explained_variance.tolist(),
        "explained_variance_ratio": [round(float(r), 4) for r in explained_ratio],
        "cumulative_variance_ratio": [round(float(r), 4) for r in cumulative_ratio],
        "total_variance": round(float(total_variance), 4),
        "n_components": n_components,
        "transformed_shape": list(X_transformed.shape)
    }