"""Calculate the correlation matrix for a dataset.

Given a 2D NumPy array where each column is a feature, compute the Pearson
correlation coefficient between every pair of features.
"""
import numpy as np
def calculate_correlation_matrix(X):
    """Return the Pearson correlation matrix of the columns of ``X``.

    For each pair of features (i, j), the covariance is computed as the mean
    of the product of deviations from the mean, and is then divided by the
    product of the two population standard deviations (``ddof=0``).

    Args:
        X: 2D array-like of shape (n_samples, n_features); each column is
            one feature.

    Returns:
        A nested list of shape (n_features, n_features) whose entry
        ``[i][j]`` is the correlation between feature ``i`` and feature ``j``.
        When either feature has zero variance the correlation is undefined;
        it is reported as 0.0 off the diagonal and 1.0 on the diagonal.
    """
    X = np.asarray(X, dtype=float)
    n_samples = X.shape[0]

    centered = X - np.mean(X, axis=0)
    stds = np.std(X, axis=0, ddof=0)

    # A constant column has zero variance, but rounding in the mean can leave
    # a tiny non-zero std (e.g. a column of 0.1), so an exact ``std == 0``
    # test alone misses it. Comparing every row with the first is exact.
    constant = np.all(X == X[:1], axis=0) & (n_samples > 0)
    degenerate = constant | (stds == 0)

    # Covariance of every pair at once: mean of products of deviations.
    cov = centered.T @ centered / n_samples

    # Substitute 1.0 for degenerate stds so the division never hits zero;
    # those entries are overwritten just below.
    safe_stds = np.where(degenerate, 1.0, stds)
    corr = cov / np.outer(safe_stds, safe_stds)

    corr[degenerate[:, None] | degenerate[None, :]] = 0.0
    degenerate_idx = np.flatnonzero(degenerate)
    corr[degenerate_idx, degenerate_idx] = 1.0

    return corr.tolist()