#359 · Deep Learning · Medium
⊣ Solve on deep-ml.com
Implement distance correlation, a measure of dependence between two random variables that can detect nonlinear relationships (unlike Pearson correlation). Given two sample vectors X and Y, compute their distance correlation.
import numpy as np
def distance_correlation(X: np.ndarray, Y: np.ndarray) -> float:
    """Return the sample distance correlation between X and Y.

    Distance correlation is computed from the double-centered pairwise
    Euclidean distance matrices of the two samples. With ``A`` and ``B``
    the centered matrices, the squared distance covariance is
    ``mean(A * B)`` and the squared distance variances are ``mean(A * A)``
    and ``mean(B * B)``; the result is
    ``dCov(X, Y) / sqrt(dVar(X) * dVar(Y))``.

    Args:
        X: Array-like whose first axis indexes samples. A 1-D input is
            treated as ``n`` scalar observations.
        Y: Array-like with the same number of samples as ``X``; handled
            the same way as ``X``.

    Returns:
        The distance correlation as a built-in ``float``. Returns ``0.0``
        when the input is empty or when either sample has zero distance
        variance (for example, a constant vector or a single observation).

    Raises:
        ValueError: If ``X`` and ``Y`` contain different numbers of samples.
    """

    def as_sample_matrix(Z):
        # Cast to float before any arithmetic: in narrow integer dtypes the
        # subtraction and squaring below wrap around silently and produce
        # wrong (even negative) squared distances.
        Z = np.asarray(Z, dtype=float)
        return Z.reshape(-1, 1) if Z.ndim == 1 else Z

    def pairwise_dist(Z):
        # Broadcasting yields every (i, j) difference vector; the Euclidean
        # norm is taken over the trailing (feature) axis.
        diff = Z[:, None] - Z[None, :]
        return np.sqrt(np.sum(diff ** 2, axis=-1))

    def double_center(D):
        # Subtract row and column means and add back the grand mean so that
        # every row and every column of the result sums to zero.
        row_mean = D.mean(axis=1, keepdims=True)
        col_mean = D.mean(axis=0, keepdims=True)
        return D - row_mean - col_mean + D.mean()

    X = as_sample_matrix(X)
    Y = as_sample_matrix(Y)

    n = len(X)
    if len(Y) != n:
        # Without this check a single-sample Y would broadcast against X and
        # quietly return 0.0 instead of signalling the mismatch.
        raise ValueError(
            f"X and Y must have the same number of samples, got {n} and {len(Y)}"
        )
    if n == 0:
        # Nothing to correlate; also avoids dividing by n * n == 0 below.
        return 0.0

    A = double_center(pairwise_dist(X))
    B = double_center(pairwise_dist(Y))

    # Clamp at zero before the square root: rounding can leave a tiny
    # negative value where the exact quantity is zero.
    n_sq = n * n
    dcov_xy = np.sqrt(np.maximum((A * B).sum() / n_sq, 0))
    dvar_x = np.sqrt(np.maximum((A * A).sum() / n_sq, 0))
    dvar_y = np.sqrt(np.maximum((B * B).sum() / n_sq, 0))

    if dvar_x * dvar_y == 0:
        # Degenerate sample: the ratio is undefined, so report no dependence.
        return 0.0
    return float(dcov_xy / np.sqrt(dvar_x * dvar_y))