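Implement SMOTE (Synthetic Minority Over-sampling Technique) to handle imbalanced classification data. Given the minority-class samples and a desired number of synthetic samples, generate each new sample by interpolating between a minority sample $x$ and one of its $k$ nearest minority-class neighbors $x_{\text{neighbor}}$: $x_{\text{new}} = x + \lambda\,(x_{\text{neighbor}} - x)$, where $\lambda$ is drawn uniformly at random from $[0, 1)$.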
import numpy as np
def smote(X_minority: np.ndarray, n_synthetic: int, k: int = 5) -> np.ndarray:
    """Generate synthetic minority-class samples with SMOTE.

    Each synthetic sample lies at a random point on the line segment between
    a minority sample ``x`` and one of its ``k`` nearest minority neighbours::

        x_new = x + lambda * (x_neighbor - x),    lambda drawn from [0, 1)

    Base samples are visited round-robin (``i % n_samples``), so every
    minority sample is used about equally often. Randomness comes from the
    global ``np.random`` state; call ``np.random.seed`` beforehand for
    reproducible output.

    Args:
        X_minority: 2-D array-like of shape ``(n_samples, n_features)``
            holding the minority-class samples.
        n_synthetic: Number of synthetic samples to generate (``>= 0``).
        k: Number of nearest neighbours to pick from. Clipped to
            ``n_samples - 1`` when fewer neighbours exist.

    Returns:
        A float64 array of shape ``(n_synthetic, n_features)``.

    Raises:
        ValueError: If ``X_minority`` is not 2-D, ``n_synthetic`` is
            negative, ``k`` is smaller than 1, or fewer than two samples are
            given while ``n_synthetic > 0`` (no neighbour to interpolate
            towards).
    """
    # Work in float64: unsigned-integer input would otherwise wrap around in
    # the subtractions below and put synthetic points outside the segment.
    X = np.asarray(X_minority, dtype=np.float64)
    if X.ndim != 2:
        raise ValueError(
            f"X_minority must be 2-D (n_samples, n_features), got {X.ndim}-D"
        )
    if n_synthetic < 0:
        raise ValueError(f"n_synthetic must be non-negative, got {n_synthetic}")

    n_samples, n_features = X.shape
    synthetic = np.zeros((n_synthetic, n_features))
    if n_synthetic == 0:
        return synthetic

    if n_samples < 2:
        raise ValueError(
            "SMOTE needs at least 2 minority samples to interpolate, "
            f"got {n_samples}"
        )
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    k = min(k, n_samples - 1)

    # Squared Euclidean distances suffice: only the neighbour ordering matters.
    dists = np.sum((X[:, None] - X[None, :]) ** 2, axis=2)
    # Column 0 of each sorted row is a zero-distance point (the sample itself
    # or an exact duplicate), so it is dropped; columns 1..k are the neighbours.
    neighbors = np.argsort(dists, axis=1)[:, 1:k + 1]

    for i in range(n_synthetic):
        idx = i % n_samples
        nn_idx = neighbors[idx, np.random.randint(0, k)]
        lam = np.random.random()
        synthetic[i] = X[idx] + lam * (X[nn_idx] - X[idx])
    return synthetic