#364 · Machine Learning · Medium
⊣ Solve on deep-ml.comImplement agglomerative (bottom-up) hierarchical clustering. Start with each data point as its own cluster, then iteratively merge the two closest clusters until the desired number of clusters is reached. Support single, complete, and average linkage.
import numpy as np
def agglomerative_clustering(
X: np.ndarray, n_clusters: int, linkage: str = "single"
) -> np.ndarray:
n = X.shape[0]
# Initialize each point as its own cluster
clusters = {i: [i] for i in range(n)}
labels = np.arange(n)
# Precompute pairwise distances
dists = np.sum((X[:, None] - X[None, :]) ** 2, axis=2)
np.fill_diagonal(dists, np.inf)
current_clusters = set(range(n))
while len(current_clusters) > n_clusters:
# Find the two closest clusters
min_dist = np.inf
merge_a, merge_b = -1, -1
cluster_list = sorted(current_clusters)
for i in range(len(cluster_list)):
for j in range(i + 1, len(cluster_list)):
ci, cj = cluster_list[i], cluster_list[j]
pts_i, pts_j = clusters[ci], clusters[cj]
if linkage == "single":
d = min(dists[a, b] for a in pts_i for b in pts_j)
elif linkage == "complete":
d = max(dists[a, b] for a in pts_i for b in pts_j)
else: # average
d = np.mean([dists[a, b] for a in pts_i for b in pts_j])
if d < min_dist:
min_dist = d
merge_a, merge_b = ci, cj
# Merge clusters
clusters[merge_a] = clusters[merge_a] + clusters[merge_b]
del clusters[merge_b]
current_clusters.remove(merge_b)
# Assign labels
result = np.zeros(n, dtype=int)
for label, (_, members) in enumerate(sorted(clusters.items())):
for m in members:
result[m] = label
return resultn_clusters clusters remain and assign labels.