#256 · Machine Learning · Medium
⊣ Solve on deep-ml.comCompute the Davies-Bouldin Index to evaluate clustering quality. Lower values indicate better clustering. For each cluster, find the worst-case (most similar) pair and average across all clusters.
For each cluster compute the average distance of points to the centroid (scatter). For each pair of clusters, compute the ratio (scatter_i + scatter_j) / centroid_distance(i, j). The DB index is the mean of the maximum ratio for each cluster.
import math
def davies_bouldin_index(X: list[list[float]], labels: list[int]) -> float:
unique_labels = sorted(set(labels))
k = len(unique_labels)
if k <= 1:
return 0.0
dim = len(X[0])
# Compute centroids and scatter for each cluster
centroids: dict[int, list[float]] = {}
scatters: dict[int, float] = {}
for lab in unique_labels:
members = [X[i] for i in range(len(X)) if labels[i] == lab]
n_m = len(members)
centroid = [sum(members[j][d] for j in range(n_m)) / n_m for d in range(dim)]
centroids[lab] = centroid
scatter = sum(
math.sqrt(sum((members[j][d] - centroid[d]) ** 2 for d in range(dim)))
for j in range(n_m)
) / n_m
scatters[lab] = scatter
def centroid_dist(l1: int, l2: int) -> float:
return math.sqrt(sum((centroids[l1][d] - centroids[l2][d]) ** 2 for d in range(dim)))
db_values = []
for i_idx, li in enumerate(unique_labels):
max_ratio = 0.0
for j_idx, lj in enumerate(unique_labels):
if li == lj:
continue
d_ij = centroid_dist(li, lj)
if d_ij == 0:
continue
ratio = (scatters[li] + scatters[lj]) / d_ij
max_ratio = max(max_ratio, ratio)
db_values.append(max_ratio)
return round(sum(db_values) / len(db_values), 4)R_ij = (S_i + S_j) / d(c_i, c_j).