#254 · Machine Learning · Medium
# Solve on deep-ml.com
# Compute the Silhouette Score for a set of clustered data points. For each point, measure how similar it is to its own cluster versus the nearest other cluster, and return the mean silhouette value across all points.
# For each point i, compute a(i), the mean distance from i to the other points in its own cluster, and b(i), the smallest mean distance from i to the points of any other cluster. The silhouette for point i is s(i) = (b(i) - a(i)) / max(a(i), b(i)).
import math
def euclidean_distance(p1: list[float], p2: list[float]) -> float:
    """Return the Euclidean (L2) distance between two points.

    Args:
        p1: Coordinates of the first point.
        p2: Coordinates of the second point.

    Returns:
        The square root of the sum of squared coordinate differences.

    Raises:
        ValueError: If the points do not have the same number of
            coordinates. A distance between points of different
            dimensionality is undefined, so it is rejected rather than
            computed over a silently truncated prefix.
    """
    # math.dist is the standard-library implementation of this formula and
    # validates that both points share the same dimensionality.
    return math.dist(p1, p2)
def silhouette_score(X: list[list[float]], labels: list[int]) -> float:
    """Return the mean silhouette coefficient of a clustering.

    For each point ``i`` the silhouette is ``(b - a) / max(a, b)``, where
    ``a`` is the mean Euclidean distance from ``i`` to the other members of
    its own cluster and ``b`` is the smallest mean distance from ``i`` to the
    members of any other cluster. Each value ranges from -1 (bad) to
    +1 (good).

    Args:
        X: Data points, one list of coordinates per point.
        labels: Cluster label of each point; ``labels[i]`` belongs to
            ``X[i]``.

    Returns:
        The average silhouette over all points, rounded to four decimal
        places. ``0.0`` is returned when there are fewer than two points or
        fewer than two distinct labels.

    Raises:
        ValueError: If two compared points have a different number of
            coordinates (raised by ``math.dist``).
    """
    n = len(X)
    if n <= 1:
        return 0.0

    # Group point indices by cluster label.
    clusters: dict[int, list[int]] = {}
    for idx, lab in enumerate(labels):
        clusters.setdefault(lab, []).append(idx)

    # The score is undefined with a single cluster; report 0.0.
    if len(clusters) <= 1:
        return 0.0

    silhouettes = []
    for i in range(n):
        point = X[i]
        own_label = labels[i]
        own_members = clusters[own_label]

        # A point alone in its cluster has no intra-cluster distance to
        # average. By the usual convention its silhouette is 0 rather than
        # the misleading "perfect" 1.0 that a(i) = 0 would produce.
        if len(own_members) <= 1:
            silhouettes.append(0.0)
            continue

        # a(i): mean distance to the other members of the same cluster.
        a_i = sum(
            math.dist(point, X[j]) for j in own_members if j != i
        ) / (len(own_members) - 1)

        # b(i): smallest mean distance to the members of any other cluster.
        b_i = min(
            sum(math.dist(point, X[j]) for j in members) / len(members)
            for lab, members in clusters.items()
            if lab != own_label
        )

        # Guard against 0/0 when the point coincides with every neighbour.
        denom = max(a_i, b_i)
        if denom == 0:
            silhouettes.append(0.0)
        else:
            silhouettes.append((b_i - a_i) / denom)

    return round(sum(silhouettes) / len(silhouettes), 4)