Implement the DBSCAN (Density-Based Spatial Clustering of Applications with Noise) algorithm from scratch. Given data points, an epsilon radius, and a minimum number of points, cluster the data and identify noise points.
For each unvisited point, find its epsilon-neighborhood. If it has enough neighbors it becomes a core point and starts a cluster; otherwise it is (initially) labeled as noise. Expand clusters by recursively adding density-reachable points.
import math
def dbscan(
X: list[list[float]], eps: float = 0.5, min_pts: int = 5
) -> list[int]:
n = len(X)
labels = [-1] * n # -1 means noise
visited = [False] * n
cluster_id = 0
def distance(a: list[float], b: list[float]) -> float:
return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))
def region_query(idx: int) -> list[int]:
neighbors = []
for j in range(n):
if distance(X[idx], X[j]) <= eps:
neighbors.append(j)
return neighbors
def expand_cluster(idx: int, neighbors: list[int], cid: int) -> None:
labels[idx] = cid
queue = list(neighbors)
i = 0
while i < len(queue):
q = queue[i]
if not visited[q]:
visited[q] = True
q_neighbors = region_query(q)
if len(q_neighbors) >= min_pts:
queue.extend(q_neighbors)
if labels[q] == -1:
labels[q] = cid
i += 1
for i in range(n):
if visited[i]:
continue
visited[i] = True
neighbors = region_query(i)
if len(neighbors) < min_pts:
labels[i] = -1 # noise (may be reassigned later)
else:
expand_cluster(i, neighbors, cluster_id)
cluster_id += 1
return labelseps.min_pts neighbors, it is a core point — start a new cluster.