Implement a Gaussian Naive Bayes classifier from scratch. Fit the model by estimating class priors and per-feature Gaussian parameters (mean and variance), then predict class labels for new data.
For each class, compute the prior probability and the mean/variance of each feature. At prediction time, apply Bayes' theorem using Gaussian likelihoods.
import math
class GaussianNaiveBayes:
def __init__(self) -> None:
self.classes: list[int] = []
self.priors: dict[int, float] = {}
self.means: dict[int, list[float]] = {}
self.variances: dict[int, list[float]] = {}
def fit(self, X: list[list[float]], y: list[int]) -> None:
n = len(X)
dim = len(X[0])
self.classes = sorted(set(y))
for c in self.classes:
members = [X[i] for i in range(n) if y[i] == c]
n_c = len(members)
self.priors[c] = n_c / n
self.means[c] = [sum(m[d] for m in members) / n_c for d in range(dim)]
self.variances[c] = [
sum((m[d] - self.means[c][d]) ** 2 for m in members) / n_c
for d in range(dim)
]
def _gaussian_log_likelihood(self, x: list[float], mean: list[float], var: list[float]) -> float:
log_prob = 0.0
for d in range(len(x)):
v = var[d] + 1e-9 # avoid division by zero
log_prob += -0.5 * math.log(2 * math.pi * v) - 0.5 * ((x[d] - mean[d]) ** 2) / v
return log_prob
def predict(self, X: list[list[float]]) -> list[int]:
predictions = []
for x in X:
best_class = self.classes[0]
best_log_prob = float("-inf")
for c in self.classes:
log_prob = math.log(self.priors[c]) + self._gaussian_log_likelihood(
x, self.means[c], self.variances[c]
)
if log_prob > best_log_prob:
best_log_prob = log_prob
best_class = c
predictions.append(best_class)
return predictions
def gaussian_naive_bayes(X_train, y_train, X_test):
model = GaussianNaiveBayes()
model.fit(X_train, y_train)
return model.predict(X_test)P(C) and estimate the Gaussian parameters (mean and variance) for each feature.log P(C) + sum of log P(x_d | C) for each class. The Gaussian PDF gives the likelihood of each feature value.