"""Bernoulli Naive Bayes classifier implemented from scratch.

This classifier works with binary/boolean features and uses Bernoulli
distributions to model feature likelihoods.
"""
import numpy as np
class BernoulliNaiveBayes:
    """Naive Bayes classifier for binary (0/1 or boolean) features.

    Every feature is modelled, per class, as an independent Bernoulli
    variable. The likelihood of a sample ``x`` under a class is
    ``prod_j p_j^x_j * (1 - p_j)^(1 - x_j)``, accounting for both presence
    (x=1) and absence (x=0) of each feature.

    Attributes:
        alpha: Additive (Laplace) smoothing strength.
        classes: Unique labels seen during ``fit`` (``None`` before fitting).
        class_priors: ``P(class)`` for each entry of ``classes``.
        feature_probs: Array of shape ``(n_classes, n_features)`` holding
            the smoothed ``P(x_j = 1 | class)``.
    """

    def __init__(self, alpha: float = 1.0):
        """Create an unfitted classifier.

        Args:
            alpha: Laplace smoothing parameter; ``0`` disables smoothing.
        """
        self.alpha = alpha  # Laplace smoothing
        self.class_priors = None
        self.feature_probs = None
        self.classes = None

    def fit(self, X: np.ndarray, y: np.ndarray) -> "BernoulliNaiveBayes":
        """Estimate class priors and per-class feature probabilities.

        Args:
            X: Binary feature matrix of shape ``(n_samples, n_features)``.
            y: Class labels of shape ``(n_samples,)``.

        Returns:
            The fitted classifier (``self``), to allow call chaining.

        Raises:
            ValueError: If ``alpha`` is negative, ``X`` is not a non-empty
                2-D array, or ``y`` does not hold one label per row of ``X``.
        """
        if self.alpha < 0:
            raise ValueError(f"alpha must be non-negative, got {self.alpha}")

        # Accept lists and boolean arrays as well as numeric ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty data set")
        if y.shape != (n_samples,):
            raise ValueError(
                f"y must have shape ({n_samples},), got shape {y.shape}"
            )

        # Validation is done; only now touch the fitted state.
        self.classes = np.unique(y)
        n_classes = len(self.classes)
        self.class_priors = np.zeros(n_classes)
        self.feature_probs = np.zeros((n_classes, n_features))

        for i, c in enumerate(self.classes):
            X_c = X[y == c]
            n_c = X_c.shape[0]
            self.class_priors[i] = n_c / n_samples
            # P(x_j=1 | class=c) with Laplace smoothing; the 2 * alpha term
            # covers the two possible outcomes (0 and 1) of each feature.
            self.feature_probs[i] = (X_c.sum(axis=0) + self.alpha) / (
                n_c + 2 * self.alpha
            )
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict the most probable class for every row of ``X``.

        Args:
            X: Binary feature matrix of shape ``(n_samples, n_features)``.

        Returns:
            Array of shape ``(n_samples,)`` with one label from ``classes``
            per sample.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``X`` is not 2-D or its feature count differs
                from the one seen during ``fit``.
        """
        if (
            self.classes is None
            or self.class_priors is None
            or self.feature_probs is None
        ):
            raise RuntimeError(
                "BernoulliNaiveBayes is not fitted yet; call fit() first"
            )

        X = np.asarray(X, dtype=float)
        if X.ndim >= 1 and X.shape[0] == 0:
            # No samples: return an empty result with the label dtype.
            return self.classes[:0].copy()
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        n_features = self.feature_probs.shape[1]
        if X.shape[1] != n_features:
            raise ValueError(
                f"X has {X.shape[1]} features, but the model was fitted "
                f"with {n_features}"
            )

        # Keep probabilities strictly inside (0, 1): with alpha == 0 they can
        # be exactly 0 or 1, and 0 * log(0) would poison the sums with NaN.
        eps = np.finfo(float).eps
        p = np.clip(self.feature_probs, eps, 1.0 - eps)
        log_p = np.log(p)
        log_q = np.log(1.0 - p)

        # sum_j [x_j*log(p_j) + (1-x_j)*log(1-p_j)]
        #   = x @ (log p - log(1-p)) + sum_j log(1-p_j)
        # evaluated for all samples and classes in one matrix product.
        joint_log_likelihood = (
            X @ (log_p - log_q).T
            + log_q.sum(axis=1)
            + np.log(self.class_priors)
        )
        return self.classes[np.argmax(joint_log_likelihood, axis=1)]