Implement a bagging (Bootstrap Aggregating) classifier from scratch. Train multiple base classifiers on bootstrap samples of the training data and aggregate predictions using majority voting.
import numpy as np
from collections import Counter
class BaggingClassifier:
    """Bootstrap-aggregating (bagging) ensemble classifier.

    Each base classifier is trained on a bootstrap sample (drawn with
    replacement) of the training data; predictions are combined by
    majority vote.

    Args:
        base_classifier_class: Class (or factory) that, when called with
            ``**base_params``, returns an object exposing ``fit(X, y)`` and
            ``predict(X)``.
        n_estimators: Number of base classifiers to train.
        sample_fraction: Size of each bootstrap sample as a fraction of the
            training set size. Must be positive.
        **base_params: Keyword arguments forwarded to
            ``base_classifier_class`` for every estimator.

    Attributes:
        estimators: Fitted base classifiers, in training order.
        oob_indices: For each estimator, the sorted list of training-row
            indices that were NOT drawn into its bootstrap sample.
    """

    def __init__(self, base_classifier_class, n_estimators: int = 10,
                 sample_fraction: float = 1.0, **base_params):
        self.base_classifier_class = base_classifier_class
        self.n_estimators = n_estimators
        self.sample_fraction = sample_fraction
        self.base_params = base_params
        self.estimators = []
        self.oob_indices = []

    def _bootstrap_sample(self, X: np.ndarray, y: np.ndarray) -> tuple:
        """Draw one bootstrap sample and report the rows left out of it.

        Sampling uses NumPy's global random state, so ``np.random.seed``
        controls reproducibility.

        Returns:
            ``(X_boot, y_boot, oob)`` where ``oob`` is a sorted list of the
            row indices that do not appear in the bootstrap sample.

        Raises:
            ValueError: If ``sample_fraction`` is not positive.
        """
        if self.sample_fraction <= 0:
            raise ValueError(
                f"sample_fraction must be positive, got {self.sample_fraction!r}"
            )
        n = len(y)
        # int() truncates, so a small fraction would otherwise yield an empty
        # sample; always draw at least one row when any data is available.
        sample_size = max(1, int(n * self.sample_fraction)) if n else 0
        indices = np.random.choice(n, size=sample_size, replace=True)
        # setdiff1d returns the sorted unique rows never drawn (out-of-bag).
        oob = np.setdiff1d(np.arange(n), indices).tolist()
        return X[indices], y[indices], oob

    def fit(self, X: np.ndarray, y: np.ndarray) -> "BaggingClassifier":
        """Train ``n_estimators`` base classifiers on bootstrap samples.

        Any previously fitted estimators are discarded.

        Args:
            X: Training features; array-like indexable by row.
            y: Training labels; array-like with one entry per row of ``X``.

        Returns:
            ``self``, to allow call chaining.
        """
        # Bootstrap sampling relies on integer-array indexing, which plain
        # Python lists do not support.
        X = np.asarray(X)
        y = np.asarray(y)
        self.estimators = []
        self.oob_indices = []
        for _ in range(self.n_estimators):
            X_boot, y_boot, oob = self._bootstrap_sample(X, y)
            clf = self.base_classifier_class(**self.base_params)
            clf.fit(X_boot, y_boot)
            self.estimators.append(clf)
            self.oob_indices.append(oob)
        return self

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict labels for ``X`` by majority vote over all estimators.

        Ties are resolved in favour of the label that appears first in
        estimator order.

        Raises:
            ValueError: If there are no fitted estimators.
        """
        if not self.estimators:
            raise ValueError(
                "BaggingClassifier has no fitted estimators; call fit() first"
            )
        # all_preds shape: (n_estimators, n_samples)
        all_preds = np.array([clf.predict(X) for clf in self.estimators])
        # Transposing yields one row of votes per sample.
        return np.array([
            Counter(votes.tolist()).most_common(1)[0][0]
            for votes in all_preds.T
        ])

    def oob_score(self, X: np.ndarray, y: np.ndarray) -> float:
        """Estimate accuracy using out-of-bag predictions.

        Each sample is predicted only by the estimators whose bootstrap
        sample did not contain it; those votes are combined by majority.
        Samples that were in-bag for every estimator are ignored.

        Args:
            X: The training features passed to ``fit`` (the stored
                out-of-bag indices are row positions in that data).
            y: The training labels passed to ``fit``.

        Returns:
            Fraction of out-of-bag samples classified correctly, or ``0.0``
            when no sample has any out-of-bag vote.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        votes_by_sample = {}
        for clf, oob in zip(self.estimators, self.oob_indices):
            if len(oob) == 0:
                continue
            for sample_idx, pred in zip(oob, clf.predict(X[oob])):
                votes_by_sample.setdefault(sample_idx, []).append(pred)
        if not votes_by_sample:
            return 0.0
        correct = 0
        for sample_idx, votes in votes_by_sample.items():
            majority = Counter(votes).most_common(1)[0][0]
            if majority == y[sample_idx]:
                correct += 1
        return correct / len(votes_by_sample)