# Task: implement the fit method of the AdaBoost algorithm. Train an ensemble
# of weak classifiers (decision stumps) on weighted samples, updating the
# sample weights after each round based on which samples were misclassified.
import numpy as np
def _find_best_stump(X, y, w):
    """Return the decision stump with the lowest weighted error under ``w``.

    Every feature, every unique value of that feature (used as threshold),
    and both polarities are tried. Ties keep the first candidate found,
    because a candidate only replaces the incumbent on a strictly lower error.

    Returns:
        A ``(stump, error, preds)`` tuple: the stump description dict
        (``'feature'``, ``'threshold'``, ``'polarity'``), its weighted error,
        and its -1/+1 predictions for every sample.
    """
    best_stump = None
    best_error = float('inf')
    best_preds = None
    for feature_idx in range(X.shape[1]):
        column = X[:, feature_idx]
        for threshold in np.unique(column):
            # polarity  1: samples strictly below the threshold are labelled -1
            # polarity -1: samples at or above the threshold are labelled -1
            candidates = ((1, column < threshold), (-1, column >= threshold))
            for polarity, negative_side in candidates:
                preds = np.where(negative_side, -1.0, 1.0)
                # Weighted error: total weight of the misclassified samples.
                error = np.sum(w[preds != y])
                if error < best_error:
                    best_error = error
                    best_stump = {
                        'feature': feature_idx,
                        'threshold': threshold,
                        'polarity': polarity,
                    }
                    best_preds = preds
    return best_stump, best_error, best_preds


def adaboost_fit(X, y, n_clf=10):
    """Fit an AdaBoost ensemble of decision stumps.

    Each round picks the stump with the lowest weighted error, gives it a
    vote weight ``alpha``, and then re-weights the samples so that the ones
    it misclassified count more in the next round.

    Args:
        X: 2-D array-like of shape ``(n_samples, n_features)``.
        y: 1-D array-like of length ``n_samples``. Labels are compared
            against stump predictions of -1/+1, so they should be encoded
            as -1 and +1.
        n_clf: Number of boosting rounds (stumps) to fit.

    Returns:
        A list of ``n_clf`` dicts, one per round, with the keys
        ``'feature'``, ``'threshold'``, ``'polarity'`` and ``'alpha'``.

    Raises:
        ValueError: If ``X`` is not 2-D, has no samples or no features, or
            if ``y`` does not hold exactly one label per sample.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
    n_samples, n_features = X.shape
    if n_samples == 0 or n_features == 0:
        raise ValueError("X must contain at least one sample and one feature")
    if y.shape != (n_samples,):
        raise ValueError("y must be a 1-D array with one label per sample")

    # Start from a uniform distribution over the samples.
    w = np.full(n_samples, 1 / n_samples)
    # Keeps alpha finite when a stump is perfect (error 0) or always wrong.
    eps = 1e-10
    classifiers = []
    for _ in range(n_clf):
        stump, error, preds = _find_best_stump(X, y, w)
        # Compute alpha from the error rate using the AdaBoost formula.
        alpha = 0.5 * np.log((1 - error + eps) / (error + eps))
        # y * preds is +1 for a correct prediction and -1 for a wrong one, so
        # correct samples are scaled down and misclassified ones scaled up.
        w *= np.exp(-alpha * y * preds)
        w /= np.sum(w)
        stump['alpha'] = alpha
        classifiers.append(stump)
    return classifiers