Implement a stacking (stacked generalization) classifier. Train multiple base classifiers, use their predictions as features for a meta-classifier, and use cross-validation to prevent overfitting.
import numpy as np
from typing import List, Dict, Callable
def simple_knn_predict(X_train, y_train, X_test, k=3):
    """Predict a label for each row of ``X_test`` by k-nearest-neighbour vote.

    Distances are Euclidean. Each test row receives the most frequent label
    among its ``k`` closest training rows. When several labels are equally
    frequent, the one that sorts first in ``np.unique`` order wins. If ``k``
    exceeds the number of training rows, every training row votes.

    Args:
        X_train: Training features, array-like of shape (n_train, n_features).
        y_train: Training labels, array-like of length n_train.
        X_test: Query features, array-like of shape (n_test, n_features).
        k: Number of neighbours that vote; must be at least 1.

    Returns:
        A NumPy array holding one predicted label per row of ``X_test``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # A negative k would silently slice "all but the |k| farthest" rows and a
    # zero k would fail deep inside argmax, so reject both up front.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Accept plain Python sequences as well as arrays.
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)

    preds = []
    for x in X_test:
        dists = np.sqrt(np.sum((X_train - x) ** 2, axis=1))
        idx = np.argsort(dists)[:k]
        # np.unique returns labels sorted, and argmax picks the first maximum,
        # which is what makes vote ties resolve to the first-sorted label.
        vals, counts = np.unique(y_train[idx], return_counts=True)
        preds.append(vals[np.argmax(counts)])
    return np.array(preds)
def simple_centroid_predict(X_train, y_train, X_test):
    """Label each row of ``X_test`` with the class of its nearest centroid.

    A centroid is the per-feature mean of all training rows carrying a given
    label. Each test row is assigned the label whose centroid is closest in
    Euclidean distance; on an exact tie the label that comes first in
    ``np.unique`` order is chosen.

    Args:
        X_train: Training feature array, one row per sample.
        y_train: Training label array aligned with the rows of ``X_train``.
        X_test: Feature array of the rows to classify.

    Returns:
        A NumPy array with one predicted label per row of ``X_test``.
    """
    labels = np.unique(y_train)
    # One mean vector per class, kept in the same order as ``labels``.
    means = [X_train[y_train == label].mean(axis=0) for label in labels]

    def nearest_label(sample):
        gaps = [np.linalg.norm(sample - mean) for mean in means]
        # min() keeps the first minimum, so ties go to the earliest label.
        winner = min(range(len(gaps)), key=gaps.__getitem__)
        return labels[winner]

    return np.array([nearest_label(sample) for sample in X_test])
def stacking_classifier(
    X: np.ndarray,
    y: np.ndarray,
    X_test: np.ndarray,
    n_folds: int = 5,
    seed: int = 42
) -> Dict:
    """Run a three-learner stacking ensemble and predict labels for ``X_test``.

    Base learners are 3-NN, 5-NN and nearest-centroid. Training meta-features
    are out-of-fold: each sample's base predictions come from learners fitted
    on the other folds only. Test meta-features come from learners fitted on
    all of ``X``/``y``.

    The meta-level combiner is the rounded median of the base predictions.
    With three votes this equals the majority label whenever at least two
    learners agree, and the middle label when all three differ. Predictions
    are stored in a float array and cast to ``int``, so labels must be
    integer-valued.

    Args:
        X: Training features, one row per sample.
        y: Training labels aligned with the rows of ``X``.
        X_test: Features of the samples to predict.
        n_folds: Number of cross-validation folds; must be at least 2.
        seed: Seed for the shuffle that assigns samples to folds.

    Returns:
        A dict with:
            ``test_predictions``: list of predicted labels for ``X_test``.
            ``train_accuracy``: accuracy of the combined out-of-fold
                predictions against ``y``, rounded to 4 decimals.
            ``n_base_learners``: number of base learners (3).
            ``meta_features_shape``: ``[n_samples, n_base_learners]``.

    Raises:
        ValueError: If ``n_folds`` is smaller than 2.
    """
    # With a single fold there is no held-out data to train on.
    if n_folds < 2:
        raise ValueError(f"n_folds must be at least 2, got {n_folds}")

    X = np.asarray(X)
    y = np.asarray(y)
    X_test = np.asarray(X_test)

    n = len(y)
    # A private RandomState produces the same permutation as seeding the
    # global generator, without resetting the caller's random stream.
    rng = np.random.RandomState(seed)
    indices = np.arange(n)
    rng.shuffle(indices)
    folds = np.array_split(indices, n_folds)

    base_learners = [
        lambda Xtr, ytr, Xte: simple_knn_predict(Xtr, ytr, Xte, k=3),
        lambda Xtr, ytr, Xte: simple_knn_predict(Xtr, ytr, Xte, k=5),
        lambda Xtr, ytr, Xte: simple_centroid_predict(Xtr, ytr, Xte),
    ]
    n_base = len(base_learners)

    # Generate meta-features via cross-validation
    meta_train = np.zeros((n, n_base))
    for fold_idx, val_idx in enumerate(folds):
        # array_split yields empty folds when n_folds exceeds n.
        if val_idx.size == 0:
            continue
        train_idx = np.concatenate(
            [f for i, f in enumerate(folds) if i != fold_idx]
        )
        X_tr, y_tr = X[train_idx], y[train_idx]
        X_val = X[val_idx]
        for b, learner in enumerate(base_learners):
            meta_train[val_idx, b] = learner(X_tr, y_tr, X_val)

    # Generate meta-features for test set (train on full data)
    meta_test = np.zeros((len(X_test), n_base))
    for b, learner in enumerate(base_learners):
        meta_test[:, b] = learner(X, y, X_test)

    # Meta-classifier: row-wise median of the base predictions
    final_train_preds = np.round(np.median(meta_train, axis=1)).astype(int)
    final_test_preds = np.round(np.median(meta_test, axis=1)).astype(int)

    train_acc = np.mean(final_train_preds == y)
    return {
        "test_predictions": final_test_preds.tolist(),
        "train_accuracy": round(float(train_acc), 4),
        "n_base_learners": n_base,
        "meta_features_shape": list(meta_train.shape)
    }