#105 · Machine Learning · Hard
# Solve on deep-ml.com
# Train a softmax regression (multinomial logistic regression) model using gradient descent. Given features and multi-class labels, learn weight parameters that minimize the cross-entropy loss and return the trained weights.
import numpy as np
def softmax_regression(X: np.ndarray, y: np.ndarray, n_classes: int, lr: float = 0.1, epochs: int = 1000) -> np.ndarray:
    """Train a softmax (multinomial logistic) regression model.

    Runs full-batch gradient descent on the mean cross-entropy loss,
    starting from an all-zero weight matrix. No bias term is added; include
    a constant column in ``X`` if an intercept is wanted.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    y : array-like of n_samples class labels
        Labels in ``0 .. n_classes - 1``. Values are cast to ``int`` and
        flattened, so a column vector of shape ``(n_samples, 1)`` works too.
    n_classes : int
        Number of classes (columns of the returned weight matrix).
    lr : float, default 0.1
        Gradient-descent step size.
    epochs : int, default 1000
        Number of full-batch update steps.

    Returns
    -------
    np.ndarray of shape (n_features, n_classes)
        The trained weights ``W``. Class scores for new data are ``X @ W``.

    Raises
    ------
    IndexError
        If any label lies outside ``0 .. n_classes - 1``.

    Notes
    -----
    Softmax: ``exp(z_i) / sum(exp(z_j))``. Subtract the max for numerical
    stability.

    Gradient: ``X^T (probs - Y_onehot) / n``, which is the generalization of
    the logistic regression gradient to multiple classes.
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(y).astype(int).ravel()

    n_samples, n_features = X.shape
    W = np.zeros((n_features, n_classes))

    # Nothing to learn from; also avoids dividing by n_samples == 0 below,
    # which would fill W with NaN.
    if n_samples == 0:
        return W

    # Negative labels would silently wrap around in the fancy indexing
    # below and train against the wrong class, so reject them explicitly.
    if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
        raise IndexError(
            f"labels must lie in [0, {n_classes - 1}]; "
            f"got values in [{labels.min()}, {labels.max()}]"
        )

    # One-hot encode labels
    Y_onehot = np.zeros((n_samples, n_classes))
    Y_onehot[np.arange(n_samples), labels] = 1

    for _ in range(epochs):
        # Compute softmax probabilities: exp(z_i) / sum(exp(z_j))
        logits = X @ W
        logits -= np.max(logits, axis=1, keepdims=True)  # numerical stability
        exp_logits = np.exp(logits)
        probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

        # Gradient of the mean cross-entropy loss: X^T (probs - Y_onehot) / n
        gradient = X.T @ (probs - Y_onehot) / n_samples
        W -= lr * gradient

    return W