#47 · Machine Learning · Medium
# Solve on deep-ml.com
# Implement gradient descent variants (batch, stochastic, and mini-batch) to optimize a linear regression model using MSE loss. Given features X, targets y, a learning rate, and a number of epochs, return the learned weights.
import numpy as np
def _mse_gradient(X, y, weights):
    """Return the MSE gradient (2/n) * X^T @ (X @ w - y) over the given rows."""
    residual = X @ weights - y
    return (2 / X.shape[0]) * (X.T @ residual)


def gradient_descent(X, y, lr=0.01, epochs=100, method='batch', batch_size=32, seed=None):
    """Fit linear-regression weights by gradient descent on the MSE loss.

    Weights start at zero and are updated with the MSE gradient
    ``(2/n) * X^T @ (X @ w - y)``, where ``n`` is the number of rows used
    for that update. No intercept is added; include a column of ones in
    ``X`` if a bias term is wanted.

    Args:
        X: 2-D array-like of shape (n_samples, n_features).
        y: Array-like holding n_samples targets (flattened to a column).
        lr: Learning rate applied to every update.
        epochs: Number of passes over the data.
        method: ``'batch'`` (one update per epoch on all rows),
            ``'stochastic'`` (one update per row, shuffled each epoch) or
            ``'mini_batch'`` (one update per ``batch_size`` shuffled rows).
        batch_size: Rows per update; only used when ``method='mini_batch'``.
        seed: Optional seed for the per-epoch shuffles. When ``None`` the
            global NumPy random state is used.

    Returns:
        The learned weights as a flat list of floats, one per feature.

    Raises:
        ValueError: If ``method`` is unknown, ``batch_size`` is not positive
            for mini-batch, ``X`` is not 2-D, or ``X`` and ``y`` disagree on
            the number of samples.
    """
    if method not in ('batch', 'stochastic', 'mini_batch'):
        raise ValueError(
            f"method must be 'batch', 'stochastic' or 'mini_batch', got {method!r}"
        )
    if method == 'mini_batch' and batch_size < 1:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")

    X = np.array(X, dtype=np.float64)
    y = np.array(y, dtype=np.float64).reshape(-1, 1)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (n_samples, n_features), got {X.ndim}-D")
    n_samples, n_features = X.shape
    if y.shape[0] != n_samples:
        raise ValueError(
            f"X has {n_samples} samples but y has {y.shape[0]} targets"
        )

    weights = np.zeros((n_features, 1))
    if n_samples == 0:
        # Nothing to learn from; avoid dividing by zero in the gradient.
        return weights.flatten().tolist()

    # A private RandomState yields the same shuffle stream as
    # np.random.seed(seed) would, without reseeding the caller's global RNG.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Stochastic descent is mini-batch descent with one row per update.
    step = 1 if method == 'stochastic' else batch_size

    for _ in range(epochs):
        if method == 'batch':
            weights -= lr * _mse_gradient(X, y, weights)
            continue

        order = np.arange(n_samples)
        rng.shuffle(order)
        for start in range(0, n_samples, step):
            rows = order[start:start + step]
            weights -= lr * _mse_gradient(X[rows], y[rows], weights)

    return weights.flatten().tolist()