# Implement a custom dense (fully connected) layer. The layer performs a linear
# transformation, output = X @ W + b, and optionally applies an activation
# function. Implement both the forward pass and the backward pass (gradient
# computation).
import numpy as np
class DenseLayer:
    """Fully connected layer computing ``activation(X @ W + b)``.

    The layer caches its most recent input and pre-activation values during
    ``forward`` so that ``backward`` can compute gradients and apply a plain
    gradient-descent update to ``W`` and ``b``.

    Attributes:
        W: Weight matrix of shape ``(input_size, output_size)``.
        b: Bias row vector of shape ``(1, output_size)``.
        activation: ``'relu'``, ``'sigmoid'``, or any other value (including
            ``None``), which is treated as no activation.
        input: Input cached by the last ``forward`` call (``None`` before it).
        z: Pre-activation ``X @ W + b`` cached by the last ``forward`` call.
    """

    def __init__(self, input_size: int, output_size: int, activation=None, seed=None):
        """Initialise small random weights and zero biases.

        Args:
            input_size: Number of input features.
            output_size: Number of output units.
            activation: ``'relu'``, ``'sigmoid'`` or ``None`` for a linear layer.
            seed: If not ``None``, passed to ``np.random.seed`` before the
                weights are drawn. NOTE: this reseeds NumPy's *global* RNG.
        """
        if seed is not None:
            np.random.seed(seed)
        # Scale standard-normal draws by 0.01 so initial weights are small.
        self.W = np.random.randn(input_size, output_size) * 0.01
        self.b = np.zeros((1, output_size))
        self.activation = activation
        self.input = None
        self.z = None

    def relu(self, x):
        """Return the element-wise maximum of 0 and ``x``."""
        return np.maximum(0, x)

    def relu_deriv(self, x):
        """Return 1.0 where ``x > 0`` and 0.0 elsewhere."""
        return (x > 0).astype(float)

    def sigmoid(self, x):
        """Return the logistic sigmoid of ``x``.

        The argument is clipped to ``[-500, 500]`` before exponentiation so
        ``np.exp`` does not overflow for large-magnitude inputs.
        """
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_deriv(self, x):
        """Return the sigmoid derivative ``s * (1 - s)`` evaluated at ``x``."""
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, X):
        """Compute z = X @ W + b, then apply the activation function if specified.

        Args:
            X: Input batch of shape ``(batch, input_size)``. A single 1-D
                sample or a nested list is promoted to a 2-D array.

        Returns:
            The layer output of shape ``(batch, output_size)``.
        """
        # Promote 1-D / array-like input to a 2-D batch so that backward() can
        # rely on ``.T`` and ``.shape[0]``. 2-D ndarrays pass through unchanged.
        self.input = np.atleast_2d(X)
        self.z = self.input @ self.W + self.b
        if self.activation == 'relu':
            return self.relu(self.z)
        elif self.activation == 'sigmoid':
            return self.sigmoid(self.z)
        return self.z

    def backward(self, d_out, lr=0.01):
        """Backpropagate ``d_out`` and update the parameters in place.

        Args:
            d_out: Gradient of the loss with respect to this layer's output,
                shape ``(batch, output_size)`` (a 1-D vector is treated as a
                single-sample batch).
            lr: Learning rate for the gradient-descent step.

        Returns:
            Gradient with respect to the layer input, shape
            ``(batch, input_size)``.

        Raises:
            RuntimeError: If called before ``forward``.
        """
        if self.input is None or self.z is None:
            raise RuntimeError("backward() called before forward()")

        d_out = np.atleast_2d(d_out)
        # Chain rule through the activation, evaluated at the cached z.
        if self.activation == 'relu':
            d_out = d_out * self.relu_deriv(self.z)
        elif self.activation == 'sigmoid':
            d_out = d_out * self.sigmoid_deriv(self.z)

        # Parameter gradients are averaged over the batch dimension.
        dW = self.input.T @ d_out / self.input.shape[0]
        db = np.mean(d_out, axis=0, keepdims=True)
        # Input gradient must use the weights from the forward pass, so it is
        # computed before W is updated below.
        d_input = d_out @ self.W.T

        self.W -= lr * dW
        self.b -= lr * db
        return d_input