#109 · Machine Learning · Medium
Solve on deep-ml.com

Implement Layer Normalization for sequence data. Given an input tensor of shape (batch_size, seq_len, d_model), normalize over the last dimension (features) independently for each position. Optionally apply the learnable scale (gamma) and shift (beta) parameters.
import numpy as np
def layer_normalization(
    x: np.ndarray,
    gamma: np.ndarray = None,
    beta: np.ndarray = None,
    eps: float = 1e-5,
) -> np.ndarray:
    """Apply layer normalization over the last axis of ``x``.

    Every slice along the last axis is shifted to zero mean and scaled to
    (approximately) unit variance, independently of all other slices. For
    the intended ``(batch_size, seq_len, d_model)`` input this normalizes
    the feature vector of each sequence position on its own.

    Args:
        x: Input array (or array-like); normalization runs over ``axis=-1``.
        gamma: Optional scale multiplied into the normalized values. Must
            broadcast against ``x`` (typically shape ``(d_model,)``).
        beta: Optional shift added after scaling. Must broadcast against
            ``x`` (typically shape ``(d_model,)``).
        eps: Small constant added to the variance before the square root so
            that constant (zero-variance) slices do not divide by zero.

    Returns:
        Array with the same shape as ``x`` holding the normalized values,
        scaled by ``gamma`` and shifted by ``beta`` when they are given.
    """
    # Accept plain nested lists as well as ndarrays.
    x = np.asarray(x)

    # keepdims=True keeps the statistics broadcastable against x.
    mean = np.mean(x, axis=-1, keepdims=True)
    var = np.var(x, axis=-1, keepdims=True)  # biased (population) variance
    x_norm = (x - mean) / np.sqrt(var + eps)

    # Affine transform: scale first, then shift.
    if gamma is not None:
        x_norm = x_norm * np.asarray(gamma)
    if beta is not None:
        x_norm = x_norm + np.asarray(beta)
    return x_norm