#370 · Deep Learning · Medium
# Solve on deep-ml.com
# Implement He (Kaiming) weight initialization for neural networks with ReLU activations. This method accounts for the fact that ReLU zeros out half the values, requiring a different scaling than Xavier initialization.
import numpy as np
def he_init(fan_in: int, fan_out: int, mode: str = "fan_in") -> np.ndarray:
    """Sample a He (Kaiming) normal weight matrix.

    Entries are drawn from a zero-mean normal distribution with standard
    deviation ``sqrt(2 / fan)``, where ``fan`` is ``fan_in`` or ``fan_out``
    depending on ``mode``.

    Args:
        fan_in: Number of input units; first dimension of the result.
        fan_out: Number of output units; second dimension of the result.
        mode: ``"fan_in"`` to scale by ``fan_in`` or ``"fan_out"`` to scale
            by ``fan_out``.

    Returns:
        Array of shape ``(fan_in, fan_out)``.

    Raises:
        ValueError: If ``mode`` is neither ``"fan_in"`` nor ``"fan_out"``.
    """
    if mode == "fan_in":
        std = np.sqrt(2.0 / fan_in)
    elif mode == "fan_out":
        std = np.sqrt(2.0 / fan_out)
    else:
        raise ValueError(f"Unknown mode: {mode}")
    # Uses NumPy's global RNG, so np.random.seed() makes the draw repeatable.
    return np.random.normal(0, std, (fan_in, fan_out))
def he_init_uniform(fan_in: int, fan_out: int, mode: str = "fan_in") -> np.ndarray:
    """Sample a He (Kaiming) uniform weight matrix.

    Entries are drawn uniformly from ``[-limit, limit)`` with
    ``limit = sqrt(6 / fan)``, where ``fan`` is ``fan_in`` or ``fan_out``
    depending on ``mode``.

    fan_in mode preserves forward-pass variance; fan_out mode preserves
    backward-pass gradient variance.

    Args:
        fan_in: Number of input units; first dimension of the result.
        fan_out: Number of output units; second dimension of the result.
        mode: ``"fan_in"`` to scale by ``fan_in`` or ``"fan_out"`` to scale
            by ``fan_out``.

    Returns:
        Array of shape ``(fan_in, fan_out)``.

    Raises:
        ValueError: If ``mode`` is neither ``"fan_in"`` nor ``"fan_out"``.
    """
    if mode == "fan_in":
        limit = np.sqrt(6.0 / fan_in)
    elif mode == "fan_out":
        limit = np.sqrt(6.0 / fan_out)
    else:
        raise ValueError(f"Unknown mode: {mode}")
    # Uses NumPy's global RNG, so np.random.seed() makes the draw repeatable.
    return np.random.uniform(-limit, limit, (fan_in, fan_out))