#401 · Deep Learning · Easy
Solve on deep-ml.com
Implement Exponential Moving Average (EMA) for diffusion model weights. EMA maintains a shadow copy of the model parameters that is a smoothed version of the training parameters, which typically produces better samples.
import numpy as np
class EMA:
    """Exponential moving average (EMA) of named model parameters.

    A "shadow" copy of each parameter is kept and, on every update, moved
    toward the current training value:

        shadow = decay * shadow + (1 - decay) * param

    A high decay (e.g., 0.999 or 0.9999) means the shadow changes slowly.

    Parameters are passed around as ``{name: array}`` dictionaries. Every
    array stored in or returned from this class is a copy, so later in-place
    edits by the caller never leak into the shadow state (and vice versa).
    """

    def __init__(self, decay: float = 0.999):
        """Create an empty tracker.

        Args:
            decay: Weight given to the previous shadow value on each update.
        """
        self.decay = decay
        # name -> smoothed copy of the parameter with that name
        self.shadow_params = {}

    def register(self, params: dict[str, np.ndarray]) -> None:
        """Initialise (or reset) the shadow of every entry in ``params``.

        Args:
            params: Current parameters; each value is copied.
        """
        for name, param in params.items():
            self.shadow_params[name] = param.copy()

    def update(self, params: dict[str, np.ndarray]) -> None:
        """Blend the current parameter values into the shadow copies.

        A name that has not been registered yet is registered on the fly
        with a copy of its current value instead of raising ``KeyError``.

        Args:
            params: Current parameters after a training step.
        """
        keep = self.decay
        blend = 1 - self.decay
        for name, param in params.items():
            previous = self.shadow_params.get(name)
            if previous is None:
                # First sighting of this parameter: there is no history to
                # average with, so the shadow starts at the current value.
                self.shadow_params[name] = param.copy()
            else:
                self.shadow_params[name] = keep * previous + blend * param

    def get_params(self) -> dict[str, np.ndarray]:
        """Return a copy of all shadow parameters, keyed by name."""
        return {name: param.copy() for name, param in self.shadow_params.items()}

    def apply_shadow(self, params: dict[str, np.ndarray]) -> dict[str, np.ndarray]:
        """Overwrite the entries of ``params`` with their shadow values.

        Args:
            params: Parameter dictionary to modify; its values are replaced
                by copies of the corresponding shadow arrays.

        Returns:
            Copies of the values ``params`` held before the call, suitable
            for passing to :meth:`restore`.

        Raises:
            KeyError: If some name in ``params`` has no shadow. The check
                runs before anything is modified, so ``params`` is left
                untouched in that case.
        """
        missing = [name for name in params if name not in self.shadow_params]
        if missing:
            raise KeyError(f"no shadow parameters registered for: {missing}")

        backup = {name: param.copy() for name, param in params.items()}
        for name in params:
            params[name] = self.shadow_params[name].copy()
        return backup

    def restore(self, params: dict[str, np.ndarray], backup: dict[str, np.ndarray]) -> None:
        """Put previously backed-up values back into ``params``.

        Args:
            params: Parameter dictionary to modify.
            backup: Values to restore, typically the dictionary returned by
                :meth:`apply_shadow`; each value is copied.

        Raises:
            KeyError: If some name in ``params`` is absent from ``backup``.
                The check runs before anything is modified.
        """
        missing = [name for name in params if name not in backup]
        if missing:
            raise KeyError(f"backup has no entry for: {missing}")

        for name in params:
            params[name] = backup[name].copy()