#335 · Deep Learning · Medium
# Solve on deep-ml.com
# Implement a simplified Paris-style decentralized expert training step. In this setup, multiple expert models train on different data partitions, then periodically synchronize by averaging their parameters, simulating a decentralized learning approach.
import numpy as np
from typing import List, Dict
def init_expert(input_dim: int, output_dim: int, seed: int) -> Dict[str, np.ndarray]:
    """Create the parameters of one linear expert.

    Args:
        input_dim: Number of rows of the weight matrix.
        output_dim: Number of columns of the weight matrix and length of
            the bias vector.
        seed: Seed for the private ``RandomState`` used to draw the weights,
            so the same seed always yields the same expert.

    Returns:
        A dict with ``"W"`` (standard-normal draws scaled by 0.1, shape
        ``(input_dim, output_dim)``) and ``"b"`` (zeros, shape
        ``(output_dim,)``).
    """
    generator = np.random.RandomState(seed)
    weights = 0.1 * generator.randn(input_dim, output_dim)
    bias = np.zeros(output_dim)
    return {"W": weights, "b": bias}
def expert_train_step(
    expert: Dict[str, np.ndarray],
    X: np.ndarray,
    y: np.ndarray,
    lr: float = 0.01
) -> Dict[str, np.ndarray]:
    """Apply one gradient-descent step to a linear expert on a batch.

    The prediction is ``X @ W + b``. With ``error = pred - y`` and ``n`` the
    number of rows of ``X``, the step uses ``X.T @ error / n`` for ``W`` and
    the mean of ``error`` over the batch axis for ``b``.

    Args:
        expert: Parameter dict with keys ``"W"`` and ``"b"``. It is updated
            in place: both entries are rebound to newly computed arrays.
        X: Batch of inputs; ``X.shape[0]`` is taken as the sample count.
        y: Targets matching the prediction shape. A 1-D vector of length
            ``n`` is also accepted when the model has a single output; it is
            treated as a column of shape ``(n, 1)``.
        lr: Learning rate multiplying both gradients.

    Returns:
        The same ``expert`` dict that was passed in, after the update.
    """
    # Forward pass (linear model with squared-error loss)
    pred = X @ expert["W"] + expert["b"]

    targets = np.asarray(y)
    # A flat (n,) target against an (n, 1) prediction would broadcast to an
    # (n, n) error matrix and silently corrupt the gradients and parameter
    # shapes, so align it to a column first.
    if (
        targets.ndim == 1
        and pred.ndim == 2
        and pred.shape == (targets.shape[0], 1)
    ):
        targets = targets.reshape(-1, 1)

    error = pred - targets
    n = X.shape[0]

    # Gradients
    grad_W = (X.T @ error) / n
    grad_b = error.mean(axis=0)

    # Update (new arrays are created; the previous arrays are not modified)
    expert["W"] = expert["W"] - lr * grad_W
    expert["b"] = expert["b"] - lr * grad_b
    return expert
def average_experts(experts: List[Dict[str, np.ndarray]]) -> Dict[str, np.ndarray]:
    """Average the parameters of several experts key by key.

    The keys are taken from the first expert; every expert is indexed with
    each of those keys.

    Args:
        experts: Parameter dicts to combine.

    Returns:
        A new dict mapping each key to the sum of that entry across all
        experts divided by the number of experts.
    """
    count = len(experts)
    reference = experts[0]
    averaged = {}
    for name in reference:
        # Accumulate from 0 with out-of-place addition so no input array is
        # modified and the summation order matches a plain left-to-right sum.
        running = 0
        for member in experts:
            running = running + member[name]
        averaged[name] = running / count
    return averaged
def train_paris_decentralized(
    X_partitions: List[np.ndarray],
    y_partitions: List[np.ndarray],
    n_experts: int,
    input_dim: int,
    output_dim: int,
    n_rounds: int = 10,
    local_steps: int = 5,
    lr: float = 0.01
) -> Dict[str, np.ndarray]:
    """Train several experts locally and periodically average them.

    Each round, expert ``i`` takes ``local_steps`` training steps on
    ``X_partitions[i]`` / ``y_partitions[i]``; afterwards all experts are
    replaced by independent copies of the averaged parameters.

    Args:
        X_partitions: Per-expert input batches, indexed by expert number.
        y_partitions: Per-expert targets, indexed by expert number.
        n_experts: Number of experts; expert ``i`` is initialised with
            seed ``i``.
        input_dim: Input dimension passed to the expert initialiser.
        output_dim: Output dimension passed to the expert initialiser.
        n_rounds: Number of local-train-then-average rounds.
        local_steps: Training steps each expert takes per round.
        lr: Learning rate forwarded to every training step.

    Returns:
        The parameter dict of the first expert after the final round.
    """
    pool = [init_expert(input_dim, output_dim, seed=idx) for idx in range(n_experts)]

    for _ in range(n_rounds):
        # Local phase: every expert trains only on its own partition.
        for idx in range(n_experts):
            current = pool[idx]
            for _ in range(local_steps):
                current = expert_train_step(
                    current, X_partitions[idx], y_partitions[idx], lr
                )
            pool[idx] = current

        # Sync phase: hand each expert its own copy of the averaged
        # parameters so later updates do not share arrays.
        consensus = average_experts(pool)
        pool = [
            {name: value.copy() for name, value in consensus.items()}
            for _ in range(n_experts)
        ]

    return pool[0]