Implement Mixed Precision Training utilities: convert a model's forward pass to use half precision (float16) for speed, while keeping the master weights in full precision (float32) for accuracy. Include loss scaling to prevent gradient underflow.
import numpy as np
def mixed_precision_step(weights_fp32: np.ndarray, inputs: np.ndarray,
                         targets: np.ndarray, lr: float,
                         loss_scale: float = 1024.0):
    """Run one SGD step of a linear model with simulated mixed precision.

    The forward pass (``inputs @ weights``) and the backward matmul are done
    in float16, while the weight update is applied to the full-precision
    master weights. The error signal is multiplied by ``loss_scale`` before
    being cast to float16 so that small gradients do not underflow, and the
    resulting gradients are divided by ``loss_scale`` again before the update.

    If the scaled gradients overflow float16 (producing inf or NaN), the
    update is skipped and the master weights are returned unchanged instead
    of being corrupted; callers may use a smaller ``loss_scale`` and retry.

    Args:
        weights_fp32: Master weights (presumably float32, as the name says);
            must be compatible with ``inputs @ weights_fp32``.
        inputs: Input batch, typically of shape (n_samples, n_features).
        targets: Regression targets with the same shape as the predictions.
        lr: Learning rate.
        loss_scale: Factor applied to the error before the float16 cast.

    Returns:
        A ``(new_weights, loss)`` tuple, where ``loss`` is the mean squared
        error over all prediction elements as a Python float. When the step
        is skipped because of overflow, ``new_weights`` is the
        ``weights_fp32`` argument itself.
    """
    weights_fp16 = weights_fp32.astype(np.float16)
    inputs_fp16 = inputs.astype(np.float16)

    # Overflow in the float16 casts/matmuls is an expected event under loss
    # scaling and is handled explicitly below, so silence the FP warnings.
    with np.errstate(over="ignore", invalid="ignore", divide="ignore"):
        predictions = inputs_fp16 @ weights_fp16

        # Loss and error are computed in full precision.
        residual = predictions.astype(np.float32) - targets
        loss = np.mean(residual ** 2)

        # Gradient of the mean over *all* elements, so divide by the element
        # count (equal to len(targets) for 1-D targets) to match the loss.
        error = 2.0 * residual / residual.size

        # Scale before the down-cast so tiny gradients survive in float16.
        scaled_error = (error * loss_scale).astype(np.float16)
        grads_fp16 = inputs_fp16.T @ scaled_error

        # Unscale in float32 before touching the master weights.
        grads_fp32 = grads_fp16.astype(np.float32) / loss_scale

    # An inf/NaN gradient means the scaled values overflowed float16;
    # applying it would permanently poison the master weights.
    if not np.all(np.isfinite(grads_fp32)):
        return weights_fp32, float(loss)

    weights_fp32 = weights_fp32 - lr * grads_fp32
    return weights_fp32, float(loss)