Compute the derivative of the softmax function with respect to its input logits. Given a vector of logits, return the Jacobian matrix of the softmax output.
import math
def softmax_derivative(logits: list[float]) -> list[list[float]]:
    """Return the Jacobian matrix of softmax evaluated at ``logits``.

    With ``S = softmax(z)``, entry ``[i][j]`` of the result is ``dS_i/dz_j``:

    * diagonal:     ``dS_i/dz_i = S_i * (1 - S_i)``
    * off-diagonal: ``dS_i/dz_j = -S_i * S_j``

    In matrix form this is ``diag(S) - S * S^T``.

    Args:
        logits: The input logits ``z``.

    Returns:
        An ``n x n`` list of rows, where ``n == len(logits)``. Empty input
        yields an empty list (the 0 x 0 Jacobian).
    """
    # ``default=None`` lets empty input fall through to an early return
    # instead of raising ValueError from max().
    peak = max(logits, default=None)
    if peak is None:
        return []

    # Softmax is invariant to shifting every logit by the same constant;
    # subtracting the maximum keeps every exponent <= 0 so math.exp cannot
    # overflow on large logits.
    exps = [math.exp(x - peak) for x in logits]
    total = sum(exps)
    s = [e / total for e in exps]

    return [
        [s_i * (1 - s_i) if i == j else -s_i * s_j for j, s_j in enumerate(s)]
        for i, s_i in enumerate(s)
    ]