Implement the XGBoost objective function calculation. Given predictions and true labels, compute the gradient and hessian for a specified loss function (squared error or logistic), then calculate the optimal leaf weight and the gain for a potential split.
import numpy as np
from typing import Dict
def sigmoid(x: np.ndarray) -> np.ndarray:
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The input is clipped to ``[-500, 500]`` before exponentiation so that
    ``np.exp`` is never asked to evaluate an extreme argument.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator
def compute_gradients(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    objective: str = "squared_error"
) -> Dict[str, np.ndarray]:
    """Compute first- and second-order derivatives of the loss w.r.t. predictions.

    Args:
        y_true: True labels (array-like).
        y_pred: Current predictions (array-like). For ``"logistic"`` these
            are raw scores; the sigmoid is applied inside this function.
        objective: ``"squared_error"`` or ``"logistic"``.

    Returns:
        A dict with keys ``"gradient"`` and ``"hessian"``, both floating-point
        arrays of the same (broadcast) shape.

    Raises:
        ValueError: If ``objective`` is not one of the supported names, or if
            the input shapes cannot be broadcast together.
    """
    if objective not in ("squared_error", "logistic"):
        raise ValueError(f"Unknown objective: {objective}")

    # Accept lists/tuples and integer or boolean labels: without coercion,
    # list subtraction fails and the squared-error hessian would inherit a
    # non-float dtype from the labels.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if not np.issubdtype(y_true.dtype, np.floating):
        y_true = y_true.astype(np.float64)
    if not np.issubdtype(y_pred.dtype, np.floating):
        y_pred = y_pred.astype(np.float64)

    # Broadcast once up front so gradient and hessian always share a shape.
    y_true, y_pred = np.broadcast_arrays(y_true, y_pred)

    if objective == "squared_error":
        # L = 0.5 * (pred - y)^2  ->  dL/dpred = pred - y, d2L/dpred2 = 1
        grad = y_pred - y_true
        hess = np.ones_like(grad)
    else:
        # Log loss on raw scores: gradient p - y, hessian p * (1 - p).
        p = sigmoid(y_pred)
        grad = p - y_true
        hess = p * (1.0 - p)
    return {"gradient": grad, "hessian": hess}
def compute_leaf_weight(
    gradient: np.ndarray,
    hessian: np.ndarray,
    reg_lambda: float = 1.0
) -> float:
    """Return the leaf weight ``-sum(g) / (sum(h) + reg_lambda)``.

    Args:
        gradient: Per-sample gradients of the samples in the leaf.
        hessian: Per-sample hessians of the samples in the leaf.
        reg_lambda: L2 regularization term added to the hessian sum.

    Returns:
        The leaf weight as a Python float. When the regularized hessian sum
        is not positive (e.g. an empty leaf with ``reg_lambda=0``) the
        Newton step is undefined, so ``0.0`` is returned instead of
        ``nan``/``inf``.
    """
    grad_sum = float(np.sum(gradient))
    denominator = float(np.sum(hessian)) + reg_lambda
    if denominator <= 0.0:
        # No usable curvature information: leave the prediction unchanged.
        return 0.0
    return -grad_sum / denominator
def compute_split_gain(
    gradient_left: np.ndarray,
    hessian_left: np.ndarray,
    gradient_right: np.ndarray,
    hessian_right: np.ndarray,
    reg_lambda: float = 1.0,
    gamma: float = 0.0
) -> float:
    """Return the gain of splitting a node into a left and a right child.

    The gain is
    ``0.5 * (score(left) + score(right) - score(parent)) - gamma`` where
    ``score = sum(g)**2 / (sum(h) + reg_lambda)`` and the parent consists of
    the left and right samples together.

    Args:
        gradient_left: Gradients of the samples going to the left child.
        hessian_left: Hessians of the samples going to the left child.
        gradient_right: Gradients of the samples going to the right child.
        hessian_right: Hessians of the samples going to the right child.
        reg_lambda: L2 regularization term added to each hessian sum.
        gamma: Constant penalty subtracted from the gain.

    Returns:
        The split gain as a Python float. A node whose regularized hessian
        sum is not positive (e.g. an empty child with ``reg_lambda=0``)
        contributes a score of ``0.0`` rather than ``nan``/``inf``.
    """
    def score(grad_sum: float, hess_sum: float) -> float:
        denominator = hess_sum + reg_lambda
        if denominator <= 0.0:
            # Avoid 0/0 or x/0 for nodes without curvature information.
            return 0.0
        return grad_sum * grad_sum / denominator

    g_left = float(np.sum(gradient_left))
    h_left = float(np.sum(hessian_left))
    g_right = float(np.sum(gradient_right))
    h_right = float(np.sum(hessian_right))

    # The parent's sums are the sums of the children's sums, so there is no
    # need to concatenate the per-sample arrays.
    children = score(g_left, h_left) + score(g_right, h_right)
    parent = score(g_left + g_right, h_left + h_right)
    return float(0.5 * (children - parent) - gamma)