Analyze the health of a canary deployment for a model rollout. Given metrics from a canary (new model) and a baseline (current model) — including latency, error rate, and a custom performance score — determine whether the canary is healthy, degraded, or unhealthy based on configurable thresholds.
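Compare each metric between canary and baseline: the latency ratio (canary / baseline), the error-rate difference (canary - baseline), and the score drop (baseline - canary). If any of these meets or exceeds its unhealthy threshold, the deployment is unhealthy; otherwise, if any meets or exceeds its degraded threshold, it is degraded; otherwise it is healthy.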
def analyze_canary_health(
canary_metrics: dict,
baseline_metrics: dict,
thresholds: dict | None = None
) -> dict:
if thresholds is None:
thresholds = {
"latency_ratio_degraded": 1.1,
"latency_ratio_unhealthy": 1.5,
"error_rate_diff_degraded": 0.01,
"error_rate_diff_unhealthy": 0.05,
"score_drop_degraded": 0.02,
"score_drop_unhealthy": 0.05,
}
latency_ratio = canary_metrics["latency"] / max(baseline_metrics["latency"], 1e-9)
error_diff = canary_metrics["error_rate"] - baseline_metrics["error_rate"]
score_drop = baseline_metrics["score"] - canary_metrics["score"]
issues = []
status = "healthy"
# Check latency
if latency_ratio >= thresholds["latency_ratio_unhealthy"]:
status = "unhealthy"
issues.append(f"latency ratio {latency_ratio:.2f} >= {thresholds['latency_ratio_unhealthy']}")
elif latency_ratio >= thresholds["latency_ratio_degraded"]:
if status != "unhealthy":
status = "degraded"
issues.append(f"latency ratio {latency_ratio:.2f} >= {thresholds['latency_ratio_degraded']}")
# Check error rate
if error_diff >= thresholds["error_rate_diff_unhealthy"]:
status = "unhealthy"
issues.append(f"error rate diff {error_diff:.4f} >= {thresholds['error_rate_diff_unhealthy']}")
elif error_diff >= thresholds["error_rate_diff_degraded"]:
if status != "unhealthy":
status = "degraded"
issues.append(f"error rate diff {error_diff:.4f} >= {thresholds['error_rate_diff_degraded']}")
# Check score drop
if score_drop >= thresholds["score_drop_unhealthy"]:
status = "unhealthy"
issues.append(f"score drop {score_drop:.4f} >= {thresholds['score_drop_unhealthy']}")
elif score_drop >= thresholds["score_drop_degraded"]:
if status != "unhealthy":
status = "degraded"
issues.append(f"score drop {score_drop:.4f} >= {thresholds['score_drop_degraded']}")
return {
"status": status,
"latency_ratio": round(latency_ratio, 4),
"error_rate_diff": round(error_diff, 4),
"score_drop": round(score_drop, 4),
"issues": issues,
}