Calculate SLA (Service Level Agreement) compliance metrics for a model-serving endpoint. Given request logs, determine availability (uptime), latency SLA compliance, and summary latency statistics.
def sla_compliance_metrics(
    latencies_ms: list[float],
    timestamps: list[float],
    errors: list[bool],
    latency_sla_ms: float,
    availability_sla: float,
    window_seconds: float,
    *,
    latency_compliance_target: float = 0.95,
) -> dict:
    """Summarize availability and latency SLA compliance for a set of requests.

    Availability is the fraction of requests that did not fail. Latency
    statistics (compliance, mean, p99) are computed over successful requests
    only, so the placeholder latency recorded for failed requests never
    skews them.

    Args:
        latencies_ms: Latency per request in milliseconds (0 for errors).
        timestamps: Unix timestamp per request. Accepted for interface
            compatibility; not used by the current calculation.
        errors: True where the corresponding request failed. Must be the
            same length as ``latencies_ms``.
        latency_sla_ms: Maximum acceptable latency (e.g. 200 ms). A request
            exactly at the threshold counts as compliant.
        availability_sla: Target availability (e.g. 0.999).
        window_seconds: Monitoring window in seconds. Accepted for interface
            compatibility; not used by the current calculation.
        latency_compliance_target: Minimum fraction of successful requests
            that must be within ``latency_sla_ms`` for ``latency_sla_met``
            to be True. Defaults to 0.95.

    Returns:
        A dict with keys ``total_requests``, ``error_count``,
        ``availability``, ``availability_sla_met``,
        ``latency_sla_compliance``, ``latency_sla_met``,
        ``mean_latency_ms`` and ``p99_latency_ms``. With no requests at all,
        both SLAs are reported as met. When every request failed, latency
        compliance is 0.0 and the latency statistics are 0.0.

    Raises:
        ValueError: If ``latencies_ms`` and ``errors`` differ in length.
    """
    n = len(latencies_ms)
    if len(errors) != n:
        raise ValueError(
            f"latencies_ms and errors must have the same length "
            f"(got {n} and {len(errors)})"
        )

    if n == 0:
        # No traffic: nothing violated the SLA.
        return {
            "total_requests": 0,
            "error_count": 0,
            "availability": 1.0,
            "availability_sla_met": True,
            "latency_sla_compliance": 1.0,
            "latency_sla_met": True,
            "mean_latency_ms": 0.0,
            "p99_latency_ms": 0.0,
        }

    error_count = sum(1 for failed in errors if failed)
    availability = (n - error_count) / n

    # Latency metrics only consider requests that actually succeeded.
    successful_latencies = [
        latency for latency, failed in zip(latencies_ms, errors) if not failed
    ]

    if successful_latencies:
        count = len(successful_latencies)
        within_sla = sum(
            1 for latency in successful_latencies if latency <= latency_sla_ms
        )
        latency_compliance = within_sla / count

        sorted_lat = sorted(successful_latencies)
        mean_lat = sum(sorted_lat) / count

        # Nearest-rank p99: rank = ceil(0.99 * count), computed with integer
        # arithmetic so float rounding cannot shift the rank. Flooring here
        # would pick one sample too low whenever 0.99 * count is fractional.
        p99_rank = -(-99 * count // 100)
        p99_lat = sorted_lat[p99_rank - 1]
    else:
        # Every request failed: there is no latency evidence of compliance.
        latency_compliance = 0.0
        mean_lat = 0.0
        p99_lat = 0.0

    return {
        "total_requests": n,
        "error_count": error_count,
        "availability": round(availability, 6),
        # SLA checks use the unrounded values so rounding cannot flip them.
        "availability_sla_met": availability >= availability_sla,
        "latency_sla_compliance": round(latency_compliance, 6),
        "latency_sla_met": latency_compliance >= latency_compliance_target,
        "mean_latency_ms": round(mean_lat, 4),
        "p99_latency_ms": round(p99_lat, 4),
    }