#449 · Machine Learning · Medium
Solve on deep-ml.com

Build an autoscaling replica simulator with SLA tracking. Given a time series of request rates, a per-replica throughput capacity, scale-up and scale-down thresholds (utilization-based), scaling cooldown periods, and an SLA latency target, simulate the autoscaler and report per-interval replica counts, utilization, and SLA violations.
def autoscaling_simulator(
    request_rates: list[float],
    replica_capacity: float,
    scale_up_threshold: float,
    scale_down_threshold: float,
    cooldown_steps: int,
    min_replicas: int = 1,
    max_replicas: int = 100
) -> dict:
    """Simulate a utilization-based autoscaler over a series of request rates.

    Each interval is served by the replica count decided in earlier
    intervals; the scaling decision made in an interval takes effect from
    the next one. Utilization is
    ``request_rate / (replicas * replica_capacity)``. An interval counts as
    an SLA violation when its request rate exceeds the total capacity, and
    the excess is reported as dropped requests.

    Args:
        request_rates: Request rate observed in each interval.
        replica_capacity: Request rate a single replica can serve. Must be
            positive.
        scale_up_threshold: Scale up when utilization is strictly above
            this value. It is also the target utilization used to size the
            new replica count in both directions. Must be positive.
        scale_down_threshold: Scale down when utilization is strictly
            below this value.
        cooldown_steps: Number of intervals after a scaling action during
            which no further scaling decision is made.
        min_replicas: Initial replica count and lower bound for scale-down.
        max_replicas: Upper bound for scale-up.

    Returns:
        A dict with:
            ``history``: one dict per interval with keys ``step``,
                ``request_rate``, ``replicas``, ``utilization`` (capped at
                1.0, rounded to 4 places), ``sla_violated`` and
                ``dropped_requests`` (rounded to 2 places).
            ``total_sla_violations``: number of violating intervals.
            ``violation_rate``: violations divided by the number of
                intervals (0.0 for an empty series), rounded to 4 places.

    Raises:
        ValueError: If ``replica_capacity`` or ``scale_up_threshold`` is not
            positive; the replica sizing formula divides by their product.
    """
    if replica_capacity <= 0:
        raise ValueError("replica_capacity must be positive")
    if scale_up_threshold <= 0:
        raise ValueError("scale_up_threshold must be positive")

    # Load a single replica can carry while sitting exactly at the scale-up
    # threshold; loop-invariant, so computed once.
    target_load_per_replica = replica_capacity * scale_up_threshold

    replicas = min_replicas
    cooldown_remaining = 0
    history = []
    sla_violations = 0

    for t, rate in enumerate(request_rates):
        total_capacity = replicas * replica_capacity
        # With zero replicas there is no capacity at all; treat as saturated.
        utilization = rate / total_capacity if total_capacity > 0 else 1.0

        violated = rate > total_capacity
        if violated:
            sla_violations += 1
            dropped = rate - total_capacity
        else:
            dropped = 0

        history.append({
            "step": t,
            "request_rate": rate,
            "replicas": replicas,
            "utilization": round(min(utilization, 1.0), 4),
            "sla_violated": violated,
            "dropped_requests": round(dropped, 2)
        })

        # During cooldown the interval is recorded but no scaling happens.
        if cooldown_remaining > 0:
            cooldown_remaining -= 1
            continue

        if utilization > scale_up_threshold:
            # Truncate-and-add-one keeps the resulting utilization strictly
            # below the scale-up threshold.
            needed = int(rate / target_load_per_replica) + 1
            new_replicas = min(needed, max_replicas)
            if new_replicas > replicas:
                replicas = new_replicas
                cooldown_remaining = cooldown_steps
        elif utilization < scale_down_threshold:
            # Shrink to the same target sizing, never below min_replicas.
            needed = int(rate / target_load_per_replica) + 1
            new_replicas = max(needed, min_replicas)
            if new_replicas < replicas:
                replicas = new_replicas
                cooldown_remaining = cooldown_steps

    total_intervals = len(request_rates)
    violation_rate = sla_violations / total_intervals if total_intervals > 0 else 0.0
    return {
        "history": history,
        "total_sla_violations": sla_violations,
        "violation_rate": round(violation_rate, 4)
    }