Implement a KV cache memory budget manager with eviction policies. Given a fixed memory budget (in bytes), manage KV cache entries for multiple sequences. Support eviction policies: LRU (least recently used), LFU (least frequently used), and a token-importance-based policy. Track cache utilization and eviction statistics.
def kv_cache_manager(
memory_budget_bytes: int,
entry_size_bytes: int,
operations: list[dict],
eviction_policy: str = "lru"
) -> dict:
"""
operations: list of {"type": "read"|"write"|"evict_all", "key": str, ...}
entry_size_bytes: bytes per KV cache entry (per token per layer)
"""
max_entries = memory_budget_bytes // entry_size_bytes
cache = {} # key -> {"last_access": int, "frequency": int, "importance": float}
access_counter = 0
stats = {"hits": 0, "misses": 0, "evictions": 0, "writes": 0}
def evict_one():
if not cache:
return
if eviction_policy == "lru":
victim = min(cache, key=lambda k: cache[k]["last_access"])
elif eviction_policy == "lfu":
victim = min(cache, key=lambda k: (cache[k]["frequency"], cache[k]["last_access"]))
elif eviction_policy == "importance":
victim = min(cache, key=lambda k: cache[k]["importance"])
else:
victim = min(cache, key=lambda k: cache[k]["last_access"])
del cache[victim]
stats["evictions"] += 1
results = []
for op in operations:
access_counter += 1
if op["type"] == "read":
key = op["key"]
if key in cache:
cache[key]["last_access"] = access_counter
cache[key]["frequency"] += 1
stats["hits"] += 1
results.append({"key": key, "status": "hit"})
else:
stats["misses"] += 1
results.append({"key": key, "status": "miss"})
elif op["type"] == "write":
key = op["key"]
importance = op.get("importance", 1.0)
if key in cache:
cache[key]["last_access"] = access_counter
cache[key]["frequency"] += 1
cache[key]["importance"] = importance
else:
while len(cache) >= max_entries:
evict_one()
cache[key] = {
"last_access": access_counter,
"frequency": 1,
"importance": importance
}
stats["writes"] += 1
results.append({"key": key, "status": "written"})
elif op["type"] == "evict_all":
stats["evictions"] += len(cache)
cache.clear()
results.append({"status": "cleared"})
total_ops = stats["hits"] + stats["misses"]
hit_rate = stats["hits"] / total_ops if total_ops > 0 else 0.0
return {
"stats": stats,
"hit_rate": round(hit_rate, 4),
"cache_utilization": round(len(cache) / max_entries * 100, 2) if max_entries > 0 else 0,
"current_entries": len(cache),
"max_entries": max_entries,
"memory_used_bytes": len(cache) * entry_size_bytes,
"eviction_policy": eviction_policy
}