#443 · Machine Learning · Medium
⊣ Solve on deep-ml.com
Measure the quality of embedding quantization by computing the cosine similarity between original full-precision embeddings and their quantized-then-dequantized versions. Given a batch of embedding vectors, a target bit-width, and a quantization scheme (symmetric uniform), return the per-vector and average cosine similarity.
def embedding_quantization_quality(
    embeddings: list[list[float]],
    bit_width: int
) -> dict:
    """Measure how well embeddings survive symmetric uniform quantization.

    Each vector is quantized to signed integer levels in ``[-qmax, qmax]``
    with ``qmax = 2^(bit_width - 1) - 1`` and a per-vector
    ``scale = max_abs / qmax``, dequantized back to floats, and compared
    with the original vector via cosine similarity
    ``dot(a, b) / (||a|| * ||b||)``.

    Args:
        embeddings: Batch of embedding vectors. Vectors may be empty.
        bit_width: Number of bits of the signed integer representation.
            With ``bit_width == 1`` the only available level is 0, so every
            non-zero vector is reconstructed as all zeros (similarity 0.0).

    Returns:
        A dict with two keys:
        ``"per_vector_similarity"`` - one cosine similarity per input
        vector, rounded to 6 decimals (0.0 when either vector has zero
        norm, e.g. empty or all-zero input);
        ``"average_similarity"`` - mean of those rounded values, itself
        rounded to 6 decimals, or 0.0 for an empty batch.

    Raises:
        ValueError: If ``bit_width`` is less than 1 and a non-zero vector
            has to be quantized.
    """
    import math

    def dot(a: list[float], b: list[float]) -> float:
        return sum(x * y for x, y in zip(a, b))

    def norm(a: list[float]) -> float:
        return math.sqrt(sum(x * x for x in a))

    def cosine_sim(a: list[float], b: list[float]) -> float:
        # cosine similarity = dot(a, b) / (||a|| * ||b||)
        n_a = norm(a)
        n_b = norm(b)
        if n_a == 0 or n_b == 0:
            # Undefined for zero-norm vectors; report 0.0 by convention.
            return 0.0
        return dot(a, b) / (n_a * n_b)

    def quantize_dequantize(vec: list[float], bits: int) -> list[float]:
        if not vec:
            return vec
        max_abs = max(abs(x) for x in vec)
        if max_abs == 0:
            # All-zero vector: nothing to quantize, return a copy.
            return vec[:]
        if bits < 1:
            # Previously surfaced as "negative shift count" from the shift
            # below; same exception type, clearer message.
            raise ValueError(f"bit_width must be at least 1, got {bits}")
        qmax = (1 << (bits - 1)) - 1
        if qmax == 0:
            # 1-bit symmetric grid has the single level 0. Dividing by
            # qmax would raise ZeroDivisionError, so short-circuit to the
            # all-zero reconstruction that the grid implies.
            return [0.0] * len(vec)
        # scale = max_abs / (2^(bits-1) - 1)
        scale = max_abs / qmax
        # The clamp guards against float error pushing a level past +/-qmax.
        quantized = [max(-qmax, min(qmax, round(x / scale))) for x in vec]
        return [q * scale for q in quantized]

    similarities = [
        round(cosine_sim(emb, quantize_dequantize(emb, bit_width)), 6)
        for emb in embeddings
    ]
    # The average is taken over the already-rounded per-vector values.
    avg_sim = sum(similarities) / len(similarities) if similarities else 0.0
    return {
        "per_vector_similarity": similarities,
        "average_similarity": round(avg_sim, 6)
    }