Implement INT8 quantization for neural network weights. Convert floating-point weights to 8-bit integers using a scale and zero-point, and implement dequantization to recover approximate float values.
import numpy as np
def quantize_int8(weights: np.ndarray) -> dict:
    """Quantize weights to INT8 using an affine (scale + zero-point) mapping.

    Each value is mapped with
    ``q = clip(round(w / scale + zero_point), -128, 127)``.

    The float range used to derive ``scale`` is widened to include 0.0.
    Without that, a tensor whose values all share one sign would need a
    zero-point outside the int8 range; clipping it would saturate most of
    the quantized values and break the round trip.

    Args:
        weights: Array (or array-like) of numeric weights, any shape.

    Returns:
        A dict with three keys:
            "quantized": ``np.int8`` array with the same shape as the input.
            "scale": Python ``float``, the step between adjacent levels.
            "zero_point": Python ``int`` in [-128, 127] that represents 0.0.
        For empty input the array is empty, ``scale`` is 1.0 and
        ``zero_point`` is 0.
    """
    weights = np.asarray(weights)
    qmin, qmax = -128, 127

    # min()/max() are undefined on an empty array; return a neutral result.
    if weights.size == 0:
        return {
            "quantized": np.zeros(weights.shape, dtype=np.int8),
            "scale": 1.0,
            "zero_point": 0,
        }

    # Make sure 0.0 lies inside the represented range so that the
    # zero-point is guaranteed to fit in [qmin, qmax].
    w_min = min(float(weights.min()), 0.0)
    w_max = max(float(weights.max()), 0.0)

    scale = (w_max - w_min) / (qmax - qmin)
    if scale == 0:
        # Degenerate range (all weights are zero): any positive scale works.
        scale = 1.0

    # The clip is only a guard against floating-point rounding at the edges.
    zero_point = int(np.clip(np.round(qmin - w_min / scale), qmin, qmax))

    # Clip BEFORE casting: converting an out-of-range float to int8 wraps
    # around (or is undefined), so clipping afterwards would be too late.
    levels = np.round(weights / scale + zero_point)
    quantized = np.clip(levels, qmin, qmax).astype(np.int8)

    return {"quantized": quantized, "scale": float(scale), "zero_point": zero_point}
def dequantize_int8(quantized: np.ndarray, scale: float, zero_point: int) -> np.ndarray:
    """Map quantized integers back to approximate floating-point values.

    Applies ``(q - zero_point) * scale`` element-wise.

    Args:
        quantized: Array of quantized values; it is cast to ``float32``
            before any arithmetic.
        scale: Step size between adjacent quantized levels.
        zero_point: Quantized value that corresponds to 0.0.

    Returns:
        Array of recovered values with the same shape as ``quantized``.
    """
    # Cast first so the subtraction happens in floating point rather than
    # in the narrow integer type of the input.
    as_float = quantized.astype(np.float32)
    centered = as_float - zero_point
    return centered * scale