# Implement the LoRA (Low-Rank Adaptation) forward pass. Given a frozen
# pre-trained weight matrix W and two low-rank matrices A and B, compute the
# adapted output: y = (W + (alpha / r) * B @ A) @ x.
def lora_forward(
    x: list[list[float]],
    W: list[list[float]],
    A: list[list[float]],
    B: list[list[float]],
    alpha: float,
    r: int,
) -> list[list[float]]:
    """Compute the LoRA-adapted output ``y = W @ x + (alpha / r) * B @ A @ x``.

    ``alpha / r`` controls the magnitude of the low-rank adaptation.  The
    product is evaluated as ``B @ (A @ x)``, so the full ``B @ A`` matrix is
    never materialised.

    Args:
        x: Input matrix; its row count must equal the column count of ``W``
            and of ``A``.
        W: Frozen weight matrix applied directly to ``x``.
        A: First low-rank factor, applied to ``x``.
        B: Second low-rank factor, applied to ``A @ x``; its column count
            must equal the row count of ``A``.
        alpha: Numerator of the scaling factor.
        r: Denominator of the scaling factor (the LoRA rank; it is not
            checked against the shapes of ``A`` and ``B``).

    Returns:
        A matrix with ``len(W)`` rows and ``len(x[0])`` columns, as a list of
        row lists.  An empty ``W`` yields ``[]``.

    Raises:
        ZeroDivisionError: If ``r`` is zero.
    """

    def matmul(a, b):
        """Return the matrix product ``a @ b`` as a new list of rows."""
        if not a:
            # No rows on the left means no rows in the product; this avoids
            # indexing a[0] on an empty matrix.
            return []
        inner = len(a[0])
        cols_b = len(b[0])
        result = [[0.0] * cols_b for _ in a]
        for i, a_row in enumerate(a):
            out_row = result[i]
            # Accumulate in k-order so floating-point rounding is stable.
            for k in range(inner):
                a_ik = a_row[k]
                b_row = b[k]
                for j in range(cols_b):
                    out_row[j] += a_ik * b_row[j]
        return result

    # Frozen output.
    Wx = matmul(W, x)

    # LoRA delta before scaling: B @ (A @ x).
    Ax = matmul(A, x)
    BAx = matmul(B, Ax)

    scale = alpha / r

    # Scale the delta and add it to the frozen output in a single pass.
    return [
        [w_val + BAx[i][j] * scale for j, w_val in enumerate(w_row)]
        for i, w_row in enumerate(Wx)
    ]