Implement learned positional embeddings for a transformer model. Given a maximum sequence length and embedding dimension, create a learnable embedding table that maps each position to a vector that is added to token embeddings.
import numpy as np
class LearnedPositionalEmbedding:
    """Learnable table of per-position vectors that is added to token embeddings.

    Row ``i`` of ``embedding`` is the vector for sequence position ``i``.

    Attributes:
        max_len: Number of positions the table was created with.
        d_model: Width of each position vector.
        embedding: Array of shape ``(max_len, d_model)`` holding the table.
    """

    def __init__(self, max_len: int, d_model: int):
        """Create the table filled with small random values.

        Args:
            max_len: Maximum sequence length the table can serve.
            d_model: Embedding dimension of each position vector.
        """
        self.max_len = max_len
        self.d_model = d_model
        # Initialize with small random values (normal, mean 0, std 0.02).
        self.embedding = np.random.normal(0, 0.02, (max_len, d_model))

    def forward(self, seq_len: int) -> np.ndarray:
        """Return the position vectors for the first ``seq_len`` positions.

        Args:
            seq_len: Number of leading positions to fetch.

        Returns:
            Array of shape ``(seq_len, d_model)``. It is a basic slice of
            ``embedding``, so it shares memory with the table.

        Raises:
            ValueError: If ``seq_len`` is negative or larger than the table.
        """
        # Check against the table itself so the bound is right even if
        # ``embedding`` was replaced after construction.
        table_len = self.embedding.shape[0]
        # Without this check a too-large seq_len is silently clamped by the
        # slice and a negative one drops rows from the end of the table.
        if not 0 <= seq_len <= table_len:
            raise ValueError(
                f"seq_len must be between 0 and max_len={table_len}, "
                f"got {seq_len}"
            )
        return self.embedding[:seq_len]

    def __call__(self, token_embeddings: np.ndarray) -> np.ndarray:
        """Add positional embeddings to ``token_embeddings``.

        Args:
            token_embeddings: Array whose last two axes are
                ``(seq_len, d_model)``; any leading axes are broadcast over.

        Returns:
            A new array with the same shape as ``token_embeddings``.

        Raises:
            ValueError: If the last axis does not match the table width, or
                the sequence is longer than the table.
        """
        seq_len = token_embeddings.shape[-2]
        width = token_embeddings.shape[-1]
        table_width = self.embedding.shape[-1]
        # A size-1 axis would otherwise broadcast silently and produce a
        # result with the wrong shape instead of an error.
        if width != table_width:
            raise ValueError(
                f"token_embeddings last dimension must be d_model={table_width}, "
                f"got {width}"
            )
        # forward() rejects sequences longer than the table; a one-row table
        # would otherwise be broadcast across every position.
        return token_embeddings + self.forward(seq_len)