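Implement the InfoNCE (NT-Xent) contrastive loss used in SimCLR-style self-supervised learning. Given a batch of 2N embeddings of shape (2N, d), where row i and row i+N are two augmented views of the same sample (a positive pair), compute the loss that pulls each positive pair together while pushing each embedding away from the other 2N-2 embeddings in the batch (its negatives).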
import numpy as np
def contrastive_loss(features: np.ndarray, temperature: float = 0.5) -> float:
    """Return the InfoNCE (NT-Xent) contrastive loss for a batch of paired views.

    Rows ``i`` and ``i + N`` of ``features`` are treated as a positive pair;
    for each row, every other row except itself competes in the softmax
    (the positive plus ``2N - 2`` negatives). Similarities are cosine
    similarities scaled by ``1 / temperature``.

    Args:
        features: Array of shape ``(2N, d)`` with ``N >= 1``. The first ``N``
            rows and the last ``N`` rows hold the two views, in matching
            order. Rows are L2-normalized internally, so they need not be
            unit length.
        temperature: Positive softmax temperature; smaller values sharpen
            the distribution over candidates.

    Returns:
        The cross-entropy of picking the positive partner, averaged over all
        ``2N`` rows.

    Raises:
        ValueError: If ``features`` is not 2-D, has an odd or zero number of
            rows, or if ``temperature`` is not strictly positive.
    """
    # Lower bound on row norms so an all-zero embedding normalizes to zeros
    # instead of producing NaNs that would poison the whole loss.
    eps = 1e-12

    feats = np.asarray(features, dtype=np.float64)
    if feats.ndim != 2:
        raise ValueError(
            f"features must be 2-D with shape (2N, d), got ndim={feats.ndim}"
        )
    total = feats.shape[0]
    if total < 2 or total % 2 != 0:
        raise ValueError(
            f"features must have an even, non-zero number of rows, got {total}"
        )
    # `not (t > 0)` also rejects NaN, which `t <= 0` would let through.
    if not temperature > 0:
        raise ValueError(f"temperature must be positive, got {temperature}")
    n = total // 2

    # Cosine similarity: normalize rows, then take all pairwise dot products.
    norms = np.linalg.norm(feats, axis=1, keepdims=True)
    feats = feats / np.maximum(norms, eps)
    sim_matrix = feats @ feats.T / temperature

    # The positive partner of row i is row (i + N) mod 2N. Read those logits
    # before the diagonal is overwritten below.
    rows = np.arange(total)
    pos_logits = sim_matrix[rows, (rows + n) % total]

    # Exclude self-similarity from the softmax denominator: exp(-inf) == 0,
    # so the diagonal contributes nothing to the sum.
    np.fill_diagonal(sim_matrix, -np.inf)

    # Numerically stable log-sum-exp per row. total >= 2 guarantees every
    # row keeps at least one finite entry, so row_max is finite.
    row_max = sim_matrix.max(axis=1, keepdims=True)
    log_sum_exp = np.log(np.exp(sim_matrix - row_max).sum(axis=1)) + row_max[:, 0]

    return float(np.mean(log_sum_exp - pos_logits))