#432 · Machine Learning · Medium
⊣ Solve on deep-ml.comBuild an n-gram speculation dictionary from a corpus, then use it for draft token prediction. The dictionary maps each n-gram context to the most frequently observed next tokens with their frequencies. At inference time, look up the current context in the dictionary to generate draft tokens without a neural model.
def build_ngram_dict(
corpus_tokens: list[int],
n: int = 3,
max_candidates: int = 5
) -> dict:
ngram_counts = {}
for i in range(len(corpus_tokens) - n):
context = tuple(corpus_tokens[i:i + n])
next_token = corpus_tokens[i + n]
if context not in ngram_counts:
ngram_counts[context] = {}
ngram_counts[context][next_token] = ngram_counts[context].get(next_token, 0) + 1
# Build dictionary with top candidates per context
ngram_dict = {}
for context, counts in ngram_counts.items():
total = sum(counts.values())
sorted_candidates = sorted(counts.items(), key=lambda x: -x[1])[:max_candidates]
ngram_dict[context] = [
{"token": tok, "count": cnt, "prob": round(cnt / total, 4)}
for tok, cnt in sorted_candidates
]
return ngram_dict
def ngram_speculate(
ngram_dict: dict,
context_tokens: list[int],
n: int = 3,
num_draft: int = 5
) -> list[int]:
draft_tokens = []
current = list(context_tokens)
for _ in range(num_draft):
if len(current) < n:
break
context = tuple(current[-n:])
if context not in ngram_dict:
break
# Pick the most likely next token
best = ngram_dict[context][0]["token"]
draft_tokens.append(best)
current.append(best)
return draft_tokens
def ngram_speculation_pipeline(
corpus_tokens: list[int],
context_tokens: list[int],
n: int = 3,
num_draft: int = 5,
max_candidates: int = 5
) -> dict:
ngram_dict = build_ngram_dict(corpus_tokens, n, max_candidates)
draft = ngram_speculate(ngram_dict, context_tokens, n, num_draft)
return {
"dictionary_size": len(ngram_dict),
"draft_tokens": draft,
"num_drafted": len(draft),
"context_used": list(context_tokens[-n:]) if len(context_tokens) >= n else list(context_tokens)
}