Implement the Apriori algorithm for mining frequent itemsets. Given a list of transactions and a minimum support threshold, find all itemsets that appear in at least the specified fraction of transactions.
from itertools import combinations
def apriori(transactions: list[set], min_support: float) -> list[tuple]:
n_transactions = len(transactions)
min_count = min_support * n_transactions
# Count support for an itemset
def get_support_count(itemset):
count = 0
itemset_set = set(itemset)
for t in transactions:
if itemset_set.issubset(t):
count += 1
return count
# Find frequent 1-itemsets
all_items = set()
for t in transactions:
all_items.update(t)
frequent_itemsets = []
current_frequent = []
for item in all_items:
count = get_support_count([item])
if count >= min_count:
current_frequent.append(frozenset([item]))
frequent_itemsets.append((frozenset([item]), count / n_transactions))
k = 2
while current_frequent:
# Generate candidate k-itemsets from (k-1)-itemsets
candidates = set()
items_list = list(current_frequent)
for i in range(len(items_list)):
for j in range(i + 1, len(items_list)):
union = items_list[i] | items_list[j]
if len(union) == k:
candidates.add(union)
# Prune: all (k-1)-subsets must be frequent
freq_set = set(current_frequent)
pruned = []
for c in candidates:
all_sub_frequent = True
for item in c:
subset = c - frozenset([item])
if subset not in freq_set:
all_sub_frequent = False
break
if all_sub_frequent:
pruned.append(c)
# Count support
current_frequent = []
for itemset in pruned:
count = get_support_count(itemset)
if count >= min_count:
current_frequent.append(itemset)
frequent_itemsets.append((itemset, count / n_transactions))
k += 1
return frequent_itemsets