Implement a Decision Tree classifier from scratch using information gain (entropy-based) to select the best feature splits.
import math
from collections import Counter
def decision_tree(examples: list[dict], features: list[str], target: str) -> dict:
# Get all target values
labels = [ex[target] for ex in examples]
label_counts = Counter(labels)
# If all examples have the same label, return that label
if len(label_counts) == 1:
return labels[0]
# If no features left, return majority label
if not features:
return label_counts.most_common(1)[0][0]
# Calculate entropy of current set
def entropy(data):
total = len(data)
if total == 0:
return 0
counts = Counter(d[target] for d in data)
return -sum((c / total) * math.log2(c / total) for c in counts.values() if c > 0)
# Find best feature by information gain
current_entropy = entropy(examples)
best_gain = -1
best_feature = None
for feature in features:
values = set(ex[feature] for ex in examples)
weighted_entropy = 0
for val in values:
subset = [ex for ex in examples if ex[feature] == val]
weighted_entropy += (len(subset) / len(examples)) * entropy(subset)
gain = current_entropy - weighted_entropy
if gain > best_gain:
best_gain = gain
best_feature = feature
# Build tree
tree = {best_feature: {}}
remaining_features = [f for f in features if f != best_feature]
values = set(ex[best_feature] for ex in examples)
for val in values:
subset = [ex for ex in examples if ex[best_feature] == val]
if not subset:
tree[best_feature][val] = label_counts.most_common(1)[0][0]
else:
tree[best_feature][val] = decision_tree(subset, remaining_features, target)
return tree