Solve the Gambler's Problem using value iteration. A gambler bets on coin flips: heads wins the stake, tails loses it. The gambler wins if they reach a target capital, loses if they go broke. Find the optimal value function and policy.
import numpy as np
def gamblers_problem(goal: int = 100, p_head: float = 0.4,
theta: float = 1e-10, max_iter: int = 10000):
V = np.zeros(goal + 1)
V[goal] = 1.0
for _ in range(max_iter):
delta = 0.0
for s in range(1, goal):
max_stake = min(s, goal - s)
values = []
for stake in range(1, max_stake + 1):
win_state = s + stake
lose_state = s - stake
val = p_head * V[win_state] + (1 - p_head) * V[lose_state]
values.append(val)
v = max(values)
delta = max(delta, abs(v - V[s]))
V[s] = v
if delta < theta:
break
policy = np.zeros(goal + 1, dtype=int)
for s in range(1, goal):
max_stake = min(s, goal - s)
best_action = 1
best_value = -1.0
for stake in range(1, max_stake + 1):
val = p_head * V[s + stake] + (1 - p_head) * V[s - stake]
if val > best_value:
best_value = val
best_action = stake
policy[s] = best_action
return V, policymin(s, goal - s) dollars.p_head capital increases by the stake; with probability 1 - p_head it decreases.V(s) = max_a [p * V(s+a) + (1-p) * V(s-a)].