Implement black jack as bottom up

This commit is contained in:
Etienne Zind 2023-02-25 08:50:21 +01:00
parent c6ab274222
commit 50e5223f48

View File

@ -23,6 +23,8 @@ hand of at least 17, then stop.
The player's strategy in our case is really just choosing how many times
they will hit.
Improvements: see "TODO" tags
"""
from typing import NamedTuple
@ -35,45 +37,28 @@ import itertools
class Card(NamedTuple):
    """A playing card."""

    # Printable form of the card, e.g. 'A ♣', '10 ♠', 'K ♦'.
    label: str

    # Points the card contributes when added to a hand.
    value: int
Deck = list[Card]
#
# Constant construction
#
SUITES = "♦ ♣ ♥ ♠".split()
LOW = "A 2 3 4 5 6 7 8 9 10".split()
HIGH = "J Q K".split()
DECKS = 4
"""Total number of decks used."""
CARDS: Deck = [
Card((rank + suite), value)
for value, rank in itertools.chain(
((i, rank) for i, rank in enumerate(LOW, 1)),
((10, rank) for rank in HIGH))
for suite in SUITES
for _ in range(DECKS)
]
"""DECKS lots of standard decks of cards."""
N = len(CARDS)
"""Total number of cards."""
"""The value associated with the card. (see "Evaluating hand's score")."""
class Hand(NamedTuple):
"""Hand value accumulable."""
points: int = 0
"""Current hand value."""
hard: bool = False
"""Whether it contains an ace."""
@classmethod
def new(cls, card1, card2):
    """Build the starting hand: an empty hand that drew `card1` then `card2`."""
    empty = Hand()
    dealt = empty.draw(card1).draw(card2)
    # Re-wrap the accumulated fields in a fresh Hand, as the caller expects.
    points, hard = dealt.points, dealt.hard
    return Hand(points, hard)
def evaluate(self) -> tuple[int, bool]:
"""Return (score, hard) for that hand.
@ -94,120 +79,226 @@ class Hand(NamedTuple):
return score
def draw(self, card: Card):
    """Implement the action of drawing a card.

    Return a new Hand whose points are increased by `card.value` and whose
    `hard` flag is set once a card of value 1 (an ace) has been drawn.
    The current hand is left untouched.
    """
    return Hand(self.points + card.value, self.hard | (card.value == 1))
def cmp(a, b):
class SubGame(NamedTuple):
    """A sub-problem game."""

    hits: int
    """The player's hit count."""

    player_score: int
    """The player's score."""

    dealer_score: int
    """The dealer's score."""

    outcome: int
    """The outcome {-1, 0, 1}: player's score compared to the dealer's."""
class SubResult(NamedTuple):
    """A sub-problem result node."""

    gain: int
    """Total gain of the sequence of games starting at this node."""

    parent_pointer: int
    """Index of the node this result builds on (where the next game starts)."""

    sub_game: SubGame
    """The sub-problem game."""
Shoe = list[Card]
class Game(NamedTuple):
    """A human readable game representation."""

    # How many times the player hit.
    hits: int

    # Labels of the cards held by the player.
    player_cards: list[str]

    # Final score of the player.
    player_score: int

    # Labels of the cards held by the dealer.
    dealer_cards: list[str]

    # Final score of the dealer.
    dealer_score: int

    # Result of the game, one of {-1, 0, 1}.
    outcome: int
#
# Constant construction
#
SUITES = ["♦", "♣", "♥", "♠"]
LOW = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
HIGH = ["J", "Q", "K"]

# Standard deck of cards: every (value, rank) pair crossed with every suite.
# Low ranks are worth their position (ace = 1), face cards are worth 10.
CARDS: list[Card] = [
    Card(f"{rank} {suite}", value)
    for (value, rank), suite in itertools.product(
        itertools.chain(enumerate(LOW, 1), ((10, face) for face in HIGH)),
        SUITES,
    )
]
def random_shoe(decks: int):
    """Build a shoe made of `decks` copies of CARDS and shuffle it in place.

    Return the shuffled list of cards.
    """
    cards = [card for _ in range(decks) for card in CARDS]
    random.shuffle(cards)
    return cards
def cmp(a: int, b: int):
    """Three-way comparison: 1 when `a > b`, -1 when `a < b`, otherwise 0."""
    if a > b:
        return 1
    if a < b:
        return -1
    return 0
def black_jack(deck: Deck):
""""""
def dealers_turn_at(shoe: Shoe, k: int, dealer_hand: Hand):
    """Compute dealer's score when start hitting on `k`.

    The dealer draws `shoe[k]`, `shoe[k + 1]`, ... and stops as soon as the
    evaluated score is no longer strictly between 0 and 17.

    Return:
        dealer's score: int
        next deck index: int

    Raises:
        IndexError: if the shoe is exhausted before the dealer stops.
    """
    # NOTE(review): the dealer draws before the hand is evaluated, so at
    # least one card is always taken, even when the dealt hand already
    # scores 17 or more -- confirm this is intended.
    for index in itertools.count(k):
        dealer_hand = dealer_hand.draw(shoe[index])

        # TODO: implement hard/soft 17
        # Some black-jack rule variations ask the dealer to hit on a 17 made
        # from an Ace evaluated at 11 (commonly called a "soft 17").
        dealer_score, _hard = dealer_hand.evaluate()

        if not 0 < dealer_score < 17:
            # warning: `index + 1` might be out of shoe by one
            return dealer_score, index + 1

    # this never happens (`itertools.count` is endless), for type hinting only
    raise NotImplementedError
def dealer_hits(k: int, dealer_hand: Hand):
"""Compute dealer's score when start hitting on `k`.
# this never happens, for type hinting only
raise NotImplementedError
Return:
dealer's score: int
next `k`: int
"""
for l in itertools.count(k):
dealer_hand = dealer_hand.draw(deck[l])
# TODO: implement hard/soft 17
dealer_score, _hard = dealer_hand.evaluate()
if 0 < dealer_score < 17:
continue
def iter_sub_games_at(shoe: Shoe, i: int):
    """Iterate over possible games from index `i`.

    One game is produced per player hit count (0, 1, 2, ...): the player is
    dealt two cards, then the dealer two, then the player hits that many
    times and the dealer plays with the cards that follow.

    Yields:
        sub-problem game: SubGame
        next index `i`: int

    The iteration ends silently when the shoe runs out of cards.
    """
    try:
        # an index counter
        ci = itertools.count(i)

        # dealer and player get 2 cards each (player first)
        player_hand = Hand.new(shoe[next(ci)], shoe[next(ci)])
        dealer_hand = Hand.new(shoe[next(ci)], shoe[next(ci)])

        # player will incrementally hit (start with no hits)
        for hits in itertools.count(0):
            # TODO: immediate win on natural black jack
            player_score = player_hand.evaluate_score()

            # `k` is the first card not yet taken by the player: the dealer
            # starts there, and the player takes it on the next hit.
            k = next(ci)
            dealer_score, j = dealers_turn_at(shoe, k, dealer_hand)

            # yield game with that number of hits
            outcome = cmp(player_score, dealer_score)
            yield SubGame(hits, player_score, dealer_score, outcome), j

            if not 0 < player_score < 21:
                # player stops hitting
                break

            # player hits (from saved card index)
            player_hand = player_hand.draw(shoe[k])

    except IndexError:
        # we made it to the end of the deck: no more game
        return
def games(i: int):
"""Iterator over possible games from index `i`.
Yields:
game's outcome: {-1, 0, 1}
player's hit count: int
next index `i`: int
"""
try:
# an index counter
ci = itertools.count(i)
def black_jack(shoe: Shoe) -> list[Game]:
    """Solve the full knowledge black jack problem.

    Working backwards over the shoe, compute for every start index the game
    (i.e. player hit count) that maximizes the total gain of the games played
    from that index on, then walk the resulting chain from index 0.

    Return the sequence of games as human readable `Game` records. The list
    is empty when no sequence starting at index 0 has a positive total gain.
    """
    N = len(shoe)

    # The sub results chain
    #
    # We add a node that will be forever `None` as it is possible for
    # the `iter_sub_games_at()` to return a "next index" of N.
    # Adding that node relieves us of having `Optional[int]` values and the
    # checks coming with them.
    sub_results: list[SubResult | None] = [None] * (N + 1)

    # Bottom-up dynamic programming implementation
    for i in reversed(range(N)):
        # Base case: kept if no game is possible (deck is depleted)
        max_gain = 0
        sub_result = None

        # Try all possible games starting at `i` and keep the one maximizing
        # overall gain using pre-computed results (the next index is always
        # greater than `i`, so its entry has already been filled in)
        for sub_game, next_index in iter_sub_games_at(shoe, i):
            prev_node = sub_results[next_index]
            gain = sub_game.outcome + (prev_node.gain if prev_node else 0)
            if gain > max_gain:
                max_gain = gain
                sub_result = SubResult(gain, next_index, sub_game)

        sub_results[i] = sub_result

    # Job is done, now we visit the results chain from 0 to reconstruct the
    # games sequence.
    games: list[Game] = []

    i = 0
    while sub_result := sub_results[i]:
        _gain, next_index, sub_game = sub_result

        # Card layout of one game inside the shoe, in drawing order:
        # 2 player cards, 2 dealer cards, the player's hits, the dealer's hits.
        player_dealt = range(i, i + 2)
        dealer_dealt = range(player_dealt.stop, player_dealt.stop + 2)
        player_hit = range(dealer_dealt.stop, dealer_dealt.stop + sub_game.hits)
        dealer_hit = range(player_hit.stop, next_index)

        games.append(
            Game(
                sub_game.hits,
                [shoe[j].label for j in itertools.chain(player_dealt, player_hit)],
                sub_game.player_score,
                [shoe[j].label for j in itertools.chain(dealer_dealt, dealer_hit)],
                sub_game.dealer_score,
                sub_game.outcome,
            )
        )

        i = next_index

    return games
deck = CARDS[:]
random.shuffle(deck)
gain = 0
games = 0
if __name__ == "__main__":
    # Demo: solve one random 4-deck shoe and print every game of the best
    # sequence, followed by the average gain per game.
    OUTCOME_LABELS = {-1: "looses 😞", 0: "ties 😕", 1: "wins 😀"}

    shoe = random_shoe(4)
    print("Shoe:")

    gain = 0
    games = black_jack(shoe)
    print("Games:")
    for i, (
        hits,
        player_cards,
        player_score,
        dealer_cards,
        dealer_score,
        outcome,
    ) in enumerate(games, 1):
        print(f"{i:2}. player hits {hits} and {OUTCOME_LABELS[outcome]}")
        print(f"    {' '.join(player_cards)} ({player_score})")
        print(f"    {' '.join(dealer_cards)} ({dealer_score})")
        gain += outcome

    # `black_jack` may return no game at all; avoid dividing by zero then.
    average = gain / len(games) if games else 0.0
    print(f"Gain: {average:0.2f} per game on average")