Implement blackjack solver as bottom-up dynamic programming
This commit is contained in:
parent
c6ab274222
commit
50e5223f48
327
black_jack.py
327
black_jack.py
|
@ -23,6 +23,8 @@ hand of at least 17, then stop.
|
|||
|
||||
The player's strategy in our case is really just choosing how many times
|
||||
they will hit.
|
||||
|
||||
Improvements: see "TODO" tags
|
||||
"""
|
||||
|
||||
from typing import NamedTuple
|
||||
|
@ -35,45 +37,28 @@ import itertools
|
|||
|
||||
|
||||
class Card(NamedTuple):
    """A playing card.

    Pairs a printable label with the number of points the card adds to a
    hand. In the decks built by this module an ace is worth 1, number cards
    are worth their rank and J/Q/K are worth 10.
    """

    label: str
    """A string representation of a card: 'A ♣', '10 ♠', 'K ♦', ..."""
    value: int
|
||||
|
||||
|
||||
Deck = list[Card]
|
||||
|
||||
|
||||
#
|
||||
# Constant construction
|
||||
#
|
||||
SUITES = "♦ ♣ ♥ ♠".split()
|
||||
|
||||
LOW = "A 2 3 4 5 6 7 8 9 10".split()
|
||||
HIGH = "J Q K".split()
|
||||
|
||||
DECKS = 4
|
||||
"""Total number of decks used."""
|
||||
|
||||
CARDS: Deck = [
|
||||
Card((rank + suite), value)
|
||||
for value, rank in itertools.chain(
|
||||
((i, rank) for i, rank in enumerate(LOW, 1)),
|
||||
((10, rank) for rank in HIGH))
|
||||
for suite in SUITES
|
||||
for _ in range(DECKS)
|
||||
]
|
||||
"""DECKS lots of standard decks of cards."""
|
||||
|
||||
N = len(CARDS)
|
||||
"""Total number of cards."""
|
||||
"""The value associated with the card. (see "Evaluating hand's score")."""
|
||||
|
||||
|
||||
class Hand(NamedTuple):
|
||||
"""Hand value accumulable."""
|
||||
|
||||
points: int = 0
|
||||
"""Current hand value."""
|
||||
hard: bool = False
|
||||
"""Whether it contains an ace."""
|
||||
|
||||
@classmethod
|
||||
def new(cls, card1, card2):
|
||||
"""Create a new hand from first two cards."""
|
||||
hand = Hand().draw(card1).draw(card2)
|
||||
return Hand(hand.points, hand.hard)
|
||||
|
||||
def evaluate(self) -> tuple[int, bool]:
|
||||
"""Return (score, hard) for that hand.
|
||||
|
||||
|
@ -94,120 +79,226 @@ class Hand(NamedTuple):
|
|||
return score
|
||||
|
||||
def draw(self, card: Card):
|
||||
return Hand(
|
||||
self.points + card.value,
|
||||
self.hard | (card.value == 1)
|
||||
)
|
||||
"""Implement the action of drawing a card."""
|
||||
return Hand(self.points + card.value, self.hard | (card.value == 1))
|
||||
|
||||
|
||||
def cmp(a, b):
|
||||
class SubGame(NamedTuple):
    """A sub-problem game.

    Describes one candidate game: how many times the player hit, the final
    scores of both sides and the resulting outcome.
    """

    hits: int
    """The player's hit count."""
    player_score: int
    """The player's score."""
    dealer_score: int
    """The dealer's score."""
    outcome: int
    """The outcome {-1, 0, 1}: player loses, ties or wins."""
|
||||
|
||||
|
||||
class SubResult(NamedTuple):
    """A sub-problem result node.

    Nodes are stored per shoe index and linked through `parent_pointer`, so
    the chosen sequence of games can be rebuilt by following the pointers.
    """

    gain: int
    """Total gain of the sequence starting at this node."""
    parent_pointer: int
    """Index of the node to visit next (where the following game starts)."""
    sub_game: SubGame
    """The sub-problem game."""
|
||||
|
||||
|
||||
Shoe = list[Card]
|
||||
|
||||
|
||||
class Game(NamedTuple):
    """A human readable game representation."""

    hits: int
    """Number of player hits."""
    player_cards: list[str]
    """Player's cards (labels)."""
    player_score: int
    """Player's score."""
    dealer_cards: list[str]
    """Dealer's cards (labels)."""
    dealer_score: int
    """Dealer's score."""
    outcome: int
    """The outcome {-1, 0, 1}: player loses, ties or wins."""
|
||||
|
||||
|
||||
#
|
||||
# Constant construction
|
||||
#
|
||||
SUITES = "♦ ♣ ♥ ♠".split()

LOW = "A 2 3 4 5 6 7 8 9 10".split()
HIGH = "J Q K".split()

# One card per (rank, suite) pair. LOW ranks are worth their position
# (ace = 1 ... ten = 10); every HIGH rank is worth 10.
CARDS: list[Card] = [
    Card((rank + " " + suite), value)
    for value, rank in itertools.chain(
        ((i, rank) for i, rank in enumerate(LOW, 1)),
        ((10, rank) for rank in HIGH),
    )
    for suite in SUITES
]
"""Standard deck of cards."""
|
||||
|
||||
|
||||
def random_shoe(decks: int):
    """Return a shuffled shoe made of `decks` standard decks.

    Args:
        decks: number of copies of `CARDS` to put in the shoe.

    Return:
        A new list holding `decks` copies of every card of `CARDS`, in random
        order. `CARDS` itself is not modified.
    """
    # list repetition builds a fresh list, so shuffling it leaves CARDS intact
    shoe = decks * CARDS
    random.shuffle(shoe)
    return shoe
|
||||
|
||||
|
||||
def cmp(a: int, b: int) -> int:
    """Compare two values, return from {-1, 0, 1}.

    Return:
        -1 if `a` is smaller than `b`, 0 if they are equal, 1 if `a` is
        greater than `b`.
    """
    # booleans are integers: True - False == 1, False - True == -1
    return (a > b) - (a < b)
|
||||
|
||||
|
||||
def black_jack(deck: Deck):
|
||||
""""""
|
||||
def dealers_turn_at(shoe: Shoe, k: int, dealer_hand: Hand):
|
||||
"""Compute dealer's score when start hitting on `k`.
|
||||
|
||||
memo: dict[int, int] = {}
|
||||
stack: dict[int, tuple[tuple[int, int], int] | None] = {}
|
||||
Return:
|
||||
dealer's score: int
|
||||
next deck index: int
|
||||
"""
|
||||
for l in itertools.count(k):
|
||||
dealer_hand = dealer_hand.draw(shoe[l])
|
||||
# TODO: implement hard/soft 17
|
||||
# Some blackjack rule variations ask the dealer to hit on a "soft 17", i.e.
|
||||
# a 17 made from an Ace evaluated at 11.
|
||||
dealer_score, _hard = dealer_hand.evaluate()
|
||||
if 0 < dealer_score < 17:
|
||||
continue
|
||||
else:
|
||||
# warning `l + 1` might be out of shoe by one
|
||||
return dealer_score, l + 1
|
||||
|
||||
def dealer_hits(k: int, dealer_hand: Hand):
|
||||
"""Compute dealer's score when start hitting on `k`.
|
||||
# this never happens, for type hinting only
|
||||
raise NotImplementedError
|
||||
|
||||
Return:
|
||||
dealer's score: int
|
||||
next `k`: int
|
||||
"""
|
||||
for l in itertools.count(k):
|
||||
dealer_hand = dealer_hand.draw(deck[l])
|
||||
# TODO: implement hard/soft 17
|
||||
dealer_score, _hard = dealer_hand.evaluate()
|
||||
if 0 < dealer_score < 17:
|
||||
continue
|
||||
|
||||
def iter_sub_games_at(shoe: Shoe, i: int):
|
||||
"""Iterate over possible games from index `i`.
|
||||
|
||||
Yields:
|
||||
sub-problem game: SubGame
|
||||
next index `i`: int
|
||||
"""
|
||||
try:
|
||||
# an index counter
|
||||
ci = itertools.count(i)
|
||||
|
||||
# dealer and player get 2 cards each
|
||||
player_hand = Hand.new(shoe[next(ci)], shoe[next(ci)])
|
||||
dealer_hand = Hand.new(shoe[next(ci)], shoe[next(ci)])
|
||||
|
||||
# player will incrementally hit (start with no hits)
|
||||
for hits in itertools.count(0):
|
||||
# TODO: immediate win on natural black jack
|
||||
player_score = player_hand.evaluate_score()
|
||||
dealer_score, j = dealers_turn_at(shoe, k := next(ci), dealer_hand)
|
||||
|
||||
# yield game with that number of hits
|
||||
outcome = cmp(player_score, dealer_score)
|
||||
yield SubGame(hits, player_score, dealer_score, outcome), j
|
||||
|
||||
if 0 < player_score < 21:
|
||||
# player hits (from saved card index)
|
||||
player_hand = player_hand.draw(shoe[k])
|
||||
else:
|
||||
return dealer_score, l + 1
|
||||
# player stop hitting
|
||||
break
|
||||
|
||||
# this never happens, for type hinting only
|
||||
raise NotImplementedError
|
||||
except IndexError:
|
||||
# we made it to the end of the deck: no more game
|
||||
return
|
||||
|
||||
def games(i: int):
|
||||
"""Iterator over possible games from index `i`.
|
||||
|
||||
Yields:
|
||||
game's outcome: {-1, 0, 1}
|
||||
player's hit count: int
|
||||
next index `i`: int
|
||||
"""
|
||||
try:
|
||||
# an index counter
|
||||
ci = itertools.count(i)
|
||||
def black_jack(shoe: Shoe):
|
||||
"""Solve the full knowledge black jack problem."""
|
||||
N = len(shoe)
|
||||
|
||||
# dealer and player get 2 cards each
|
||||
player_hand = Hand().draw(deck[next(ci)]).draw(deck[next(ci)])
|
||||
dealer_hand = Hand().draw(deck[next(ci)]).draw(deck[next(ci)])
|
||||
# The sub results chain
|
||||
#
|
||||
# We add a node that will be forever `None` as it is possible for
|
||||
# the `iter_sub_games_at()` to return a "next index" of N.
|
||||
# Adding that node relieves us of having `Optional[int]` values and the
|
||||
# checks coming with them.
|
||||
sub_results: list[SubResult | None] = [None] * (N + 1)
|
||||
|
||||
# player will incrementally hit (start with no hits)
|
||||
for hits in itertools.count(0):
|
||||
player_score = player_hand.evaluate_score()
|
||||
dealer_score, j = dealer_hits(k := next(ci), dealer_hand)
|
||||
|
||||
# yield outcome of that choice of hit count
|
||||
yield cmp(player_score, dealer_score), hits, j
|
||||
|
||||
if 0 < player_score < 21:
|
||||
# player hits (from saved card index)
|
||||
player_hand = player_hand.draw(deck[k])
|
||||
else:
|
||||
# player stop hitting
|
||||
break
|
||||
|
||||
except IndexError:
|
||||
# we made it to the end of the deck: no more game
|
||||
return
|
||||
|
||||
def dp(i: int):
|
||||
"""Recursive subproblem: return memoized max gain for deck[i:]."""
|
||||
|
||||
# check if memoized yet
|
||||
max_gain = memo.get(i, None)
|
||||
if max_gain is not None:
|
||||
return max_gain
|
||||
|
||||
# base case, returned if no game is possible (deck is depleted)
|
||||
# Bottom-up dynamic programming implementation
|
||||
for i in reversed(range(N)):
|
||||
# Base case: returned if no game is possible (deck is depleted)
|
||||
max_gain = 0
|
||||
record = None
|
||||
sub_result = None
|
||||
|
||||
# compute actual value, making "free" recursive calls
|
||||
for outcome, hits, j in games(i):
|
||||
gain = outcome + dp(j)
|
||||
# Try all possible games starting at `i` and keep the one maximizing
|
||||
# overall gain using pre-computed results (`j` always greater than `i`)
|
||||
for sub_game, next_index in iter_sub_games_at(shoe, i):
|
||||
prev_node = sub_results[next_index]
|
||||
gain = sub_game.outcome + (prev_node[0] if prev_node else 0)
|
||||
if gain > max_gain:
|
||||
max_gain = gain
|
||||
record = (outcome, hits), j
|
||||
sub_result = SubResult(gain, next_index, sub_game)
|
||||
|
||||
# set the memoization register and stack
|
||||
memo[i] = max_gain
|
||||
stack[i] = record
|
||||
sub_results[i] = sub_result
|
||||
|
||||
return max_gain
|
||||
# Job is done, now we visit the results chain from 0 to reconstruct the games sequence.
|
||||
games: list[Game] = []
|
||||
|
||||
# call base case
|
||||
dp(0)
|
||||
i = 0
|
||||
while sub_result := sub_results[i]:
|
||||
_gain, next_index, sub_game = sub_result
|
||||
|
||||
parent_pointer = 0
|
||||
while record := stack[parent_pointer]:
|
||||
game, parent_pointer = record
|
||||
yield game
|
||||
player_dealt = range(i, i + 2)
|
||||
dealer_dealt = range(player_dealt.stop, player_dealt.stop + 2)
|
||||
player_hit = range(dealer_dealt.stop,
|
||||
dealer_dealt.stop + sub_game.hits)
|
||||
dealer_hit = range(player_hit.stop, next_index)
|
||||
|
||||
games.append(
|
||||
Game(
|
||||
sub_game.hits,
|
||||
[shoe[j].label for j in itertools.chain(
|
||||
player_dealt, player_hit)],
|
||||
sub_game.player_score,
|
||||
[shoe[j].label for j in itertools.chain(
|
||||
dealer_dealt, dealer_hit)],
|
||||
sub_game.dealer_score,
|
||||
sub_game.outcome
|
||||
)
|
||||
)
|
||||
|
||||
i = next_index
|
||||
|
||||
return games
|
||||
|
||||
|
||||
deck = CARDS[:]
|
||||
random.shuffle(deck)
|
||||
gain = 0
|
||||
games = 0
|
||||
if __name__ == "__main__":
|
||||
OUTCOME_LABELS = {-1: "looses 😞", 0: "ties 😕", 1: "wins 😀"}
|
||||
|
||||
print(f"Games:")
|
||||
for i, (outcome, hits) in enumerate(black_jack(deck), 1):
|
||||
print(f"{i:2}. {hits} {outcome}")
|
||||
gain += outcome
|
||||
games = i
|
||||
shoe = random_shoe(4)
|
||||
print("Shoe:")
|
||||
|
||||
print(f"Gains: {gain} ({gain/games}/games)")
|
||||
gain = 0
|
||||
games = black_jack(shoe)
|
||||
|
||||
print(f"Games:")
|
||||
for i, (
|
||||
hits,
|
||||
player_cards,
|
||||
player_score,
|
||||
dealer_cards,
|
||||
dealer_score,
|
||||
outcome,
|
||||
) in enumerate(games, 1):
|
||||
print(f"{i:2}. player hits {hits} and {OUTCOME_LABELS[outcome]}")
|
||||
print(f" {' '.join(player_cards)} ({player_score})")
|
||||
print(f" {' '.join(dealer_cards)} ({dealer_score})")
|
||||
gain += outcome
|
||||
|
||||
print(f"Gain: {gain/len(games):0.2f} per game on average")
|
||||
|
|
Loading…
Reference in New Issue
Block a user