Implement black jack as bottom up

2023-02-25 08:50:21 +01:00 · 2023-02-25 08:50:21 +01:00 · 50e5223f48
commit 50e5223f48
parent c6ab274222
1 changed files with 209 additions and 118 deletions
--- a/black_jack.py
+++ b/black_jack.py
@ -23,6 +23,8 @@ hand of at least 17, then stop.

 The players strategy in our case is really just choosing how many times
 they will hit.
+
+Improvements: see "TODO" tags
 """

 from typing import NamedTuple
@ -35,45 +37,28 @@ import itertools


 class Card(NamedTuple):
+    """A playing card."""
+
    label: str
+    """A string representation of a card: 'A ♣', '10 ♠', 'K ♦', ..."""
    value: int
-
-
-Deck = list[Card]
-
-
-#
-# Constant construction
-#
-SUITES = "♦ ♣ ♥ ♠".split()
-
-LOW = "A 2 3 4 5 6 7 8 9 10".split()
-HIGH = "J Q K".split()
-
-DECKS = 4
-"""Total number of decks used."""
-
-CARDS: Deck = [
-    Card((rank + suite), value)
-    for value, rank in itertools.chain(
-        ((i, rank) for i, rank in enumerate(LOW, 1)),
-        ((10, rank) for rank in HIGH))
-    for suite in SUITES
-    for _ in range(DECKS)
-]
-"""DECKS lots of standard decks of cards."""
-
-N = len(CARDS)
-"""Total number of cards."""
+    """The value associated with the card (see "Evaluating hand's score")."""


 class Hand(NamedTuple):
    """Hand value accumulable."""
+
    points: int = 0
    """Current hand value."""
    hard: bool = False
    """Whether it contains an ace."""

+    @classmethod
+    def new(cls, card1, card2):
+        """Create a new hand from first two cards."""
+        hand = Hand().draw(card1).draw(card2)
+        return Hand(hand.points, hand.hard)
+
    def evaluate(self) -> tuple[int, bool]:
        """Return (score, hard) for that hand.

@ -94,120 +79,226 @@ class Hand(NamedTuple):
        return score

    def draw(self, card: Card):
-        return Hand(
-            self.points + card.value,
-            self.hard | (card.value == 1)
-        )
+        """Implement the action of drawing a card."""
+        return Hand(self.points + card.value, self.hard | (card.value == 1))


-def cmp(a, b):
+class SubGame(NamedTuple):
+    """A sub-problem game."""
+
+    hits: int
+    """The player's hit count."""
+    player_score: int
+    """The player's score."""
+    dealer_score: int
+    """The dealer's score."""
+    outcome: int
+    """The outcome {-1, 0, 1}."""
+
+
+class SubResult(NamedTuple):
+    """A sub-problem result node."""
+
+    gain: int
+    """Total gain of the sequence."""
+    parent_pointer: int
+    """Pointer to previous node."""
+    sub_game: SubGame
+    """The sub-problem game."""
+
+
+Shoe = list[Card]
+
+
+class Game(NamedTuple):
+    """A human readable game representation."""
+
+    hits: int
+    """Number of player hits."""
+    player_cards: list[str]
+    """Player's cards."""
+    player_score: int
+    """Player's score."""
+    dealer_cards: list[str]
+    """Dealer's cards."""
+    dealer_score: int
+    """Dealer's score."""
+    outcome: int
+    """The outcome {-1, 0, 1}."""
+
+
+#
+# Constant construction
+#
+SUITES = "♦ ♣ ♥ ♠".split()
+
+LOW = "A 2 3 4 5 6 7 8 9 10".split()
+HIGH = "J Q K".split()
+
+CARDS: list[Card] = [
+    Card((rank + " " + suite), value)
+    for value, rank in itertools.chain(
+        ((i, rank) for i, rank in enumerate(LOW, 1)), ((10, rank)
+                                                       for rank in HIGH)
+    )
+    for suite in SUITES
+]
+"""Standard deck of cards."""
+
+
+def random_shoe(decks: int):
+    """Return a shuffled shoe made of `decks` standard decks."""
+    shoe = decks * CARDS
+    random.shuffle(shoe)
+    return shoe
+
+
+def cmp(a: int, b: int):
    """Compare two values, return from {-1, 0, 1}."""
    return (a > b) - (a < b)


-def black_jack(deck: Deck):
-    """"""
+def dealers_turn_at(shoe: Shoe, k: int, dealer_hand: Hand):
+    """Compute the dealer's score when the dealer starts hitting at index `k`.

-    memo: dict[int, int] = {}
-    stack: dict[int, tuple[tuple[int, int], int] | None] = {}
+    Return:
+        dealer's score: int
+        next deck index: int
+    """
+    for l in itertools.count(k):
+        dealer_hand = dealer_hand.draw(shoe[l])
+        # TODO: implement hard/soft 17
+        # Some black-jack rule variations ask the dealer to hit on a "soft 17", i.e.
+        # a 17 made from an Ace evaluated at 11.
+        dealer_score, _hard = dealer_hand.evaluate()
+        if 0 < dealer_score < 17:
+            continue
+        else:
+            # warning: `l + 1` may equal len(shoe), i.e. one past the last card
+            return dealer_score, l + 1

-    def dealer_hits(k: int, dealer_hand: Hand):
-        """Compute dealer's score when start hitting on `k`.
+    # this never happens, for type hinting only
+    raise NotImplementedError

-        Return:
-            dealer's score: int
-            next `k`: int
-        """
-        for l in itertools.count(k):
-            dealer_hand = dealer_hand.draw(deck[l])
-            # TODO: implement hard/soft 17
-            dealer_score, _hard = dealer_hand.evaluate()
-            if 0 < dealer_score < 17:
-                continue
+
+def iter_sub_games_at(shoe: Shoe, i: int):
+    """Iterate over possible games from index `i`.
+
+    Yields:
+        sub-problem game: SubGame
+        next index `i`: int
+    """
+    try:
+        # an index counter
+        ci = itertools.count(i)
+
+        # dealer and player get 2 cards each
+        player_hand = Hand.new(shoe[next(ci)], shoe[next(ci)])
+        dealer_hand = Hand.new(shoe[next(ci)], shoe[next(ci)])
+
+        # player will incrementally hit (start with no hits)
+        for hits in itertools.count(0):
+            # TODO: immediate win on natural black jack
+            player_score = player_hand.evaluate_score()
+            dealer_score, j = dealers_turn_at(shoe, k := next(ci), dealer_hand)
+
+            # yield game with that number of hits
+            outcome = cmp(player_score, dealer_score)
+            yield SubGame(hits, player_score, dealer_score, outcome), j
+
+            if 0 < player_score < 21:
+                # player hits (from saved card index)
+                player_hand = player_hand.draw(shoe[k])
            else:
-                return dealer_score, l + 1
+                # player stop hitting
+                break

-        # this never happens, for type hinting only
-        raise NotImplementedError
+    except IndexError:
+        # we ran past the end of the shoe: no more games
+        return

-    def games(i: int):
-        """Iterator over possible games from index `i`.

-        Yields:
-            game's outcome: {-1, 0, 1}
-            player's hit count: int
-            next index `i`: int
-        """
-        try:
-            # an index counter
-            ci = itertools.count(i)
+def black_jack(shoe: Shoe):
+    """Solve the full knowledge black jack problem."""
+    N = len(shoe)

-            # dealer and player get 2 cards each
-            player_hand = Hand().draw(deck[next(ci)]).draw(deck[next(ci)])
-            dealer_hand = Hand().draw(deck[next(ci)]).draw(deck[next(ci)])
+    # The sub results chain
+    #
+    # We add a node that will be forever `None` as it is possible for
+    # the `iter_sub_games_at()` to return a "next index" of N.
+    # Adding that node relieves us of having `Optional[int]` values and the
+    # checks coming with them.
+    sub_results: list[SubResult | None] = [None] * (N + 1)

-            # player will incrementally hit (start with no hits)
-            for hits in itertools.count(0):
-                player_score = player_hand.evaluate_score()
-                dealer_score, j = dealer_hits(k := next(ci), dealer_hand)
-
-                # yield outcome of that choice of hit count
-                yield cmp(player_score, dealer_score), hits, j
-
-                if 0 < player_score < 21:
-                    # player hits (from saved card index)
-                    player_hand = player_hand.draw(deck[k])
-                else:
-                    # player stop hitting
-                    break
-
-        except IndexError:
-            # we made it to the end of the deck: no more game
-            return
-
-    def dp(i: int):
-        """Recursive subproblem: return memoized max gain for deck[i:]."""
-
-        # check if memoized yet
-        max_gain = memo.get(i, None)
-        if max_gain is not None:
-            return max_gain
-
-        # base case, returned if no game is possible (deck is depleted)
+    # Bottom-up dynamic programming implementation
+    for i in reversed(range(N)):
+        # Base case: returned if no game is possible (deck is depleted)
        max_gain = 0
-        record = None
+        sub_result = None

-        # compute actual value, making "free" recursive calls
-        for outcome, hits, j in games(i):
-            gain = outcome + dp(j)
+        # Try all possible games starting at `i` and keep the one maximizing
+        # overall gain using pre-computed results (`next_index` is always greater than `i`)
+        for sub_game, next_index in iter_sub_games_at(shoe, i):
+            prev_node = sub_results[next_index]
+            gain = sub_game.outcome + (prev_node[0] if prev_node else 0)
            if gain > max_gain:
                max_gain = gain
-                record = (outcome, hits), j
+                sub_result = SubResult(gain, next_index, sub_game)

-        # set the memoization register and stack
-        memo[i] = max_gain
-        stack[i] = record
+        sub_results[i] = sub_result

-        return max_gain
+    # Job is done, now we visit the results chain from 0 to reconstruct the games sequence.
+    games: list[Game] = []

-    # call base case
-    dp(0)
+    i = 0
+    while sub_result := sub_results[i]:
+        _gain, next_index, sub_game = sub_result

-    parent_pointer = 0
-    while record := stack[parent_pointer]:
-        game, parent_pointer = record
-        yield game
+        player_dealt = range(i, i + 2)
+        dealer_dealt = range(player_dealt.stop, player_dealt.stop + 2)
+        player_hit = range(dealer_dealt.stop,
+                           dealer_dealt.stop + sub_game.hits)
+        dealer_hit = range(player_hit.stop, next_index)
+
+        games.append(
+            Game(
+                sub_game.hits,
+                [shoe[j].label for j in itertools.chain(
+                    player_dealt, player_hit)],
+                sub_game.player_score,
+                [shoe[j].label for j in itertools.chain(
+                    dealer_dealt, dealer_hit)],
+                sub_game.dealer_score,
+                sub_game.outcome
+            )
+        )
+
+        i = next_index
+
+    return games


-deck = CARDS[:]
-random.shuffle(deck)
-gain = 0
-games = 0
+if __name__ == "__main__":
+    OUTCOME_LABELS = {-1: "looses 😞", 0: "ties 😕", 1: "wins 😀"}

-print(f"Games:")
-for i, (outcome, hits) in enumerate(black_jack(deck), 1):
-    print(f"{i:2}. {hits} {outcome}")
-    gain += outcome
-    games = i
+    shoe = random_shoe(4)
+    print("Shoe:")

-print(f"Gains: {gain} ({gain/games}/games)")
+    gain = 0
+    games = black_jack(shoe)
+
+    print(f"Games:")
+    for i, (
+        hits,
+        player_cards,
+        player_score,
+        dealer_cards,
+        dealer_score,
+        outcome,
+    ) in enumerate(games, 1):
+        print(f"{i:2}. player hits {hits} and {OUTCOME_LABELS[outcome]}")
+        print(f"  {' '.join(player_cards)} ({player_score})")
+        print(f"  {' '.join(dealer_cards)} ({dealer_score})")
+        gain += outcome
+
+    print(f"Gain: {gain/len(games):0.2f} per game on average")