;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; CSCI 4150 Intro to Artificial Intelligence, Fall 2005
; Assignment 7
;
; State calculation example
;

(load "a7code")

; For this example, here is how I decided to set up the states.  These
; states depend only on the value of the player's hand and do not
; consider the dealer's hand.  The states used for this example are
; not a particularly effective choice of how to transform the game
; state into a reinforcement learning state, but they should give you
; the general idea of what you can do.
;
;   0 = 14 or less, still playing hand
;   1 = 15-21, still playing hand
;   2 = stand on 14 or less
;   3 = stand on 15-21
;   4 = doubled down on 14 or less (total doesn't include new card)
;   5 = doubled down on 15-21 (total doesn't include new card)
;   6 = busted


; determine the initial rl-state
;
;   player-hand : the player's hand, passed to bj-value to get its total
;   dealer-hand : ignored by this example
;
; returns 0 when the hand's value is 14 or less, otherwise 1.
;
(define (calc-initial-state player-hand dealer-hand)
  (if (<= (bj-value player-hand) 14)
      0
      1))


; determine the rl-state that results from an action taken from the
; previous rl-state
;
;   previous-rl-state : the rl-state the action was taken from
;   action            : the action taken; only 'stand and 'double-down
;                       are tested for explicitly
;   terminal?         : true if the hand is over after this action
;   reward            : ignored by this example
;   player-hand       : the player's hand after the action
;   dealer-hand       : ignored by this example
;
; standing adds 2 to the previous state and doubling down adds 4, so
; the nonterminal states 0 and 1 map to 2/3 and 4/5 respectively; the
; new state therefore reflects the hand total *before* any new card.
;
(define (calc-new-state previous-rl-state action terminal? reward
                        player-hand dealer-hand)
  (let ((hand-value (bj-value player-hand)))
    (cond ((equal? action 'stand)
           (+ previous-rl-state 2))
          ((equal? action 'double-down)
           (+ previous-rl-state 4))
          (terminal?                ; did we bust?
           6)
          ((<= hand-value 14)       ; otherwise, it's a nonterminal state
           0)
          (else
           1))))


; you have to tell the support code which states are terminal states.
; in return, the support code automatically sets the utility of a
; terminal state to be the average reward for that state.
;
; with the numbering above, every state greater than 1 is terminal.
;
(define (terminal-state? rl-state)
  (> rl-state 1))


; initialize the tables - you have to declare how many states you're
; using.  I've taken advantage of the default to initialize the
; utility of nonterminal states to 0.
;
; init-tables takes no arguments and calls create-tables with 7, the
; number of rl-states (0 through 6) used by this example.
;
(define (init-tables)
  (create-tables 7))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;  stuff you need in order to play blackjack
;

; "actions" will be a list of actions permitted from the current state
;
; this is just a simple strategy for this example: it ignores the
; rl-state and returns one element of "actions" chosen at random.
;
(define (random-strategy rl-state actions)
  (list-ref actions (random (length actions))))


; for this example, I'm not doing any learning, but this is where
; you'd implement temporal differencing.
;
; all three arguments are ignored and the empty list is returned.
;
(define (non-learning-procedure fs a ts)
  '())


; a "player": a list of a name, a strategy procedure, and a learning
; procedure.
;
(define (player-bob)
  (list "Bob" random-strategy non-learning-procedure))

; BTW, we define a player as a procedure because, as you develop your
; code, a player built once would keep a pointer to the old version of
; any procedure you later redefine.  This way, whenever you call this
; procedure to create the player, it is created using the current
; version of the strategy and learning procedures.


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
;  you play blackjack using this player by doing the following:
;
;  (load "a7example")
;  (init-tables)
;  (play-match 10 (player-bob))
;