;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; CSCI 4150 Intro to Artificial Intelligence, Fall 2004
; Assignment 7
;
; State calculation example
;

(load "a7code")

; For this example, here is how I decided to set up the states.  These
; states depend only on the value of the player's hand and do not
; consider the dealer's hand.  The states used for this example are
; not a particularly effective choice of how to transform the game
; state into a reinforcement learning state, but they should give you
; the general idea of what you can do.
;
;   0 = 14 or less, still playing hand
;   1 = 15-21, still playing hand
;   2 = stand on 14 or less
;   3 = stand on 15-21
;   4 = doubled down on 14 or less (total doesn't include new card)
;   5 = doubled down on 15-21 (total doesn't include new card)
;   6 = busted, either from a hit or double-down


; calc-initial-state: choose the starting RL state for a freshly dealt
; hand.  Only the value of the player's hand is consulted (dealer-hand
; is ignored), so there are two possible initial states: 0 when the
; hand is worth 14 or less, 1 otherwise.
;
(define (calc-initial-state player-hand dealer-hand)
  (if (<= (bj-value player-hand) 14)
      0
      1))


; calc-new-state: an action takes us to a new RL state.  See the
; assignment handout for detailed descriptions of the arguments;
; this example does not use reward or dealer-hand.
;
(define (calc-new-state previous-rl-state action terminal? reward
                        player-hand dealer-hand)
  ; the hand value is computed up front, before any branch is taken
  (let ((total (bj-value player-hand)))
    (cond
      ; standing moves 0 -> 2 and 1 -> 3
      ((eq? action 'stand)
       (+ 2 previous-rl-state))
      ; a double-down that did not bust moves 0 -> 4 and 1 -> 5; the
      ; offset is applied to the old state, so the newly drawn card
      ; does not influence which of the two states is chosen
      ((and (eq? action 'double-down) (>= 21 total))
       (+ 4 previous-rl-state))
      ; did we bust, either from double-down or hit?
      ; (could have checked (> total 21) instead)
      (terminal? 6)
      ; otherwise it's a nonterminal state, picked the same way as
      ; the initial state
      (else
       (if (<= total 14) 0 1)))))


; terminal-state?: you have to tell the support code which states are
; terminal states.  For this example that is simple: states 0 and 1
; are the only "still playing" states, so everything above 1 is
; terminal.
;
(define (terminal-state? rl-state)
  (< 1 rl-state))


; here, you have to declare how many states you have.  you can also
; use the optional argument to create-tables to initialize nonterminal
; utilities to, for example, random values.
;
(define (init-tables)
  ; this example uses seven RL states, numbered 0 through 6
  (let ((state-count 7))
    (create-tables state-count)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; To use the support code, you need to have a "player"
;
; A "player" is a list of three things: a name (as a double-quoted
; string), a "strategy procedure", and a "learning procedure".
;
(define (random-player)
  (let ((name "Bob")
        (strategy random-strategy)
        (learner non-learning-procedure))
    (list name strategy learner)))

; BTW, we define a player as a procedure because as you develop your
; code, when you redefine a procedure, the player will have a pointer
; to the old procedure.  This way, whenever you call this procedure to
; create the player, it is created using the current version of the
; strategy and learning procedures.


; here is a very simple strategy procedure: it ignores the state and
; picks one of the available actions at random.  you will write
; strategy procedures that actually use the learned utility values to
; make decisions.
;
; note that "actions" is a list of the actions that the player may
; take from the current state.
;
(define (random-strategy state-num actions)
  (let ((pick (random (length actions))))
    (list-ref actions pick)))


; i'm not doing any learning for this example, so this procedure
; ignores its arguments and returns the empty list.  you will
; implement temporal differencing in a learning function for this
; assignment.
; NOTE(review): fs / a / ts presumably mean from-state, action and
; to-state -- confirm against the assignment handout.
;
(define (non-learning-procedure fs a ts)
  (list))


;
; you play blackjack using this player by doing the following:
;
;   (init-tables)
;   (play-match 10 (random-player))
;