;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; CSCI 4150 Intro to Artificial Intelligence, Fall 2005
; Assignment 7
; 
; State calculation example
; 
(load "a7code")

; For this example, here is how I decided to set up the states.  These
; states depend only on the value of the player's hand and do not
; consider the dealer's hand.  The states used for this example are
; not a particularly effective choice of how to transform the game
; state into a reinforcement learning state, but they should give you
; the general idea of what you can do.
; 
; 0 = 14 or less, still playing hand
; 1 = 15-21, still playing hand
; 2 = stand on 14 or less
; 3 = stand on 15-21
; 4 = doubled down on 14 or less (total doesn't include new card)
; 5 = doubled down on 15-21 (total doesn't include new card)
; 6 = busted


; determine the initial rl-state
; 
(define (calc-initial-state player-hand dealer-hand)
  ; Only the player's total is consulted; dealer-hand is accepted but
  ; not used.  A total above 14 starts in rl-state 1, anything else
  ; starts in rl-state 0.
  (let ((total (bj-value player-hand)))
    (if (> total 14)
        1
        0)))

; determine the rl-state that results from an action taken from the
; previous rl-state
;
(define (calc-new-state previous-rl-state action terminal? reward
                        player-hand dealer-hand)
  ; Maps (previous rl-state, action, outcome) to the next rl-state.
  ; reward and dealer-hand are accepted but not used here.
  ; The hand total is computed up front, before the action is examined.
  (let ((total (bj-value player-hand)))
    (case action
      ; standing shifts the playing state 0/1 to the "stand" state 2/3
      ((stand) (+ previous-rl-state 2))
      ; doubling down shifts the playing state 0/1 to state 4/5
      ((double-down) (+ previous-rl-state 4))
      (else
       (cond (terminal? 6)      ; any other action that ends the hand: busted
             ((> total 14) 1)   ; still playing, total above 14
             (else 0))))))      ; still playing, total 14 or less
     
; you have to tell the support code which states are terminal states.
; in return, the support code automatically sets the utility of a
; terminal state to be the average reward for that state.
; 
(define (terminal-state? rl-state)
  ; rl-states 0 and 1 are the "still playing" states; every state
  ; numbered above 1 is reported as terminal.
  (< 1 rl-state))

; initialize the tables - you have to declare how many states you're
; using.  i've taken advantage of the default to initialize the
; utility of nonterminal states to 0.
; 
(define (init-tables)
  ; This example uses seven rl-states, numbered 0 through 6.
  (let ((state-count 7))
    (create-tables state-count)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; stuff you need in order to play blackjack
;


; "actions" will be a list of actions permitted from the current state
;
; this is just a simple strategy for this example
; 
(define (random-strategy rl-state actions)
  ; Picks one of the permitted actions at random; rl-state is accepted
  ; but ignored.
  (let* ((choices (length actions))
         (pick (random choices)))
    (list-ref actions pick)))

; for this example, i'm not doing any learning, but this is where
; you'd implement temporal differencing.
; 
(define (non-learning-procedure fs a ts)
  ; Placeholder learner: all three arguments are ignored and the empty
  ; list is returned.
  (list))

; a "player"
; 
(define (player-bob)
  ; Builds the player list on every call: name, then the strategy
  ; procedure, then the learning procedure.
  (let ((name "Bob")
        (strategy random-strategy)
        (learner non-learning-procedure))
    (list name strategy learner)))

; BTW, we define a player as a procedure rather than as a fixed list
; because, as you develop your code and redefine a strategy or learning
; procedure, a player list that was already built would still point to
; the old procedure.  This way, whenever you call this procedure to
; create the player, it is created using the current version of the
; strategy and learning procedures.


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; you play blackjack using this player by doing the following:
;
; (load "a7example")
; (init-tables)
; (play-match 10 (player-bob))
;