;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; CSCI 4150 Intro to Artificial Intelligence, Fall 2004
; Assignment 7
; 
; State calculation example
; 
(load "a7code")

; For this example, here is how I decided to set up the states.  These
; states depend only on the value of the player's hand and do not
; consider the dealer's hand.  The states used for this example are
; not a particularly effective choice of how to transform the game
; state into a reinforcement learning state, but they should give you
; the general idea of what you can do.
; 
; 0 = 14 or less, still playing hand
; 1 = 15-21, still playing hand
; 2 = stand on 14 or less
; 3 = stand on 15-21
; 4 = doubled down on 14 or less (total doesn't include new card)
; 5 = doubled down on 15-21 (total doesn't include new card)
; 6 = busted, either from a hit or double-down


; two possible initial states depending only on the value of the
; player's hand
; 
(define (calc-initial-state player-hand dealer-hand)
  ; Map the opening deal to a starting RL state using only the value of
  ; the player's hand; dealer-hand is accepted but never consulted.
  ;   returns 0 when the hand value is 14 or less,
  ;           1 otherwise.
  (if (<= (bj-value player-hand) 14)
      0
      1))

; an action will take us to a new state.  see the assignment handout
; for detailed descriptions of the arguments.
;
(define (calc-new-state previous-rl-state action terminal? reward
                        player-hand dealer-hand)
  ; Compute the RL state reached after taking `action`.
  ; Only previous-rl-state, action, terminal? and player-hand are used;
  ; reward and dealer-hand are accepted but ignored.
  ;
  ; Result:
  ;   previous state + 2  when the player stands
  ;   previous state + 4  when the player doubles down without busting
  ;   6                   for any other terminal outcome (a bust)
  ;   0 or 1              when play continues (14-or-less / above 14)
  (let* ((total (bj-value player-hand))
         (over-21? (> total 21)))
    (cond ((equal? action 'stand)
           (+ 2 previous-rl-state))
          ((and (equal? action 'double-down) (not over-21?))
           (+ 4 previous-rl-state))
          ; anything terminal that reaches this point is treated as a bust
          (terminal? 6)
          ; the hand is still in play: bucket it by its current total
          ((> total 14) 1)
          (else 0))))
     
; you have to tell the support code which states are terminal states.
; for my example, this is pretty simple.
; 
(define (terminal-state? rl-state)
  ; States 0 and 1 are the only "still playing" states; every state
  ; numbered above 1 ends the hand.
  (< 1 rl-state))

; here, you have to declare how many states you have.  you can also
; use the optional argument to create-tables to initialize nonterminal
; utilities to, for example, random values.
; 
(define (init-tables)
  ; This example uses seven RL states, numbered 0 through 6.
  (let ((state-count 7))
    (create-tables state-count)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;
; To use the support code, you need to have a "player"
; 
; A "player" is a list of three things: a name (as a double-quoted
; string), a "strategy procedure", and a "learning procedure".
; 
(define (random-player)
  ; Build a fresh player record: (name strategy-procedure learning-procedure).
  ; The procedures are looked up at call time, so each call picks up
  ; whatever definitions are current.
  (let ((name "Bob")
        (strategy random-strategy)
        (learner non-learning-procedure))
    (list name strategy learner)))

; BTW, we define a player as a procedure rather than as a plain list
; because, as you develop your code and redefine a procedure, a list
; built earlier would still point to the old procedure.  This way,
; whenever you call this procedure to create the player, it is created
; using the current version of the strategy and learning procedures.


; here is a very simple strategy procedure.  you will write strategy
; procedures that actually use the learned utility values to make
; decisions.
;
; note that "actions" is a list of the actions that the player may
; take from the current state.
; 
(define (random-strategy state-num actions)
  ; Choose one of the permitted actions uniformly at random.
  ; state-num is accepted but not used by this strategy.
  (let* ((choice-count (length actions))
         (pick (random choice-count)))
    (list-ref actions pick)))

; i'm not doing any learning for this example, but you will implement
; temporal differencing in a learning function for this assignment.
; 
(define (non-learning-procedure fs a ts)
  ; Placeholder learning procedure: ignores all three arguments and
  ; always returns the empty list.
  (list))

; 
; you play blackjack using this player by doing the following:
;
; (init-tables)
; (play-match 10 (random-player))
;