; CSCI 4150 Introduction to Artificial Intelligence, Fall 2003
; Assignment 7 example
;

; here is the sample transform-state procedure from the handout
; 
(define (transform-state game-state)
  ; Map a game state onto one of four bucket indices (0-3).  Only the
  ; second element of game-state is consulted; it is handed to bj-value
  ; and the resulting number is bucketed as:
  ;   up to 11 -> 0,  12 to 16 -> 1,  17 to 21 -> 2,  anything above -> 3
  (define hand-total (bj-value (second game-state)))
  (cond ((>= 11 hand-total) 0)
        ((>= 16 hand-total) 1)
        ((>= 21 hand-total) 2)
        (else 3)))

; and its associated init-tables procedure
;
; this creates storage for all the tables assuming 4 states (0-3).
; The "hit utilities" are each initialized by calling the given
; procedure.
; 
(define (init-tables)
  ; Ask create-tables for storage covering 4 states (0-3).  The second
  ; argument is a zero-argument procedure; each call to it yields
  ; (random 2.0) shifted down by 1.0.
  (define state-count 4)
  (define (random-initial-utility)
    (- (random 2.0) 1.0))
  (create-tables state-count random-initial-utility))

; here are the transition table (used to calculate transition
; probabilities), rewards, and utilities that were learned on a run of
; 1000 hands using temporal differencing.  Note: these are probably not
; the optimal utilities, correct rewards, or correct transition
; probabilities.  These are simply to give you an example to try out.
;
; by loading this file, you will initialize these tables, so don't run
; the init-tables procedure or else you'll erase them!
; 
; Number of distinct state indices.  This agrees with transform-state,
; which returns 0 through 3, and with the 4 that init-tables passes to
; create-tables.
(define number-of-states 4)
; Transition counts, stored as a vector of 4 entries; each entry is a
; vector of 3 vectors, each holding 4 integer counts.
; NOTE(review): presumably the outer index is the state number returned
; by transform-state, the 3 inner vectors are the available actions,
; and the 4 counts are the resulting states -- the action ordering is
; not established in this file; confirm against the code that reads
; and updates this table.
; In the data below, the middle vector of each entry is non-zero only
; at the entry's own index, and the last entry is entirely zero.
(define TT-TABLE
  #(#(#(12 28 19 0) #(60 0 0 0) #(15 29 27 0))      ; outer index 0
    #(#(0 25 55 63) #(0 196 0 0) #(0 41 79 83))     ; outer index 1
    #(#(0 4 18 60) #(0 0 181 0) #(0 7 39 111))      ; outer index 2
    #(#(0 0 0 0) #(0 0 0 0) #(0 0 0 0))))           ; outer index 3
; Vector of 4 real numbers, one per index 0-3.
; NOTE(review): presumably indexed by the state number returned by
; transform-state; confirm against the code that reads this vector.
(define HIT-UTILITIES
  #(-.18788221048941328 -.37712882244228896 -.4166208226407792 -1.))
; Vector of 4 two-element lists, one per index 0-3.
; NOTE(review): each pair looks like (reward-estimate sample-count) --
; confirm against the code that updates this table.
(define HIT-REWARDS
  #((0. 71) (0. 203) (0. 157) (-1. 194)))
; Vector of 4 two-element lists, one per index 0-3.
; NOTE(review): each pair looks like (reward-estimate sample-count) --
; confirm against the code that updates this table.
; The last entry is (0 0), written with an exact 0 rather than the
; inexact reals used in the other entries.
(define STAND-REWARDS
  #((-.18333333333333326 120) (-.1479591836734694 392)
    (.17679558011049723 362) (0 0)))
; Vector of 4 two-element lists, one per index 0-3.
; NOTE(review): each pair looks like (reward-estimate sample-count),
; and "DD" presumably stands for double-down -- confirm against the
; assignment handout and the code that updates this table.
(define DD-REWARDS
  #((-.22535211267605634 71) (-.12999999999999995 200)
    (.3793103448275862 174) (-2. 123)))