; CSCI 4150 Intro to AI, fall 2003 ; Assignment 7 support code header, Version 1.0.2 ; ; This file contains source code headers to the compiled code ; functions so that you get better messages while debugging. ; ; (load "a7code") ; example player (define (random-player) (list "Bob" random-strategy (lambda (fs a ts) '()))) ; random strategy (define (random-strategy game-state actions) (list-ref actions (random (length actions)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; 'headers' to the compiled code (define (play-match num-hands player) ; validate arguments (if (not (and (integer? num-hands) (>= num-hands 0))) (error "ERROR: play-match must play a nonnegative integral number of hands" num-hands)) (validate-player "play-match" player) ; play matches (c$play-match num-hands player)) (define (play-hand player) (validate-player "play-hand" player) (c$play-hand player)) (define (create-tables n-states . args) ; validate arguments (if (not (and (integer? n-states) (>= n-states 0) (or (= (length args) 0) (and (= (length args) 1) (procedure? (car args)))))) (error "ERROR: Improper arguments given to create-tables" (cons n-states args))) ; create tables (apply c$create-tables (cons n-states args))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; public interface to utilities, rewards, and transition probability tables ; ; check index bounds when accessing tables (define check-table-args #t) ; set this to #f to turn it off (define (get-reward state-num action) ; validate arguments (cond (check-table-args (check-table-state-number state-num "get-reward") (check-table-action action "get-reward"))) ; get reward (car (reward-element state-num action))) ; returns a list containing the average observed reward for all the ; "action" states 0 through the maximum state number ; (define (get-reward-vector action) ; validate arguments (cond (check-table-args (check-table-action action "get-reward-vector"))) ; get reward vector (actually a list) (map car (vector->list (case action ((hit) HIT-REWARDS) ((stand) STAND-REWARDS) ((double-down) DD-REWARDS))))) (define (get-utility-element state-num) ; validate arguments (cond (check-table-args (check-table-state-number state-num "get-utility-element"))) ; get utility (vector-ref HIT-UTILITIES state-num)) (define (get-utility-vector) (vector->list HIT-UTILITIES)) (define (set-utility-element state-num u) ; validate arguments (cond (check-table-args (check-table-state-number state-num "get-utility-element") (if (not (real? u)) (error (string-append "ERROR: second argument to " " set-utility-element must be a number") u)))) ; set utility (vector-set! HIT-UTILITIES state-num u)) ; get the list of transition probabilities from "from-state" under ; action "act" to all possible "to-states" ; (define (get-transition-vector fs-num action) ; validate arguments (cond (check-table-args (check-table-state-number fs-num "get-transition-vector") (check-table-action action "get-transition-vector"))) ; get (calculate) transition probabilities (let* ((v (vector->list (ttt-get-vector fs-num action))) (tot (exact->inexact (apply + v)))) (map (lambda (x) (/ x tot)) v))) (define (get-transition-element fs-num action ts-num) ; validate arguments (cond (check-table-args (check-table-state-number fs-num "get-transition-element") (check-table-action action "get-transition-element") (check-table-state-number ts-num "get-transition-element"))) ; get (calculate) transition probability (let* ((v (vector->list (ttt-get-vector fs-num action))) (tot (exact->inexact (apply + v)))) (/ (list-ref v ts-num) tot))) ; number of times that this state has been visited ; (define (get-visits-element state-num action) ; validate argument (cond (check-table-args (check-table-state-number state-num "get-visits-element") (check-table-action action "get-visits-element"))) ; get number of visits (second (get-reward-element state-num action))) ; number of times that the given action has been taken from the given state ; (define (get-action-state-visits state-num action) ; v is a list with the number of times we've transitioned from this ; state under action to each of the other hit states, so just add ; all the elements to know how many times we've taken this action ; from this state. (let ((v (vector->list (ttt-get-vector state-num action)))) (apply + v))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; Here is the implementation of the tables for transition ; probabilities, rewards, and utilities. You do not need to look at ; this code --- it is here for your reference only or in case you ; really need to access it more directly. If you don't really need to ; access the state directly, use the "public interface" procedure ; above. The support code accesses and changes these tables through ; accessors not in this file, so you should not change any of this ; code. ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Tables (actually vectors) for recording information ; ; TT-TABLE is a vector of length #states whose elements are ; vectors of length 3 (one for each action) whose elements are ; vectors of length #states. ; ; it keeps a count of the number of times that we have transitioned ; from state X under action A to state Y. ; ; UTILITY-TABLE is a vector of length #states whose elements are ; numbers. The learning algorithm will store its utility values for ; the "hit" states here. ; ; HIT-REWARDS, STAND-REWARDS, and DD-REWARDS are vectors of length ; #states whose elements are lists. These lists contain two numbers: ; the first is the average reward received in this state and the ; second is the number of times this state has been visited. This is ; necessary to maintain the running reward average, but it is also ; needed as an argument to the exploration function. (Although this ; information can be recovered from the TT-TABLE.) ; (define TT-TABLE '()) (define HIT-UTILITIES '()) (define HIT-REWARDS '()) (define STAND-REWARDS '()) (define DD-REWARDS '()) (define ACTION-LIST '(hit stand double-down)) ; turn an action symbol into a number betweeen 0 and (- (length ACTION-LIST) 1) (define (translate-action act) (let ((v (member act ACTION-LIST))) (if (null? v) (error "translate-action: invalid action" act) (- (length v) 1)))) ; access to raw transition tally table (define (ttt-get-vector from-state-num act) (vector-ref (vector-ref TT-TABLE from-state-num) (translate-action act))) (define (ttt-get-element from-state-num act to-state-num) (vector-ref (ttt-get-vector from-state-num act) to-state-num)) ; access to raw reward table (define (reward-element state-num action) (vector-ref (case action ((hit) HIT-REWARDS) ((stand) STAND-REWARDS) ((double-down) DD-REWARDS)) state-num)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; procedures for checking validity of inputs (define (validate-player proc-name player) (if (not (and (list? player) (= (length player) 3))) (error (string-append "ERROR: player for " proc-name " must be a list of 3 elements") player)) (if (not (string? (car player))) (error (string-append "ERROR: player for " proc-name " must have a string (its name) as element 0") player)) (if (not (and (procedure? (second player)) (can-take-args (second player) 2))) (error (string-append "ERROR: player strategy proc. for " proc-name " must be a procedure of 2 arguments") player)) (if (not (and (procedure? (second player)) (can-take-args (third player) 3))) (error (string-append "ERROR: player learning proc. for " proc-name " must be a procedure of 3 arguments") player)) 'ok) ; can the procedure "proc" take "num-args" args? ; ; properly handles procedure that take a variable number of arguments ; (define (can-take-args proc num-args) (let ((a (procedure-arity proc))) (if (null? (cdr a)) (>= num-args (car a)) (<= (car a) num-args (cdr a))))) (define (check-table-state-number n proc-name) (if (not (and (integer? n) (< -1 n number-of-states))) (error (string-append "ERROR: state-number argument to " proc-name "\n; is out of range (0 to " (number->string (- number-of-states 1)) ")." (if (zero? number-of-states) " Looks like you didn't create the tables.\n" "\n") "; Argument was: ") n))) (define (check-table-action action proc-name) (cond ((not (member action ACTION-LIST)) (print "Action argument to " proc-name " is not one of: hit, stand, double-down" 'newline) (error "ERROR --- action was: " action))))