;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; Copyright (c) 1999 Wesley H. Huang. All rights reserved ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; Support for decision trees ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; classify ; ; This function classifies an example given a decision tree where the ; example has no missing attributes ; ; the optional third argument is a default value which is returned if ; an unknown value for an attribute is encountered ; (define (classify example dtree . default-value) (if (list? dtree) ; check an attribute and recurse (let* ((a-number (car dtree)) (a-val (list-ref example a-number)) (match (assoc a-val (cdr dtree)))) (if (not match) default-value (classify example (cadr match)))) ; return the value of the goal predicate dtree)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; split-td ; ; splits the training data according to the given attribute (number). ; considers only the attribute values that appear in the given ; training data. ; (define (split-td td attribute-no) (define (se-helper td split-list) (if (null? td) split-list (let* ((example (car td)) (a-value (list-ref (cadr example) attribute-no)) (split (assoc a-value split-list))) (if (equal? split #f) ; there is no split for the attribute value of this example ; create a new one, add it to the split list, and recurse (se-helper (cdr td) (cons (list a-value (list example)) split-list)) ; there is a split for the attribute value. add this example ; to the list and recurse (se-helper (cdr td) (assoc-list-subst (list (car split) (cons example (cadr split))) split-list)))))) (validate-td td "split-td") (if (or (not (number? attribute-no)) (>= attribute-no (length (cadar td))) (< attribute-no 0)) (begin (display "Invalid attribute number: ") (display attribute-no) (display "\npassed to split-td; should be between 0 and ") (display (length (cadar td))) (display "\n") (error "Invalid attribute number."))) (se-helper td '())) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; tally-td ; ; tally up the number of examples for each goal attribute value. ; considers only the goal attribute values that appear in the training ; data. ; (define (tally-td td) (define (tally-helper td tallies) (if (null? td) tallies (let* ((goal-predicate (caar td)) (tally (assoc goal-predicate tallies))) (if (equal? tally #f) (tally-helper (cdr td) (cons (list goal-predicate 1) tallies)) (tally-helper (cdr td) (assoc-list-subst (list (car tally) (+ (cadr tally) 1)) tallies)))))) (validate-td td "tally-td") (tally-helper td '())) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; classify-list ; (define (classify-list example-list dt) (map (lambda (e) (classify e dt)) example-list)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; test-dt ; (define (test-dt dt test-td) (let* ((examples (map cadr test-td)) (answers (map car test-td)) (classification (classify-list examples dt)) (matches (count-matches classification answers)) (unknowns (length (list-transform-positive classification null?))) (no-examples (length examples))) (display "\nOut of ") (display no-examples) (display " examples, ") (display matches) (display " were correctly classified,\n") (display (- no-examples matches unknowns)) (display " were incorrectly classified, and\n") (display unknowns) (display " had attribute values not seen before.\n\n"))) (define (count-matches a b) (define (cmh a b m) (if (null? a) m (cmh (cdr a) (cdr b) (if (equal? (car a) (car b)) (+ m 1) m)))) (cmh a b 0)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; support functions for learning decision trees with a discretize ; function ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (test-disc-dt discretize-fn dt test-td) (let* ((examples (map (lambda (x) (discretize-fn (cadr x))) test-td)) (answers (map car test-td)) (classification (classify-list examples dt)) (matches (count-matches classification answers)) (unknowns (length (list-transform-positive classification null?))) (no-examples (length examples))) (display "\nOut of ") (display no-examples) (display " examples, ") (display matches) (display " were correctly classified,\n") (display (- no-examples matches unknowns)) (display " were incorrectly classified, and\n") (display unknowns) (display " had attribute values not seen before.\n\n"))) ; ; this function calls YOUR learn-dt procedure ; (define (learn-disc-dt discretize-fn training-data) (learn-dt (map (lambda (x) (list (car x) (discretize-fn (cadr x)))) training-data))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; pick-majority ; ; given a tally (say from tally-td), picks the majority value ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define (pick-majority tally) (define (pm-helper tally best) (if (null? tally) (car best) (if (or (null? best) (> (cadar tally) (cadr best))) (pm-helper (cdr tally) (car tally)) (pm-helper (cdr tally) best)))) (pm-helper tally '())) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; general utility functions ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; only removes the first instance of e in l ; (define (remove-element e l) (if (null? l) '() (if (equal? e (car l)) (cdr l) (cons (car l) (remove-element e (cdr l)))))) (define (zero-to-n-1 n) (if (= n 0) '() (append (zero-to-n-1 (- n 1)) (list (- n 1))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ; print-dt ; ; prints out a decision tree nicely ; ; optional second argument specifies a list of attribute names ; (define (print-dt dt . anames) (define (numsym-len e) (if (symbol? e) (string-length (symbol->string e)) (if (number? e) (string-length (number->string e)) 5))) (define (pdt-helper dt indent) (if (list? dt) (begin (display "(") (display (car dt)) (display " ") (pdt-list (cdr dt) (+ indent 2 (numsym-len (car dt)))) (display ")")) (display dt))) (define (pdt-list lst indent) (if (null? lst) '() (begin (display "(") (display (caar lst)) (display " ") (pdt-helper (cadar lst) (+ indent 2 (numsym-len (caar lst)))) (display ")") (if (not (null? (cdr lst))) (begin (display "\n") (display (make-string indent #\space)))) (pdt-list (cdr lst) indent)))) (newline) (validate-dt dt "print-dt") (pdt-helper (if (null? anames) dt (make-readable-dt dt (car anames))) 0)) (define (make-readable-dt dt attribute-names) (if (list? dt) (append (list (list-ref attribute-names (car dt))) (map (lambda (x)(list (car x) (make-readable-dt (cadr x) attribute-names))) (cdr dt))) dt)) ; ; function used by split-td and tally-td ; ; substitutes an element (pair) into an association list ; (define (assoc-list-subst pair pair-list) (if (null? pair-list) '() (if (equal? (car pair) (caar pair-list)) (cons pair (cdr pair-list)) (cons (car pair-list) (assoc-list-subst pair (cdr pair-list)))))) ; ; validates that the argument is valid training data ; (define (validate-td td procname) (define (vtd-helper td nattributes) (if (null? td) #t (if (and (list? (car td)) (= (length (car td)) 2) (list? (cadar td)) (= nattributes (length (cadar td)))) (vtd-helper (cdr td) nattributes) (begin (display "Invalid training data given to ") (display procname) (display "\nThe expression: ") (display (car td)) (display "\nis not a valid training example or has the wrong number of attributes\n") (error "Invalid training example"))))) (if (and (list? td) (not (null? td)) (list? (car td)) (= (length (car td)) 2) (list? (cadar td))) (vtd-helper td (length (cadar td))) (begin (display "Invalid training data given to ") (display procname) (display "\nThe expression: ") (display td) (display "\nis not valid training data\n") (error "Invalid training data")))) (define (validate-dt dt procname) (define (validate-elements elst) (if (null? elst) #t (if (and (list? (car elst)) (= (length (car elst)) 2) (symbol? (caar elst)) (vdt-helper (cadar elst))) (validate-elements (cdr elst)) (begin (display "Invalid decision tree given to ") (display procname) (display "\nThe element: ") (display (car elst)) (display " is not valid.\n") (error "Invalid decision tree"))))) (define (vdt-helper dt) (if (symbol? dt) #t (if (and (list? dt) (> (length dt) 1) (or (number? (car dt)) (symbol? (car dt))) (validate-elements (cdr dt))) #t #f))) (newline) (vdt-helper dt))