;; Huffman Encoding
;; ADT for Huffman leaf
;; Construct a Huffman leaf: a three-element list tagged with the
;; symbol `leaf`, followed by the leaf's SYMBOL and its WEIGHT.
(define (make-leaf symbol weight)
  (cons 'leaf
        (cons symbol
              (cons weight '()))))
;; Predicate: #t when OBJ is a pair whose first element is the tag
;; `leaf`; #f for anything else (including non-pairs).
(define (leaf? obj)
  (if (pair? obj)
      (eq? 'leaf (car obj))
      #f))
;; Selector: the symbol stored in a leaf (second element of the list).
(define (symbol-leaf leaf)
  (car (cdr leaf)))
;; Selector: the weight stored in a leaf (third element of the list).
(define (weight-leaf leaf)
  (car (cdr (cdr leaf))))
;; Build an internal tree node from two subtrees.  The node is a
;; four-element list: LEFT, RIGHT, the concatenation of both subtrees'
;; symbol lists (left symbols first), and the sum of their weights.
(define (make-code-tree left right)
  (let ((all-symbols (append (symbols left) (symbols right)))
        (total-weight (+ (weight left) (weight right))))
    (list left right all-symbols total-weight)))
;; Selector: the left subtree of an internal node (first element).
(define (left-branch tree)
  (car tree))
;; Selector: the right subtree of an internal node (second element).
(define (right-branch tree)
  (car (cdr tree)))
;; Return the list of symbols reachable from TREE.  A leaf yields a
;; fresh one-element list; an internal node yields the symbol list
;; stored as its third element.
(define (symbols tree)
  (cond ((leaf? tree) (list (symbol-leaf tree)))
        (else (car (cddr tree)))))
;; Return the weight of TREE: the leaf's own weight for a leaf,
;; otherwise the combined weight stored as the node's fourth element.
(define (weight tree)
  (cond ((leaf? tree) (weight-leaf tree))
        (else (car (cdddr tree)))))
;; Decode BITS (a list of 0s and 1s) into a list of symbols using the
;; Huffman TREE.  Each bit selects a child via `choose-branch`; when a
;; leaf is reached its symbol is emitted and the walk restarts at the
;; root.  Bits left over after the last completed symbol produce no
;; output.
(define (decode bits tree)
  (define (walk remaining node)
    (cond ((null? remaining) '())
          (else
           (let ((child (choose-branch (car remaining) node))
                 (rest (cdr remaining)))
             (if (leaf? child)
                 ;; Symbol complete: emit it and start over from the root.
                 (cons (symbol-leaf child) (walk rest tree))
                 ;; Still inside the tree: keep descending.
                 (walk rest child))))))
  (walk bits tree))
;; Pick the child of BRANCH selected by BIT: 0 goes left, 1 goes right.
;; Any other numeric value signals a "bad bit" error.
(define (choose-branch bit branch)
  (if (= bit 0)
      (left-branch branch)
      (if (= bit 1)
          (right-branch branch)
          (error "bad bit" bit))))
;; Insert tree X into SET, a list of trees ordered by increasing weight.
;; X is placed immediately before the first element whose weight is
;; strictly greater than X's, so it lands after any elements of equal
;; weight.
(define (adjoin-set x set)
  (if (null? set)
      (list x)
      (let ((head (car set)))
        (if (< (weight x) (weight head))
            (cons x set)
            (cons head (adjoin-set x (cdr set)))))))
;; Convert PAIRS, a list of (symbol weight) lists, into a weight-ordered
;; set of leaves.  Pairs are inserted starting from the last one, so the
;; relative order of equal-weight leaves follows `adjoin-set` applied in
;; that sequence.
(define (make-leaf-set pairs)
  (let loop ((pending (reverse pairs))
             (leaves '()))
    (if (null? pending)
        leaves
        (let ((entry (car pending)))
          (loop (cdr pending)
                (adjoin-set (make-leaf (car entry) (cadr entry))
                            leaves))))))
;; Example (symbol weight) pairs used to build the demo tree `h1`.
(define ex-pairs
  (list (list 'a 4)
        (list 'b 2)
        (list 'c 1)
        (list 'd 1)))
;; Hand-built example tree: `a` (weight 4) on the left of the root; on
;; the right, `b` (weight 2) beside a subtree holding `d` and `c`
;; (weight 1 each).
(define sample-tree
  (let* ((d-c-subtree (make-code-tree (make-leaf 'd 1)
                                      (make-leaf 'c 1)))
         (b-subtree (make-code-tree (make-leaf 'b 2)
                                    d-c-subtree)))
    (make-code-tree (make-leaf 'a 4) b-subtree)))
;; Example bit sequence intended for decoding against `sample-tree`.
(define sample-msg
  (list 0 1 1 0 0 1 0 1 0 1 1 1 0))
;; Encode MESSAGE (a list of symbols) as one flat list of bits by
;; concatenating the code of each symbol, in order, as produced by
;; `encode-symbol` against TREE.
(define (encode message tree)
  (cond ((null? message) '())
        (else
         (let ((first-code (encode-symbol (car message) tree)))
           (append first-code
                   (encode (cdr message) tree))))))
;; Encode a single SYMBOL as a list of bits using the Huffman TREE.
;;
;; Starting at the root, the walk descends into whichever subtree's
;; symbol list contains SYMBOL, recording 0 for a left step and 1 for a
;; right step, until a leaf is reached.
;;
;; Returns the bits in root-to-leaf order.  When TREE is itself a single
;; leaf holding SYMBOL, the result is the empty list (no branching is
;; needed to reach it).
;;
;; Signals "symbol can't be encoded" when SYMBOL does not occur in TREE.
;; Membership is checked up front so that a tree consisting of a single
;; leaf also rejects foreign symbols instead of silently returning '().
(define (encode-symbol symbol tree)
  (define (iter branch code)
    ;; CODE accumulates bits most-recent-first; it is reversed once at
    ;; the leaf.  The standard non-destructive `reverse` is used so the
    ;; procedure does not depend on an implementation-specific `reverse!`.
    (cond ((leaf? branch) (reverse code))
          ((memq symbol (symbols (left-branch branch)))
           (iter (left-branch branch) (cons 0 code)))
          ((memq symbol (symbols (right-branch branch)))
           (iter (right-branch branch) (cons 1 code)))
          ;; Defensive: only reachable if a node's symbol list disagrees
          ;; with the symbol lists of its children.
          (else (error "symbol can't be encoded" symbol))))
  (if (memq symbol (symbols tree))
      (iter tree '())
      (error "symbol can't be encoded" symbol)))
;; Build a Huffman tree from PAIRS, a list of (symbol weight) lists:
;; first order the leaves by weight, then merge them repeatedly.
(define (generate-huffman-tree pairs)
  (let ((ordered-leaves (make-leaf-set pairs)))
    (successive-merge ordered-leaves)))
;; Repeatedly combine the two lowest-weight trees at the front of
;; LEAF-SET (a weight-ordered list) into one node and re-insert it with
;; `adjoin-set`, until a single tree remains; that tree is returned.
;; An empty LEAF-SET signals an error.
(define (successive-merge leaf-set)
  (if (null? leaf-set)
      (error "bug in Huffman construction")
      (let ((others (cdr leaf-set)))
        (if (null? others)
            ;; Only one tree left: it is the finished Huffman tree.
            (car leaf-set)
            (let ((merged (make-code-tree (car leaf-set)
                                          (car others))))
              (successive-merge
               (adjoin-set merged (cdr others))))))))
;; Demo round trip; each binding depends on the ones before it.
;; h1: Huffman tree generated from `ex-pairs`.
(define h1 (generate-huffman-tree ex-pairs))
;; m1: bit list obtained by encoding the message (a b c d c b a) with h1.
(define m1 (encode '(a b c d c b a) h1))
;; d1: result of decoding m1 with the same tree; for a correct
;; encode/decode pair this should reproduce (a b c d c b a).
(define d1 (decode m1 h1))
;; Sample English paragraph (a description of Huffman tree construction)
;; kept as a single string, suitable as input to `stats`.
;; NOTE(review): each line ends with a backslash, presumably relying on
;; the reader treating backslash-newline as a line continuation inside a
;; string literal -- confirm for the target Scheme implementation.
;; NOTE: the text contains the misspellings "tow" and "sume"; they are
;; left intact because this string is runtime data and correcting them
;; would change the character frequencies computed from it.
(define text1 "The algorithm for generating a Huffman tree is very simple. \
The idea is to arrange the tree so that the symbols with the lowest frequency \
appear farthest away from the root. Begin with the set of leaf nodes, \
containing symbols and their frequencies, as determined by the initial data \
from which the code is to be constructed. Now find tow leaves with the \
lowest weights and merge them to produce a node that has these two nodes as \
its left and right branches. The weight of the new node is the sume of the \
two weights. Remove the two leaves from the original set and replace them by \
this new node. Now continue this process. At each step, merge two nodes \
with the smallest weights, removing them from the set and replacing them with \
a node that has these two as its left and right branches. The process stops \
when there is only one node left, which is the root of the entire tree.")
;; Convert STR into a list of symbols, one per character and in the
;; same order, each symbol named by its single character.
(define (symbolize str)
  (define (char->symbol c)
    (string->symbol (string c)))
  ;; Walk the string from the last index down to 0 so that consing
  ;; builds the result in left-to-right order.
  (do ((i (- (string-length str) 1) (- i 1))
       (acc '() (cons (char->symbol (string-ref str i)) acc)))
      ((< i 0) acc)))
;; Count how often each character of TEXT occurs.  Returns a list of
;; (symbol count) entries, where the symbols come from `symbolize`.
;; New entries are consed onto the front, so the most recently
;; first-seen symbol appears first in the result.
(define (stats text)
  (let tally ((remaining (symbolize text))
              (table '()))
    (if (null? remaining)
        table
        (let* ((sym (car remaining))
               (entry (assq sym table)))
          (if entry
              (begin
                ;; Already seen: bump the count stored in the entry.
                (set-car! (cdr entry) (+ 1 (cadr entry)))
                (tally (cdr remaining) table))
              ;; First occurrence: start a new entry with count 1.
              (tally (cdr remaining)
                     (cons (list sym 1) table)))))))