Also available as CountWords.java

/**
 * Title: MapPlay
 * Description: Comparison of HashMaps and TreeMaps
 * 
 * counts word frequencies in a text file using a map
 * @author hollingd@cs.rpi.edu
 */
import java.util.*;
import java.io.*;


/**
 * <p>CountWords is a demonstration class used for comparing the performance of 
 * HashMap and TreeMap classes with respect to the lookup of map elements.</p>
 *
 * <p>The main() method builds a hashmap containing the word frequiences of
 * a file named on the command line, then performs many map accesses 
 * (a random sequence of accesses). The accesses are timed and the result
 * is printed out. The entire process is then repeated, but the second time
 * using a TreeMap instead of a HashMap.</p>
 */
public class CountWords {

  static final int NUM_ACCESSES=100000;
  Map words;

  // Constructor takes a map object
  CountWords(Map w) {
        words = w;
  }

  // countWordsInFile reads from a file and counts the frequency of each word
  // found in the file. uses the map words to keep track of
  // the count.
  //
  // assumes that map named words already exists!

  void countWordsInFile(String filename) throws IOException {

        BufferedReader in = new BufferedReader( new FileReader(filename));
        String s = in.readLine();
        while (s != null) {
          // split the line in to words
          String[] tokens = s.split("[^a-zA-Z0-9]");
          for (int i=0;i<tokens.length;i++) {
                if (words.containsKey(tokens[i]) ) {
                  int cnt = ((Integer)words.get(tokens[i])).intValue();
                  cnt++;
                  words.put(tokens[i],new Integer(cnt));
                } else {
                  words.put(tokens[i],new Integer(1));
                }
          }
          s = in.readLine();
        }
  }


  public static void main(String[] args) {
        long starttime,diff;
        CountWords x;


        // first try with a hashmap

        x=  new CountWords(new HashMap());

        try {
          x.countWordsInFile(args[0]);
        } catch (Exception e) {
          e.printStackTrace();
        }
        starttime = System.currentTimeMillis();

        x.randomAccessTest();

        diff = System.currentTimeMillis()-starttime;
        System.out.println("hashmap took " + diff + " ms.");


        // try with a treemap

        x = new CountWords(new TreeMap());
        try {
          x.countWordsInFile(args[0]);
        } catch (Exception e) {
          e.printStackTrace();
        }

        starttime = System.currentTimeMillis();
        // access a bunch of elements in the map
        x.randomAccessTest();

        diff = System.currentTimeMillis()-starttime;
        System.out.println("tree took " + diff + " ms.");

  }


  // this method accesses values in the map by selecting
  // a random sequence of keys to use. The idea is to
  // time this to get some idea of the relative time it 
  // takes to access keys (in no particular order) in
  // various type of Maps.

  //

  void randomAccessTest() {
        // get a set of the keys in the map named words
        Object[] keys = words.keySet().toArray();
        int numkeys = keys.length;

        Random r = new Random();

        for (int i=0;i<NUM_ACCESSES;i++) {
          String thekey = (String) keys[r.nextInt(numkeys)];
          Object o = words.get(thekey);
        }
  }


  // Prints out a list of the word count for each word found.
  // If the map used is sorted (like a TreeMap), this list will
  // appear sorted (the list will be alphabetical).

  void printWordList() {
        // get a set of the keys in the map named words
        Set keys = words.keySet();

        // create an iterator we can use to go through the
        // keys in the set
        Iterator i = keys.iterator();

        while (i.hasNext()) {
          String wrd = (String) i.next();
          System.out.println(wrd + ": " + words.get(wrd));
        }
  }

}