/**
 * Title: MapPlay
 * Description: Comparison of HashMaps and TreeMaps
 * 
 * counts word frequencies in a text file using a map
 * @author hollingd@cs.rpi.edu
 */
import java.util.*;
import java.io.*;


/**
 * <p>CountWords is a demonstration class used for comparing the performance of 
 * HashMap and TreeMap classes with respect to the lookup of map elements.</p>
 *
 * <p>The main() method builds a hashmap containing the word frequiences of
 * a file named on the command line, then performs many map accesses 
 * (a random sequence of accesses). The accesses are timed and the result
 * is printed out. The entire process is then repeated, but the second time
 * using a TreeMap instead of a HashMap.</p>
 */
public class CountWords {

  static final int NUM_ACCESSES=100000;
  Map words;

  // Constructor takes a map object
  CountWords(Map w) {
	words = w;
  }

  // countWordsInFile reads from a file and counts the frequency of each word
  // found in the file. uses the map words to keep track of
  // the count.
  //
  // assumes that map named words already exists!

  void countWordsInFile(String filename) throws IOException {

	BufferedReader in = new BufferedReader( new FileReader(filename));
	String s = in.readLine();
	while (s != null) {
	  // split the line in to words
	  String[] tokens = s.split("[^a-zA-Z0-9]");
	  for (int i=0;i<tokens.length;i++) {
		if (words.containsKey(tokens[i]) ) {
		  int cnt = ((Integer)words.get(tokens[i])).intValue();
		  cnt++;
		  words.put(tokens[i],new Integer(cnt));
		} else {
		  words.put(tokens[i],new Integer(1));
		}
	  }
	  s = in.readLine();
	}
  }


  public static void main(String[] args) {
	long starttime,diff;
	CountWords x;


	// first try with a hashmap

	x=  new CountWords(new HashMap());

	try {
	  x.countWordsInFile(args[0]);
	} catch (Exception e) {
	  e.printStackTrace();
	}
	starttime = System.currentTimeMillis();

	x.randomAccessTest();

	diff = System.currentTimeMillis()-starttime;
	System.out.println("hashmap took " + diff + " ms.");


	// try with a treemap

	x = new CountWords(new TreeMap());
	try {
	  x.countWordsInFile(args[0]);
	} catch (Exception e) {
	  e.printStackTrace();
	}

	starttime = System.currentTimeMillis();
	// access a bunch of elements in the map
	x.randomAccessTest();

	diff = System.currentTimeMillis()-starttime;
	System.out.println("tree took " + diff + " ms.");

  }


  // this method accesses values in the map by selecting
  // a random sequence of keys to use. The idea is to
  // time this to get some idea of the relative time it 
  // takes to access keys (in no particular order) in
  // various type of Maps.

  //

  void randomAccessTest() {
	// get a set of the keys in the map named words
	Object[] keys = words.keySet().toArray();
	int numkeys = keys.length;

	Random r = new Random();

	for (int i=0;i<NUM_ACCESSES;i++) {
	  String thekey = (String) keys[r.nextInt(numkeys)];
	  Object o = words.get(thekey);
	}
  }


  // Prints out a list of the word count for each word found.
  // If the map used is sorted (like a TreeMap), this list will
  // appear sorted (the list will be alphabetical).

  void printWordList() {
	// get a set of the keys in the map named words
	Set keys = words.keySet();

	// create an iterator we can use to go through the
	// keys in the set
	Iterator i = keys.iterator();

	while (i.hasNext()) {
	  String wrd = (String) i.next();
	  System.out.println(wrd + ": " + words.get(wrd));
	}
  }

}



