/**
* Title: MapPlay
* Description: Comparison of HashMaps and TreeMaps
*
* counts word frequencies in a text file using a map
* @author hollingd@cs.rpi.edu
*/
import java.util.*;
import java.io.*;
/**
* <p>CountWords is a demonstration class used for comparing the performance of
* HashMap and TreeMap classes with respect to the lookup of map elements.</p>
*
* <p>The main() method builds a hashmap containing the word frequiences of
* a file named on the command line, then performs many map accesses
* (a random sequence of accesses). The accesses are timed and the result
* is printed out. The entire process is then repeated, but the second time
* using a TreeMap instead of a HashMap.</p>
*/
public class CountWords {
static final int NUM_ACCESSES=100000;
Map words;
// Constructor takes a map object
CountWords(Map w) {
words = w;
}
// countWordsInFile reads from a file and counts the frequency of each word
// found in the file. uses the map words to keep track of
// the count.
//
// assumes that map named words already exists!
void countWordsInFile(String filename) throws IOException {
BufferedReader in = new BufferedReader( new FileReader(filename));
String s = in.readLine();
while (s != null) {
// split the line in to words
String[] tokens = s.split("[^a-zA-Z0-9]");
for (int i=0;i<tokens.length;i++) {
if (words.containsKey(tokens[i]) ) {
int cnt = ((Integer)words.get(tokens[i])).intValue();
cnt++;
words.put(tokens[i],new Integer(cnt));
} else {
words.put(tokens[i],new Integer(1));
}
}
s = in.readLine();
}
}
public static void main(String[] args) {
long starttime,diff;
CountWords x;
// first try with a hashmap
x= new CountWords(new HashMap());
try {
x.countWordsInFile(args[0]);
} catch (Exception e) {
e.printStackTrace();
}
starttime = System.currentTimeMillis();
x.randomAccessTest();
diff = System.currentTimeMillis()-starttime;
System.out.println("hashmap took " + diff + " ms.");
// try with a treemap
x = new CountWords(new TreeMap());
try {
x.countWordsInFile(args[0]);
} catch (Exception e) {
e.printStackTrace();
}
starttime = System.currentTimeMillis();
// access a bunch of elements in the map
x.randomAccessTest();
diff = System.currentTimeMillis()-starttime;
System.out.println("tree took " + diff + " ms.");
}
// this method accesses values in the map by selecting
// a random sequence of keys to use. The idea is to
// time this to get some idea of the relative time it
// takes to access keys (in no particular order) in
// various type of Maps.
//
void randomAccessTest() {
// get a set of the keys in the map named words
Object[] keys = words.keySet().toArray();
int numkeys = keys.length;
Random r = new Random();
for (int i=0;i<NUM_ACCESSES;i++) {
String thekey = (String) keys[r.nextInt(numkeys)];
Object o = words.get(thekey);
}
}
// Prints out a list of the word count for each word found.
// If the map used is sorted (like a TreeMap), this list will
// appear sorted (the list will be alphabetical).
void printWordList() {
// get a set of the keys in the map named words
Set keys = words.keySet();
// create an iterator we can use to go through the
// keys in the set
Iterator i = keys.iterator();
while (i.hasNext()) {
String wrd = (String) i.next();
System.out.println(wrd + ": " + words.get(wrd));
}
}
}