/** * Dictionary generator for the text/HTML word counter. * @author Robert J Morton * @version 04 September 2000 * @copyright Sep 2000 Robert J Morton (all rights reserved) */ /* This program accepts a word from from wc.class. If the word is not currently in the dictionary, it adds it to the dictionary. If the word is already in the dictionary, it increments its number of occurrences figure. It produces the dictionary as a file called dic.txt. This lists words and their numbers of occurrences in both alphabetical order and in descending order of occurrences. */ import java.io.*; // for saving dictionary class dic { private dicEnt // reference to an array of dictionary entries AE[], // for alphabetic dictionary NE[]; // for occurrences dictionary private int N = 0, // current number of entries n = 0; // current entry number private String W, // for word which is being submitted to the dictionary w; // the above cast to lower case dic() { // Construct the dictionary array AE = new dicEnt[100000]; // array of references to dictionary entries NE = new dicEnt[100000]; // ditto for sorting by number of occurrences } /* FIND WORD IN DICTIONARY BY BINARY SLICE SEARCH Called from only one place in submit(). */ private boolean find() { if(N == 0) // If there's nothing in the dictionary, return false; // go straight to enter new word. int x; // 3-state string comparison variable n = N >> 1; // start with the keyword half way up the index int j = n; // jump size /* While the jump size, having been halved, > zero, compare the sub- mitted word with the nth word in dictionary. If the submitted word, w > the retrieved word, we are too far down the index; so split the partition. Else if the submitted word, w < the retrieved word, we are too far up the index; so split the partition. Otherwise, we've found the exact word to be in the dictionary already, so return TRUE. */ while((j >>= 1) > 0) { x = w.compareTo(AE[n].getword()); if(x > 0) n += j; else if(x < 0) n -= j; else return true; } /* We're now too close to the matching entry to continue using the binary search. */ boolean u = false, // true indicates going up! d = false; // true indicates going down! /* While we've not yet reversed our direction of search along the index, compare the submitted word with the 'n'th word in the dictionary. */ while(!(u && d)) { x = w.compareTo(AE[n].getword()); /* If w > retrieved word, we are too far down the index, so: If, while stepping up, we overshoot the beginning of of the index, return with 'n' pointing at blank entry above current highest. If we have just reversed the direction of search return with 'n' pointing at the position where 'w' must be inserted. In either case, set the 'u' flag to show that we have just moved up the index. */ if(x > 0) { if(++n == N) break; if(d) break; u = true; } /* Else w must be < retrieved word, so we are too far up the index, so: If, while stepping down, we overshoot the end of the index, return with 'n' pointing at the position where w must be inserted. */ else if(x < 0) { if(u) break; if(--n < 0){ n = 0; break; } d = true; // In either case, set 'd' to indicate that } // we have just moved down the index. /* if submitted word = nth word in dictionary (exact match found), return true with n pointing at the matching dictionary entry. */ else return true; } return false; // keyword cannot be found so return false } /* Submit a word to the dictionary together with its capitalisation status. Called from only one place in wordCnt.wordCounter(). */ void submit(String q) { W = q; // make new submitted word visible to all methods w = q.toLowerCase(); // forced lower case version of the submitted word if(find()) { // if word already in the dictionary /* If word already in dictionary starts with a capital and identical submitted word starts with a small letter the small letter version takes precidence, so store it. */ if(Character.isUpperCase(AE[n].getWord().charAt(0)) && Character.isLowerCase(W.charAt(0))) AE[n].putWord(W); AE[n].incr(); // increment its number of occurrence } /* Else the word does not yet appear in the dictionary, so provided there is still room in dictionary entries reference array, move all higher entries up by one and insert the new entry and incre- ment the total number of entries in the dictionary. */ else if(N < AE.length) { for(int i = N; i > n; i--) AE[i] = AE[i - 1]; AE[n] = new dicEnt(W); N++; } // Else we need to enlarge the dictionary array and recompile. else System.out.println("DICTIONARY ENTRY REFERENCES ARRAY OVERFLOW!"); } /* AN ADAPTATION OF C.A.R. HOARE'S QUICK-SORT ALGORITHM Called only by itself and from only one place in save(). */ private void qs(int LO, int HI) { int lo = LO, // set moving lo to LO end of partition hi = HI; // set moving hi to HI end of partition if(HI <= LO) return; // exit if the partition contains nothing /* Get the content ofthe mid element of the partition, then loop through array until indices cross. */ int mid = NE[(LO + HI) >> 1].getOcc(); while(lo <= hi) { /* While lowest keyword < midway keyword, push the lower sort boundary up by one element. While highest keyword > midway keyword, pull the upper sort boundary down by one element. */ while(lo < HI && NE[lo].getOcc() < mid) lo++; while(hi > LO && NE[hi].getOcc() > mid) hi--; /* IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS' AS FOLLOWS: Get index of lo element, put index of hi element in lo element, put index of lo element in hi element, push lower sort boundary up by one element, pull upper sort boundary down by one element. */ if(lo <= hi) { dicEnt x = NE[lo]; NE[lo] = NE[hi]; NE[hi] = x; lo++; hi--; } } // end of while() loop if(LO < hi) // If 'hi' has not yet reached the start of the file, qs(LO,hi); // sort the lower partition. if(lo < HI) // If 'lo' has not yet reached the end of the file, qs(lo,HI); // sort the upper partition. } /* SAVE DICTIONARY TO FILE dic.txt Called from only one place in wc.main(). */ void save() { /* Copy all the dictionary entry references to the 'number of occurrences' array.*/ for(int i = 0; i < N; i++) NE[i] = AE[i]; System.out.print("Sorting dictionary in descending "); System.out.print("order of word occurrences ......"); qs(0,N - 1); // do a C.A.R. Hoare QuickSort on NE[] System.out.println(" done."); // advise console when sort is finished try { // for capturing file save IO Exceptions System.out.print("Saving dictionary to file 'dic.txt' ... "); // Create a buffered file output stream for dic.txt BufferedWriter o = new BufferedWriter( new OutputStreamWriter( new FileOutputStream("dic.txt") ) ); /* Write column headings to the output file dic.txt terminated by a system-specific 'new-line' */ String s = "WORDS ALPHABETICAL OCCURRENCES " + "WORDS BY OCCURRENCE OCCURRENCES"; o.write(s,0,s.length()); o.newLine(); /* For each entry in dictionary, format the [next] alphabetic entry, add a separator, format [next] occurrence entry, write the line to the file and terminate it with a system-specific 'new-line'. */ for(int i = 0; i < N; i++) { s = AE[i].format() + " " + NE[N - i - 1].format(); o.write(s,0,s.length()); o.newLine(); } /* Miss a line, then write the TOTALS line, terminated by a carriage- return, to the output file dic.txt then close the file. */ o.newLine(); s = "TOTAL VOCABULARY = " + N + "words."; o.write(s,0,s.length()); o.newLine(); o.close(); System.out.println(" done."); // advise colsole that dictionary stored } /* Advise console of location and nature of any malfunc- tions that may have occurred while saving the file. */ catch(IOException e) { System.out.println("Save: " + e); } } } class dicEnt { // Class of dictionary entries private String W; // the word as it appears private String w; // the word in lower case for sorting purposes private int o; // number of occurrences of word within all files dicEnt(String s) { // Construct a new dictionary entry W = s; // write submitted new word into class instance w = s.toLowerCase(); // store the word in lower case form o = 1; // this is the first occurrence of this word } void incr() {o++;} // increment the word's occurrence count String getWord() {return W;} // return the word as is String getword() {return w;} // return the word in lower case int getOcc() {return o;} // return its number of occurrences so far // overwrite word in this entry with W void putWord(String W) {this.W = W;} String format() { // convert this dictionary entry to a formatted string String s = W; // get the word string // extend it with spaces to make it 21 characters long for(int i = s.length(); i < 21; i++) s += " "; // convert its number of occurrences to a string String t = Integer.toString(o); // pad it with leading spaces to make it 11 characters long for(int i = t.length(); i < 11; i++) t = " " + t; return s + t; // return the concatenated strings } }