/**
* Dictionary generator for the text/HTML word counter.
* @author Robert J Morton
* @version 04 September 2000
* @copyright Sep 2000 Robert J Morton (all rights reserved) */
/* This program accepts a word from from wc.class. If the word is not
currently in the dictionary, it adds it to the dictionary. If the word
is already in the dictionary, it increments its number of occurrences
figure. It produces the dictionary as a file called dic.txt. This lists
words and their numbers of occurrences in both alphabetical order and
in descending order of occurrences. */
import java.io.*; // for saving dictionary
class dic {
private dicEnt // reference to an array of dictionary entries
AE[], // for alphabetic dictionary
NE[]; // for occurrences dictionary
private int
N = 0, // current number of entries
n = 0; // current entry number
private String
W, // for word which is being submitted to the dictionary
w; // the above cast to lower case
dic() { // Construct the dictionary array
AE = new dicEnt[100000]; // array of references to dictionary entries
NE = new dicEnt[100000]; // ditto for sorting by number of occurrences
}
/* FIND WORD IN DICTIONARY BY BINARY SLICE SEARCH
Called from only one place in submit(). */
private boolean find() {
if(N == 0) // If there's nothing in the dictionary,
return false; // go straight to enter new word.
int x; // 3-state string comparison variable
n = N >> 1; // start with the keyword half way up the index
int j = n; // jump size
/* While the jump size, having been halved, > zero, compare the sub-
mitted word with the nth word in dictionary. If the submitted word,
w > the retrieved word, we are too far down the index; so split the
partition. Else if the submitted word, w < the retrieved word, we are
too far up the index; so split the partition. Otherwise, we've found
the exact word to be in the dictionary already, so return TRUE. */
while((j >>= 1) > 0) {
x = w.compareTo(AE[n].getword());
if(x > 0)
n += j;
else if(x < 0)
n -= j;
else
return true;
}
/* We're now too close to the matching entry
to continue using the binary search. */
boolean
u = false, // true indicates going up!
d = false; // true indicates going down!
/* While we've not yet reversed our direction of search along the index,
compare the submitted word with the 'n'th word in the dictionary. */
while(!(u && d)) {
x = w.compareTo(AE[n].getword());
/* If w > retrieved word, we are too far down the index, so: If, while
stepping up, we overshoot the beginning of of the index, return with
'n' pointing at blank entry above current highest. If we have just
reversed the direction of search return with 'n' pointing at the
position where 'w' must be inserted. In either case, set the 'u'
flag to show that we have just moved up the index. */
if(x > 0) {
if(++n == N)
break;
if(d)
break;
u = true;
}
/* Else w must be < retrieved word, so we are too far up the index,
so: If, while stepping down, we overshoot the end of the index, return
with 'n' pointing at the position where w must be inserted. */
else if(x < 0) {
if(u)
break;
if(--n < 0){
n = 0;
break;
}
d = true; // In either case, set 'd' to indicate that
} // we have just moved down the index.
/* if submitted word = nth word in dictionary (exact match found),
return true with n pointing at the matching dictionary entry. */
else
return true;
}
return false; // keyword cannot be found so return false
}
/* Submit a word to the dictionary together with its capitalisation
status. Called from only one place in wordCnt.wordCounter(). */
void submit(String q) {
W = q; // make new submitted word visible to all methods
w = q.toLowerCase(); // forced lower case version of the submitted word
if(find()) { // if word already in the dictionary
/* If word already in dictionary starts with a capital
and identical submitted word starts with a small letter
the small letter version takes precidence, so store it. */
if(Character.isUpperCase(AE[n].getWord().charAt(0))
&& Character.isLowerCase(W.charAt(0)))
AE[n].putWord(W);
AE[n].incr(); // increment its number of occurrence
}
/* Else the word does not yet appear in the dictionary, so provided
there is still room in dictionary entries reference array, move
all higher entries up by one and insert the new entry and incre-
ment the total number of entries in the dictionary. */
else if(N < AE.length) {
for(int i = N; i > n; i--)
AE[i] = AE[i - 1];
AE[n] = new dicEnt(W);
N++;
}
// Else we need to enlarge the dictionary array and recompile.
else System.out.println("DICTIONARY ENTRY REFERENCES ARRAY OVERFLOW!");
}
/* AN ADAPTATION OF C.A.R. HOARE'S QUICK-SORT ALGORITHM
Called only by itself and from only one place in save(). */
private void qs(int LO, int HI) {
int
lo = LO, // set moving lo to LO end of partition
hi = HI; // set moving hi to HI end of partition
if(HI <= LO) return; // exit if the partition contains nothing
/* Get the content ofthe mid element of the partition,
then loop through array until indices cross. */
int mid = NE[(LO + HI) >> 1].getOcc();
while(lo <= hi) {
/* While lowest keyword < midway keyword, push the lower sort
boundary up by one element. While highest keyword > midway
keyword, pull the upper sort boundary down by one element. */
while(lo < HI && NE[lo].getOcc() < mid) lo++;
while(hi > LO && NE[hi].getOcc() > mid) hi--;
/* IF LOW INDEX <= HIGH INDEX SWAP THEIR 'CONTENTS' AS FOLLOWS: Get
index of lo element, put index of hi element in lo element, put
index of lo element in hi element, push lower sort boundary up
by one element, pull upper sort boundary down by one element. */
if(lo <= hi) {
dicEnt x = NE[lo];
NE[lo] = NE[hi];
NE[hi] = x;
lo++;
hi--;
}
} // end of while() loop
if(LO < hi) // If 'hi' has not yet reached the start of the file,
qs(LO,hi); // sort the lower partition.
if(lo < HI) // If 'lo' has not yet reached the end of the file,
qs(lo,HI); // sort the upper partition.
}
/* SAVE DICTIONARY TO FILE dic.txt
Called from only one place in wc.main(). */
void save() {
/* Copy all the dictionary entry references
to the 'number of occurrences' array.*/
for(int i = 0; i < N; i++)
NE[i] = AE[i];
System.out.print("Sorting dictionary in descending ");
System.out.print("order of word occurrences ......");
qs(0,N - 1); // do a C.A.R. Hoare QuickSort on NE[]
System.out.println(" done."); // advise console when sort is finished
try { // for capturing file save IO Exceptions
System.out.print("Saving dictionary to file 'dic.txt' ... ");
// Create a buffered file output stream for dic.txt
BufferedWriter
o = new BufferedWriter(
new OutputStreamWriter(
new FileOutputStream("dic.txt")
)
);
/* Write column headings to the output file dic.txt
terminated by a system-specific 'new-line' */
String s = "WORDS ALPHABETICAL OCCURRENCES "
+ "WORDS BY OCCURRENCE OCCURRENCES";
o.write(s,0,s.length());
o.newLine();
/* For each entry in dictionary, format the [next] alphabetic entry,
add a separator, format [next] occurrence entry, write the line
to the file and terminate it with a system-specific 'new-line'. */
for(int i = 0; i < N; i++) {
s = AE[i].format() + " " + NE[N - i - 1].format();
o.write(s,0,s.length());
o.newLine();
}
/* Miss a line, then write the TOTALS line, terminated by a carriage-
return, to the output file dic.txt then close the file. */
o.newLine();
s = "TOTAL VOCABULARY = " + N + "words.";
o.write(s,0,s.length());
o.newLine();
o.close();
System.out.println(" done."); // advise colsole that dictionary stored
}
/* Advise console of location and nature of any malfunc-
tions that may have occurred while saving the file. */
catch(IOException e) {
System.out.println("Save: " + e);
}
}
}
class dicEnt { // Class of dictionary entries
private String W; // the word as it appears
private String w; // the word in lower case for sorting purposes
private int o; // number of occurrences of word within all files
dicEnt(String s) { // Construct a new dictionary entry
W = s; // write submitted new word into class instance
w = s.toLowerCase(); // store the word in lower case form
o = 1; // this is the first occurrence of this word
}
void incr() {o++;} // increment the word's occurrence count
String getWord() {return W;} // return the word as is
String getword() {return w;} // return the word in lower case
int getOcc() {return o;} // return its number of occurrences so far
// overwrite word in this entry with W
void putWord(String W) {this.W = W;}
String format() { // convert this dictionary entry to a formatted string
String s = W; // get the word string
// extend it with spaces to make it 21 characters long
for(int i = s.length(); i < 21; i++)
s += " ";
// convert its number of occurrences to a string
String t = Integer.toString(o);
// pad it with leading spaces to make it 11 characters long
for(int i = t.length(); i < 11; i++)
t = " " + t;
return s + t; // return the concatenated strings
}
}