/*
 * File: Dictionary.java
 * @author R. Morelli
 * Date: 6/4/2002
 *
 * This file implements a searchable dictionary that can be used
 *  by cryptanalysis objects. Each entry in the dictionary consists
 *  of a word and its relative frequency. The frequency is stored
 *  as an int represented as a String. For example,
 *    "the" "38940"
 *    "a"  "32901"
 *
 *  To Test: java Dictionary /usr/share/lib/dict/words
 */

package hcrypto.analyzer;

import java.util.*;
import java.io.*;

/**
 * A searchable word dictionary backed by a hash table.
 *
 * <p>Keys are lower-cased words. When the dictionary is loaded in
 * one-word-per-line mode, the value stored for a word is its frequency
 * (an int kept as a String, "0" when the line has no frequency column).
 * When loaded in several-words-per-line mode, the value stored for a word
 * is the lower-cased word itself and no frequency information is available.
 *
 * <p>Lookups ({@link #contains(String)}, {@link #getFreq(String)}) do not
 * change the case of their argument, so callers should pass lower-case words.
 */
public class Dictionary {

    protected static final float LOAD_FACTOR = (float) 0.5;
    public static final int BIG_DICT = 0;
    public static final int KUCERA_340 = 1;
    public static final int KUCERA_100 = 2;
    public static final int KUCERA_50 = 3;
    public static final int KUCERA_3500 = 4;

    public static final int MIN_FREQ = 0;  // Cutoff for words in dictionary

    protected Hashtable dict;             // word -> frequency string (or word, see class comment)

    protected int min_freq = MIN_FREQ;
    protected double total_freq = 0;      // sum of all frequencies read from the file

    /**
     * Creates an empty dictionary. No file is read.
     */
    public Dictionary() {
        dict = new Hashtable(200000, LOAD_FACTOR);
    }

    /**
     * Creates a dictionary from the named file, assuming one word
     *  (optionally followed by its frequency) per line.
     * @param filename a String giving the name of the dictionary file
     */
    public Dictionary(String filename) {
        this();
        init(filename, true, min_freq);
    }

    /**
     * This constructor creates a dictionary from the named file.
     *  If linebreaks is true, it assumes the words are listed one
     *  per line. Otherwise, it assumes words are listed several per line.
     * @param filename a String giving the name of the dictionary file
     * @param linebreaks set to true iff the words are listed one per line
     */
    public Dictionary(String filename, boolean linebreaks) {
        this();
        init(filename, linebreaks, min_freq);
    }

    /**
     * Creates a dictionary from the named file, keeping only words whose
     *  frequency is at least minfreq (the cutoff only applies when
     *  linebreaks is true).
     * @param filename a String giving the name of the dictionary file
     * @param linebreaks set to true iff the words are listed one per line
     * @param minfreq the minimum frequency a word needs to be stored
     */
    public Dictionary(String filename, boolean linebreaks, int minfreq) {
        this();
        init(filename, linebreaks, minfreq);
    }

    /**
     * Loads the dictionary file into the hash table. I/O errors are
     *  reported with a stack trace and leave the dictionary with whatever
     *  was read so far. The reader is always closed.
     */
    private void init(String filename, boolean linebreaks, int minfreq) {
        System.out.println("DICTIONARY= " + filename);
        total_freq = 0;

        BufferedReader inStream = null;
        try {
            inStream = new BufferedReader(new FileReader(filename));
            String line = inStream.readLine();
            while (line != null) {
                if (linebreaks) {
                    addEntry(line, minfreq);
                } else {  // NO LINE BREAKS
                    addWords(line);
                }
                line = inStream.readLine();
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the file handle is released even if reading fails.
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        System.out.println("Dictionary size = " + dict.size() + " with total frequencies " + total_freq);
    }

    /**
     * Handles one line of the form "WORD" or "WORD FREQ". Blank lines are
     *  ignored and lines whose frequency is not an int are skipped.
     */
    private void addEntry(String line, int minfreq) {
        StringTokenizer st = new StringTokenizer(line);
        if (!st.hasMoreTokens()) {
            return;                        // blank line: nothing to insert
        }
        String word = st.nextToken().toLowerCase();

        // Reset per line so a word without a frequency does not inherit
        // the frequency of the previous line.
        String freqSt = "0";
        int freq = minfreq;
        if (st.hasMoreTokens()) {
            freqSt = st.nextToken();
            try {
                freq = Integer.parseInt(freqSt);
            } catch (NumberFormatException e) {
                System.err.println("Skipping malformed dictionary line: " + line);
                return;
            }
            total_freq += freq;
        }
        if (freq >= minfreq) {
            dict.put(word, freqSt);
        }
    }

    /**
     * Handles one line holding several whitespace-separated words; each
     *  lower-cased word is stored as both key and value.
     */
    private void addWords(String line) {
        System.out.println("Line= " + line);
        StringTokenizer st = new StringTokenizer(line);
        while (st.hasMoreTokens()) {
            String lower = st.nextToken().toLowerCase();
            dict.put(lower, lower);
        }
    }

    /**
     * Reports whether the given word is a key in the dictionary.
     *  The word is looked up exactly as given (no case conversion).
     */
    public boolean contains (String word) {
        return dict.containsKey(word);
    }

    /**
     * Returns the relative frequency of the word: its stored frequency
     *  divided by the total of all frequencies read from the file.
     * @return the relative frequency, or 0 if the word is absent, if no
     *  frequencies were read, or if the stored value is not an int
     */
    public double getFreq (String word) {
        String freqStr = (String) dict.get(word);
        if (freqStr == null || total_freq == 0) {
            return 0;                      // avoid 0/0 = NaN when no frequencies were loaded
        }
        try {
            return Integer.parseInt(freqStr) / total_freq;
        } catch (NumberFormatException e) {
            // Entries loaded in several-words-per-line mode store the word
            // itself rather than a frequency.
            return 0;
        }
    }

    /**
     * Returns the number of entries in the dictionary.
     */
    public int size() {
        return dict.size();
    }

    /**
     * Returns a short name for the given dictionary number, or an
     *  "Invalid Dictionary number" message for numbers without one
     *  (BIG_DICT currently has no descriptor).
     */
    public String getDescriptor(int i) {
        switch (i) {
        case KUCERA_340: return "Kucera340";
        case KUCERA_100: return "Kucera100";
        case KUCERA_50: return "Kucera50";
        case KUCERA_3500: return "Kucera3500";
        case 5: return "Kucera10";         // no named constant is declared for this one
        default: return "Invalid Dictionary number: " + i;
        }
    }

    /**
     * Loads a dictionary (if a filename is given) and then looks up words
     *  typed on standard input until an empty line or end of input.
     */
    public static void main(String args[]) {
        Dictionary d;
        if (args.length < 1) {
            System.out.println("Usage: java Dictionary filename [nolinebreaks]");
            d = new Dictionary();
        } else {
            if (args.length == 2)
                d = new Dictionary(args[0], false);
            else
                d = new Dictionary(args[0], true);
        }
        System.out.println("The dictionary has " + d.size() + " words");

        try {
            BufferedReader input = new BufferedReader(new InputStreamReader(System.in));
            String word;
            System.out.print("Search for >>");
            word = input.readLine();
            // readLine() returns null at end of input, so test for that as well.
            while (word != null && word.length() != 0) {
                if (d.contains(word))
                    System.out.println(word + " is a word with frequency " + d.getFreq(word));
                else
                    System.out.println(word + " is not found");
                System.out.print("Search for >>");
                word = input.readLine();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
