/*
 * File: WordBasedGaPopulation.java
 * @author R. Morelli <ralph.morelli@trincoll.edu>
 * @author R. Walde <rwalde@nyc.rr.com>
 *
 * Description: This class implements an instance of a GaPopulation
 *  by providing implementations of the init()
 *  and run() methods for a WordBasedGaAnalyzer.
 */

package hcrypto.analyzer;

import java.util.*;
import java.text.*;

/**
 * A GaPopulation whose individuals carry substitution keys that are seeded
 * from (cryptogram token, dictionary word) pairs. The population is built by
 * init() and evolved by run().
 */
public class WordBasedGaPopulation extends GaPopulation {
    private static final int NO_CHANGE_LIMIT = 20;   // Not referenced anywhere in this class
    private static final int N_SHUFFLES = 50;        // Number of random swaps done by shuffle(String)

    /**
     * These variables are used by word-based Population.
     */
    protected PatternDictionary seed_dict;   // The seeding dictionary
    protected Dictionary eval_dict;     // The evaluation dictionary

    private Token tokens[];           // An array of the tokens in the message
    private WordPairs pairs[];        // Pairs of crypto/plain from dictionary
    private int nPairs;               // Number of entries of pairs[] that are filled in
    private int nTokens, nWords;       // Number of words and tokens in message
    private int nWordsSolution;        // Number of words in the solution, if available
    private String message_unused;     // Alphabet letters not used in the message
    private int wordpairPtr;           // Points to next word pair in dict

    private int counter[];             // Tally of keys by the number of word pairs seeded into them
    private boolean pairchecker[];     // pairchecker[p] becomes true once pairs[p] was used in a key
    private int nTweaks = 0;           // Not used by any live code in this class
    private NumberFormat num;          // Formats scores for display (at most 2 fraction digits)
    private int keyCount = 0;          // Bumped per key built in init() and by individual.length per generation in run()
    private int charErrs = 0;          // Latest TextUtilities.countInCorrectChars() result for individual[0]
    private double percentTokens = 0;  // Latest TextUtilities.percentWords() result for individual[0]

    /**
     * WordBasedGaPopulation() constructor
     * @param text -- the text to be cryptanalyzed
     * @param params -- a collection of parameters to control the run
     */
    public WordBasedGaPopulation (String text, String solution, GaParameters params) {
	super(text, solution, params);
	//	System.out.println("CLEANTEXT= " + cleantext + "ENDCLEANTEXT");
	num = NumberFormat.getInstance();
	num.setMaximumFractionDigits(2);
    }

    /**
     * Prepares the population for a run. In order, it
     *  loads the seeding and evaluation dictionaries,
     *  gathers word/token statistics for the cryptogram and the solution,
     *  sorts the cryptogram's tokens using Token's own ordering,
     *  builds the table of (crypto, plain) word pairs, and
     *  creates every individual with a key obtained from makeKey().
     * When no word pair could be built, seeding is switched off so that
     *  purely random keys are used instead.
     */
    public void init() {
        System.out.println("SEED DICTIONARY");
        seed_dict = new PatternDictionary(params.seed_dict);
        System.out.println("EVAL DICTIONARY");
        eval_dict = (params.eval_dict == Dictionary.BIG_DICT)
            ? new Dictionary(params.dict_name)
            : new PatternDictionary(params.eval_dict);

        nTokens = TextUtilities.countTokens(cleantext);
        nWords = TextUtilities.countWords(eval_dict, cleantext);
        nWordsSolution = TextUtilities.countWords(eval_dict, solution.toLowerCase());
        System.out.println("SOLUTION: (" + nWordsSolution + ") " + solution);
        message_unused = TextUtilities.getUnusedLetters(cleantext);

        tokens = initWordArray(cleantext);
        java.util.Arrays.sort(tokens);
        pairs = makeWordPairs(tokens, nTokens * seed_dict.nWords());
        if (nPairs == 0) {
            // Nothing to seed from: fall back to unseeded keys.
            seeding = GaParameters.NO_SEEDING;
        }

        // Twice the nominal size, so that parents and children compete.
        individual = new WordBasedGaIndividual[size * 2];
        System.out.println("NPAIRS = " + nPairs + " POPULATION SIZE = " + size);

        wordpairPtr = 0;
        counter = new int[10];
        pairchecker = new boolean[nPairs];

        if (seeding == GaParameters.RANDOM_SEEDING) {
            System.out.println("USING RANDOM SEEDING");
        } else if (seeding == GaParameters.NO_SEEDING) {
            System.out.println("USING NO SEEDING");
        } else {
            System.out.println("USING DETERMINISTIC SEEDING");
        }

        for (int slot = 0; slot < individual.length; slot++) {
            String seededKey = makeKey();
            individual[slot] = new WordBasedGaIndividual(cleantext, seededKey, eval_dict, params);
            ++keyCount;
        }
    }

    /**
     * Splits the text into Tokens, one per run of non-whitespace characters
     * (the default StringTokenizer delimiters), each built against the
     * seeding dictionary.
     * @param text a String representing the text.
     * @return the tokens in the order they occur in the text
     */
    private Token[] initWordArray(String text) {
        StringTokenizer words = new StringTokenizer(text);
        Token[] result = new Token[words.countTokens()];
        for (int i = 0; words.hasMoreTokens(); i++) {
            result[i] = new Token(words.nextToken(), seed_dict);
        }
        return result;
    }


    /**
     * This method creates an array of all word pairs (c,p),
     *  where c is a token from the cryptogram and p is a plaintext
     *  word from c's pattern set. Tokens are visited in the order given
     *  and every token, including the last one, contributes all of its
     *  words until the array is full.
     * As a side effect nPairs is set to the number of pairs stored;
     *  entries of the returned array beyond nPairs are null.
     * @param tokens is an array of Tokens from the cryptogram (may be empty)
     * @param size is the bound on the size of the array
     * @return an array of length size whose first nPairs entries are filled in
     */
    private WordPairs[] makeWordPairs(Token[] tokens, int size) {
        WordPairs[] table = new WordPairs[size];
        int filled = 0;
        for (int t = 0; t < tokens.length && filled < size; t++) {
            String crypto = tokens[t].getToken();
            // The word list is a whitespace-separated string of candidate plaintext words.
            StringTokenizer candidates = new StringTokenizer(seed_dict.getWordList(crypto));
            while (filled < size && candidates.hasMoreTokens()) {
                table[filled++] = new WordPairs(crypto, candidates.nextToken());
            }
        }
        nPairs = filled;
        return table;
    }

    /**
     * Produces the encryption key for one new individual. With seeding
     *  switched off the key is a random permutation of the alphabet;
     *  otherwise it is built by seedKey() from the word pairs stored
     *  in pairs[] during initialization.
     */
    private String makeKey() {
        if (seeding == GaParameters.NO_SEEDING) {
            return randomKey();
        }
        return seedKey(seeding);
    }

    /**
     * Returns the letters "a..z" after they have been passed through shuffle().
     */
    private String randomKey() {
        return shuffle("abcdefghijklmnopqrstuvwxyz");
    }


    /**
     * Builds a key that is seeded from the word-pair table and then
     *  completed by finalizeKey().
     * With RANDOM_SEEDING, pairs are drawn at random and applied for as
     *  long as each newly drawn pair is consistent with the key so far.
     *  A pair that was already applied to this key is simply redrawn, and
     *  the loop also stops once every pair has been applied, so it always
     *  terminates.
     * With any other seed type exactly one pair is tried: the one at
     *  wordpairPtr, which then advances cyclically through the table.
     * The number of pairs applied is tallied in counter[]; its last slot
     *  collects every count that is too large for the array.
     * @param seedtype one of the GaParameters seeding constants
     * @return the completed key
     */
    private String seedKey(int seedtype) {
        StringBuffer key = new StringBuffer("**************************");
        StringBuffer used = new StringBuffer();
        int seeded = 0;

        if (seedtype == GaParameters.RANDOM_SEEDING) {
            BitSet applied = new BitSet(nPairs);   // Pairs already written into this key
            while (seeded < nPairs) {
                int p = (int)(Math.random() * nPairs);
                if (applied.get(p))
                    continue;                      // Re-applying changes nothing; draw again
                if (!tokenWordPairFits(key, pairs[p].crypto, pairs[p].plain))
                    break;                         // First conflict ends the seeding
                applyPair(key, used, p);
                applied.set(p);
                ++seeded;
            }
        } else if (nPairs > 0) {                   // DETERMINISTIC SEEDING
            int p = wordpairPtr;
            wordpairPtr = (wordpairPtr + 1) % nPairs;
            if (tokenWordPairFits(key, pairs[p].crypto, pairs[p].plain)) {
                applyPair(key, used, p);
                ++seeded;
            }
        }

        counter[Math.min(seeded, counter.length - 1)]++;
        finalizeKey(key, used);
        return key.toString();
    }

    /**
     * Writes pair p into the key (key[plain letter] = crypto letter),
     *  records its crypto letters as used and marks the pair as used.
     */
    private void applyPair(StringBuffer key, StringBuffer used, int p) {
        String crypto = pairs[p].crypto;
        String plain = pairs[p].plain;
        pairchecker[p] = true;
        for (int j = 0; j < crypto.length(); j++)
            key.setCharAt(plain.charAt(j) - 'a', crypto.charAt(j));
        used.append(crypto);
    }

    /**
     * This method determines if a token-word pair fits into the
     *  partially constructed key, i.e. whether, position by position,
     *  the plain letter is either unassigned or already encrypts to the
     *  crypto letter, and the crypto letter is either unassigned or
     *  already decrypts to the plain letter.
     * A pair whose words differ in length, or whose plain word contains
     *  a character that has no slot in the key, does not fit.
     * @param key a StringBuffer holding the key; '*' marks an unassigned
     *  slot and slot i belongs to the plain letter 'a' + i
     * @param crypto the token from the cryptogram
     * @param plain the candidate plaintext word
     * @return true if the pair can be added without contradicting the key
     */
    private boolean tokenWordPairFits (StringBuffer key, String crypto, String plain) {
        if (plain.length() != crypto.length())
            return false;
        String keyStr = key.toString();      // The key is not modified here, so one snapshot suffices
        for (int k = 0; k < crypto.length(); k++) {
            char w1 = crypto.charAt(k);
            int slot = plain.charAt(k) - 'a';
            if (slot < 0 || slot >= keyStr.length())
                return false;
            char et1 = keyStr.charAt(slot);  // Encrypt of the plain letter
            if (et1 != '*' && et1 != w1)
                return false;
            int indx = keyStr.indexOf(w1);   // Slot that already encrypts to w1, if any
            if (indx != -1 && indx != slot)
                return false;
        }
        return true;
    }

    /**
     * This method fills in the partially constructed key with those
     *  letters that it is missing. The letters not yet used are collected,
     *  shuffled, and written into the '*' slots of the key from left to
     *  right.
     * @param key the partially constructed key; modified in place
     * @param used the crypto letters that already occur in the key
     */
    private void finalizeKey(StringBuffer key, StringBuffer used) {
        String usedStr = used.toString();
        StringBuffer unused = new StringBuffer();
        for (char ch = 'z'; ch >= 'a'; ch--)        // Generate unused chars
            if (usedStr.indexOf(ch) == -1)
                unused.append(ch);

        // Take the shuffled letters from shuffle(String)'s return value.
        // Fewer than two letters need no shuffling, and an empty string
        // must not be handed to shuffle().
        String fill = unused.toString();
        if (fill.length() > 1)
            fill = shuffle(fill);

        int m = 0;                                  // Fill in rest of key
        for (int k = 0; k < key.length(); k++) {
            if (key.charAt(k) == '*') {
                key.setCharAt(k, fill.charAt(m));
                ++m;
            }
        }
    }

    /**
     * Implements the abstract run() method inherited from GaPopulation.
     * After announcing the selection and mutation policies and sorting the
     *  population, each generation
     *   performs up to size/2 crosses, storing both children from index
     *    size upward so that children and parents compete,
     *   mutates the whole population,
     *   sorts it again and, under proportional selection, calls
     *    selectProportional(),
     *   updates the best score, the key count and the error statistics
     *    of individual[0], and reports them.
     * The loop ends when iterations reaches maxtrials.
     */
    public void run() {
        printStartMessage();
        System.out.print(selection_policy == GaParameters.ELITIST_SELECTION
                         ? "ELITIST SELECTION "
                         : "PROPORTIONAL SELECTION ");
        System.out.println(mutate_policy == GaParameters.RANDOM_MUTATION
                           ? "RANDOM MUTATION"
                           : "ELITIST MUTATION ");

        java.util.Arrays.sort(individual);
        do {
            int childSlot = size;
            for (int first = 0; first < size/2; first++) {
                int second = (int)(Math.random() * size);
                double draw = Math.random();
                if (!(draw <= cross_rate)) {
                    continue;                     // No cross for this parent
                }
                GaIndividual childA = new WordBasedGaIndividual(individual[first]);
                GaIndividual childB = new WordBasedGaIndividual(individual[second]);
                childA.cross(childB);
                individual[childSlot++] = childA;
                individual[childSlot++] = childB;
            }
            mutateAll();
            ++iterations;

            java.util.Arrays.sort(individual);
            if (selection_policy == GaParameters.PROPORTIONAL_SELECTION) {
                selectProportional();
            }
            updateScore();
            keyCount += individual.length;

            // Error statistics of the current best individual
            percentTokens = TextUtilities.percentWords(solution, individual[0].getDecrypt());
            charErrs = TextUtilities.countInCorrectChars(solution, individual[0].getDecrypt());

            if (verbose) {
                displayBest();
            }
            if (display != null) {
                display.setText(individual[0].getDecrypt());
            }
        } while (iterations < maxtrials);

        displaySummary();
        WordBasedGaIndividual winner = (WordBasedGaIndividual)individual[0];
        System.out.println("Unused = " + winner.unused_decrypt + "=" + message_unused);
    }


    /**
     * Reports whether the best score equals the fitness of the individual
     *  in the middle of the population array.
     * @param indy not used by the current test
     */
    private boolean success(GaIndividual indy) {
        return bestScore == individual[individual.length/2].getFitness();
    }

    /**
     * Returns the individual stored at position n of the population array.
     * @param n index into the population array
     */
    public GaIndividual getFittest(int n) {
        final GaIndividual chosen = this.individual[n];
        return chosen;
    }

    /**
     * shuffle() randomly shuffles the characters in a StringBuffer,
     *  in place. Buffers with fewer than two characters are left untouched.
     * @param sb the buffer whose contents are to be shuffled
     */
    private void shuffle(StringBuffer sb) {
        if (sb.length() < 2)
            return;
        String shuffled = shuffle(sb.toString());
        // Overwrite the caller's buffer; assigning to the parameter would
        // only rebind the local reference.
        sb.setLength(0);
        sb.append(shuffled);
    }

    /**
     * shuffle() randomly shuffles the characters in a String by
     *  performing N_SHUFFLES swaps of two randomly chosen positions.
     * @param s the string to shuffle
     * @return the shuffled string; s itself when it has fewer than two
     *  characters, since there is nothing to swap
     */
    private String shuffle(String s) {
        if (s.length() < 2)
            return s;
        char[] chars = s.toCharArray();
        for (int k = 0; k < N_SHUFFLES; k++) {
            int a = (int)(Math.random() * chars.length);
            int b = (int)(Math.random() * chars.length);
            char ch = chars[a];
            chars[a] = chars[b];
            chars[b] = ch;
        }
        return new String(chars);
    }

    /**
     * Replaces the first size individuals with freshly keyed ones, sorts
     *  the population again and resets the score bookkeeping.
     */
    private void tweak() {
        if (verbose) {
            System.out.println("$$$$$$$$$$$$$$$$ Tweaking $$$$$$$$$$$$$$$$$$$$");
        }
        int slot = 0;
        while (slot < size) {
            String freshKey = makeKey();
            individual[slot] = new WordBasedGaIndividual(cleantext, freshKey, eval_dict, params);
            slot++;
        }

        java.util.Arrays.sort(individual);
        lastScoreChange = iterations;
        previousBestScore = 0;
        bestScore = individual[0].getFitness();
    }

    /**
     * Prints the alphabet as a ruler, followed by the cross data of a
     *  parent and of a child on separate lines.
     */
    private void displayCrossData(GaIndividual parent, GaIndividual child) {
        final String ruler = "    " + "abcdefghijklmnopqrstuvwxyz";
        System.out.println(ruler);
        final String parentRow = "P : " + parent.displayCrossData();
        System.out.println(parentRow);
        final String childRow = "C : " + child.displayCrossData();
        System.out.println(childRow);
    }

    /**
     * Prints the banner shown at the start of run(): token and word counts,
     *  message length, population size and the text itself.
     */
    private void printStartMessage() {
        String counts = "\t nTokens " + nTokens
            + "\t nWords " + nWords
            + "\t nWordsSolution " + nWordsSolution;
        String sizes = "\t nChars " + cleantext.length()
            + "\t nIndivs " + size;
        System.out.println("STARTING ANALYSIS" + counts + sizes + "\nTEXT: " + cleantext + "\n");
    }

    /**
     * Compares the fitness of a child with that of its parent and updates
     *  the improved / worsened / nochange tallies accordingly.
     * @return true if the child's fitness exceeds the parent's
     */
    private boolean isBetter(GaIndividual child, GaIndividual parent) {
        double delta = child.getFitness() - parent.getFitness();
        boolean childWins = delta > 0;
        if (childWins) {
            improved++;
        } else if (delta < 0) {
            worsened++;
        } else {
            nochange++;
        }
        return childWins;
    }

    /**
     * Prints the cross data of the first size individuals, one per line.
     */
    private void displayAll() {
        int k = 0;
        while (k < size) {
            System.out.println("---------------- " + k + " " + individual[k].displayCrossData());
            k++;
        }
    }

    /**
     * Asks each of the first size * 2 individuals to mutate at
     *  mutate_rate and accumulates what mutate() returns in mutated.
     */
    private void mutateAll() {
        mutated = 0;
        int k = 0;
        while (k < size * 2) {
            mutated += individual[k].mutate(mutate_rate);
            k++;
        }
    }

    /**
     * Prints a one-line progress report: iteration number, best score,
     *  token percentage, character errors, key count, the key of
     *  individual[0], the mutation tally, and the fitness found a quarter
     *  ("median") and half ("worst") of the way into the population array.
     */
    public void displayBest() {
        String scores = iterations + " " + num.format(bestScore) + " " + num.format(percentTokens) + "% ";
        String errors = charErrs + " ERRS " + keyCount + " KEYS ";
        String bestKey = "" + individual[0].getKey();
        String mutations = " mutated=" + mutated;
        String median = " median = " + num.format(individual[individual.length/4].getFitness());
        String worst = " worst = " + num.format(individual[individual.length/2].getFitness());
        System.out.println(scores + errors + bestKey + mutations + median + worst);
    }

    /**
     * Prints the end-of-run summary: iterations, best score and key count
     *  on one line, the cross tallies on the next.
     */
    public void displaySummary() {
        String finished = "Finished: Iterations = " + iterations
            + " Best score is " + num.format(bestScore)
            + " KeyCount = " + keyCount;
        System.out.println(finished);
        String crosses = "Crosses improved= " + improved
            + " worsened= " + worsened
            + " nochange= " + nochange;
        System.out.println(crosses);
    }

    /**
     * Takes the fitness of individual[0] as the best score and, when it
     *  beats the previous best, records the new best and the iteration
     *  at which it was reached.
     */
    private void updateScore() {
        bestScore = individual[0].getFitness();
        boolean newRecord = bestScore > previousBestScore;
        if (newRecord) {
            previousBestScore = bestScore;
            lastScoreChange = iterations;
        }
    }

    /** Returns the seeding dictionary. */
    public PatternDictionary getSeedDict() {
        return this.seed_dict;
    }

    /** Returns the evaluation dictionary. */
    public Dictionary getEvalDict() {
        return this.eval_dict;
    }

    
}
