/*
 * File: TypeIIAnalyzer.java
 * @author R. Morelli <ralph.morelli@trincoll.edu> 
 * 
 * Description: This class assumes that the text is
 *  encrypted with a polyalphabetic Alberti cipher. It uses the
 *  Index of Coincidence (IC) to cryptanalyze it. This version
 *  attacks the "easy" Alberti---that is, it assumes that each
 *  plaintext character is first substituted from the permutation
 *  alphabet and then shifted.  In the easy version, the IC can
 *  be used to determine when the correct set of shifts has been
 *  found, as outlined in the algorithm below.
 *
 * The "hard" version of Alberti is really the decrypt function, which
 *  first shifts each plaintext character and then substitutes
 *  for it from the permutation alphabet.  In the hard case,
 *  each plaintext character is encrypted by a composition 
 *  consisting of a substitution and a shift. This is equivalent
 *  to a double substitution. It is not possible to use IC
 *  to identify the correct set of shifts. 
 * 
 * Algorithm: A four-step algorithm is used:
 *  1. Find the length of the shift keyword, k, using the IC
 *  2. Break the text into k simple substitution subtexts and use IC 
 *     to determine the shift of each subtext. These correspond to
 *     the letters in the shift keyword.
 *  3. Shift each subtext and merge them into a single message that will
 *     now be a simple substitution cipher. 
 *  4. Solve the simple substitution cipher using NgramAnalyzer or some
 *     other analyzer.
 *
 *  To compile and run from the TestCryptAnalyzer application:
 *
 *  cd ~crypto/hcryptoj/1.4/applications/testanalyzer
 *  javac -classpath ../../classes -d ../../classes TypeIIAnalyzer.java
 *  java -classpath ../../classes:. TestCryptAnalyzer analyzers.TypeIIAnalyzer ga_paramfiles/albertiparam.txt
 */

package analyzers;

import hcrypto.analyzer.*;
import hcrypto.cipher.*;
import hcrypto.engines.*;
import hcrypto.provider.*;

import java.util.*;
import java.io.*;

public class TypeIIAnalyzer extends CryptoAnalyzer implements Analyzer {
    // Number of candidate keyword lengths (1..KEYWD_MAX) tried by findShiftLength().
    private final int KEYWD_MAX = 34;
    // NOTE(review): MAX_TRIES, MAX_RETRIES, EVAL_EPSILON and EVAL_DELTA are not
    // referenced anywhere in the visible part of this class.
    private final int MAX_TRIES = 7;            // Step 2   Magic numbers determined experimentally
    // Tolerance around the 0.066 target IC used by findShiftLength() to stop early.
    private final double IC_EPSILON = 0.008;    // Step 2
    private final int MAX_RETRIES = 15;         // Step 3
    //    private final double EVAL_EPSILON = 0.42;   // Step 3  How we know when text is decrypted
    private final double EVAL_EPSILON = 0.25; // 0.35;   // Step 3  How we know when text is decrypted
    private final double EVAL_DELTA = 0.0018;   // Step 3  How we know if a retry has improved decryption

    // IC of the whole cleaned ciphertext; assigned in run().
    private IndexOfCoincidence IC;
    // Formatter for the figures printed by run(); run() limits it to 3 fraction digits.
    java.text.NumberFormat nf = java.text.NumberFormat.getNumberInstance();

    private String actualKeywd = "";  // The actual secret shift keyword 
    // Length of the actual keyword as parsed from the solution string in run(); 0 when unknown.
    private int  actualKeywdLen;
    // Lower-cased ciphertext after TextUtilities cleaning and whitespace removal; set in run().
    private String cleantext = "";
    // Keyword length (period) estimated by findShiftLength(); set in run().
    private int shiftLen = 0;
    // NOTE(review): keyCount is never read or written in the visible code.
    private int keyCount = 0;


    /**
     * TypeIIAnalyzer() -- Default constructor. Delegates to the superclass
     *  default constructor and does no other initialization here.
     */
    public TypeIIAnalyzer() {
	super();
    }

    /**
     * TypeIIAnalyzer() -- this constructor is given an object containing parameter settings
     *  and simply forwards it to the superclass constructor.
     * @param params -- an object containing param1=val1 param2=val2 ...
     */
    public TypeIIAnalyzer(GaParameters params) {
	super(params);
    }

    /**
     * setup() hands the text to the superclass setup and makes sure a
     *  parameter object exists.
     * @param text -- the text to be analyzed, passed unchanged to super.setup()
     */
    public void setup(String text) {
        super.setup(text);
        if (params != null) {
            return;                      // Parameters were already supplied
        }
        params = new GaParameters();     // Fall back to default GaParameters
    }

    /**
     * findShiftLength() estimates the length of the Alberti shift key through
     *  an exhaustive search. For each candidate length 1..KEYWD_MAX the text is
     *  broken into that many columns and the mean IC of the columns is computed
     *  and printed, together with its variance and its distance from 0.066.
     *
     *  The search stops at the first length whose mean IC is within IC_EPSILON
     *  of 0.066 or above 0.066. Stopping at the first hit, rather than taking
     *  the overall maximum, favors the shortest period, since multiples of the
     *  true period would frequently show an even higher IC.
     *
     *  If no length reaches that threshold, the length with the greatest mean
     *  IC is returned. The fallback now starts from the measured IC of length 1;
     *  previously the starting value was read before the array was filled and
     *  was therefore always 0.0, so length 1 could never win the fallback.
     *
     * NOTE(review): when the text is shorter than a candidate length some
     *  columns are empty; how IndexOfCoincidence scores an empty string is not
     *  visible here.
     *
     * @param text -- the cleaned ciphertext
     * @return the estimated keyword length, between 1 and KEYWD_MAX
     */
    private int findShiftLength(String text) {
        final double targetIC = 0.066;
        double ioc[] = new double[KEYWD_MAX];   // ioc[k] is the mean column IC for length k+1

        for (int k = 0; k < KEYWD_MAX; k++) {
            double iclist[] = getICs(getKColumns(text, k + 1));
            ioc[k] = mean(iclist);
            System.out.println("IC["+(k+1)+"] =" + ioc[k] + " Variance= " + variance(iclist,ioc[k]) + " Diff= " + Math.abs(ioc[k]-0.066));
            // Close enough to (or above) the target: accept the shortest such length.
            if (Math.abs(ioc[k] - targetIC) < IC_EPSILON || ioc[k] > targetIC) {
                return k + 1;
            }
        }

        // No length reached the threshold: fall back to the highest mean IC seen.
        int best = 1;
        double bestval = ioc[0];
        for (int k = 1; k < KEYWD_MAX; k++) {
            if (ioc[k] > bestval) {
                best = k + 1;
                bestval = ioc[k];
            }
        }
        return best;
    }

    /**
     * getICs() computes the index of coincidence of every string in s.
     * @param s -- the strings (typically the columns of a text) to measure
     * @return an array whose k-th entry is the value getIOC() reports for s[k]
     */
    private double[] getICs(String[] s) {
        int count = s.length;
        double values[] = new double[count];
        int idx = 0;
        while (idx < count) {
            values[idx] = new IndexOfCoincidence(s[idx]).getIOC();
            idx++;
        }
        return values;
    }
    /**
     * mean() returns the arithmetic mean of the values in a. For an empty
     *  array the result is NaN (0.0 divided by 0).
     * @param a -- the values to average
     */
    private double mean(double[] a) {
        int n = a.length;
        double total = 0;
        int i = 0;
        while (i < n) {
            total += a[i];
            i++;
        }
        return total / n;
    }
    /**
     * variance() returns the population variance of a, i.e. the average
     *  squared deviation from the supplied mean (divides by a.length, not
     *  by a.length - 1).
     * @param a -- the values
     * @param mean -- the mean to measure deviations from
     */
    private double variance(double[] a, double mean) {
        int n = a.length;
        double squares = 0;
        int i = 0;
        while (i < n) {
            double deviation = a[i] - mean;
            squares += deviation * deviation;
            i++;
        }
        return squares / n;
    }


    /** 
     * getSumIC() joins all the strings in s, in array order, into one text
     *  and returns the index of coincidence of that combined text.
     * @param s -- the strings (columns) to join
     */
    private double getSumIC(String[] s) {
        StringBuffer joined = new StringBuffer();
        for (int col = 0; col < s.length; col++) {
            joined.append(s[col]);
        }
        return new IndexOfCoincidence(joined.toString()).getIOC();
    }

    /**
     * getKColumns() converts the text, s, into an array of k columns: column j
     *  holds the characters at positions j, j+k, j+2k, ... If k corresponds to
     *  the period of the Alberti cipher, then each column is a simple
     *  substitution cipher. Columns are empty strings when s has fewer than
     *  k characters.
     *
     *  The period is validated before the array is allocated, so a negative
     *  k is reported as IllegalArgumentException rather than surfacing as a
     *  NegativeArraySizeException from the allocation.
     *
     * @param s -- the ciphertext
     * @param k -- the period; must be positive
     * @return an array of exactly k strings
     * @throws IllegalArgumentException if k is zero or negative
     */
    private String[] getKColumns(String s, int k) {
        if (k <= 0) {
            throw new IllegalArgumentException("Skip size must be positive");
        }
        String columns[] = new String[k];
        int n = s.length();
        for (int j = 0; j < k; j++) {
            StringBuffer sb = new StringBuffer();
            for (int i = j; i < n; i += k) {
                sb.append(s.charAt(i));
            }
            columns[j] = sb.toString();
        }
        return columns;
    }

    /**
     * mergeColumns() interleaves the columns back into a single string of
     *  text, reversing getKColumns(). The common depth d is the shorter of the
     *  first and last column; rows 0..d-1 are emitted column by column, then
     *  one further character (index d) is emitted for each column longer
     *  than d. Characters beyond index d are not emitted, and a middle column
     *  shorter than d causes an index exception, so the input is expected to
     *  have the shape getKColumns() produces.
     * @param columns -- an array of the k simple substitution subtexts
     *  of the original message; must contain at least one column.
     */
    private String mergeColumns(String columns[]) {
        int ncols = columns.length;
        int firstLen = columns[0].length();
        int lastLen = columns[ncols - 1].length();
        int d = (firstLen < lastLen) ? firstLen : lastLen;

        StringBuffer merged = new StringBuffer();
        for (int row = 0; row < d; row++) {
            for (int col = 0; col < ncols; col++) {
                merged.append(columns[col].charAt(row));
            }
        }
        // Leftover row: only the leading columns have one more character.
        for (int col = 0; col < ncols; col++) {
            String column = columns[col];
            if (column.length() > d) {
                merged.append(column.charAt(d));
            }
        }
        return merged.toString();
    }

    /**
     * complementWord() computes the 26-character complement of a word: each
     *  letter at offset x from 'a' is replaced by the letter at offset
     *  (26 - x) mod 26, so 'a' stays 'a', 'b' becomes 'z', 'z' becomes 'b'.
     * @param s -- a word of lower-case letters a..z
     */
    private String complementWord(String s) {
        int n = s.length();
        StringBuffer out = new StringBuffer();
        int pos = 0;
        while (pos < n) {
            int offset = (26 - s.charAt(pos) + 'a') % 26;
            out.append((char)('a' + offset));
            pos++;
        }
        return out.toString();
    }

    /**
     * Repairs s1, the shiftword we found. This is for development use.
     *
     *  For every position the letter difference (s1 - s2) mod 26 is computed.
     *  The difference seen most often (the first one to reach the highest
     *  count) is taken as the correct offset. Each letter of s1 whose
     *  difference deviates is replaced by the letter that has that offset
     *  from the matching letter of s2, and a diagnostic line is printed for it.
     *
     *  The diagnostic's complement letter is now reduced mod 26; before, a
     *  repaired letter of 'a' was reported as the non-letter '{'.
     *
     * @param s1 -- the shiftword that was found; lower-case a..z
     * @param s2 -- the word to compare against; must be at least as long as s1
     * @return s1 with the deviating letters repaired
     */
    private String repairShiftword(String s1, String s2) {
        StringBuffer sb = new StringBuffer(s1);
        int diffcount[] = new int[26];        // How often each possible difference occurs
        int diffs[] = new int[s1.length()];   // Difference for each letter pair

        int maxdiffcount = 0;
        int maxdiff = 0;
        for (int k = 0; k < s1.length(); k++) {
            int diff = (26 + (s1.charAt(k) - 'a') - (s2.charAt(k) - 'a')) % 26;
            diffs[k] = diff;
            ++diffcount[diff];
            if (diffcount[diff] > maxdiffcount) {
                maxdiffcount = diffcount[diff];
                maxdiff = diff;
            }
        }

        for (int j = 0; j < diffs.length; j++) {
            if (diffs[j] != maxdiff) {
                // Undo this position's own difference and apply the majority one.
                char ch = (char)('a' + ((sb.charAt(j) - 'a' + 26 - diffs[j] + maxdiff) % 26));
                // Complement of the repaired letter, kept inside a..z.
                char ch_decrypt = (char)('a' + ((26 - ch + 'a') % 26));
                System.out.print("Error at k=" + j + " ch='" + ch_decrypt + "'. In encryption keyword '" + s1 + "', difference should be " + maxdiff + " instead of " + diffs[j]);
                System.out.print(", '" +  s1.charAt(j) + "' should be '" + ch);
                sb.setCharAt(j, ch);
                System.out.println("' giving= '" + sb.toString() +"'");
            }
        }
        return sb.toString();
    }

    /**
     * substitute() applies the permutation alphabet to a cryptotext: each
     *  character c of s is replaced by the character of alpha at index
     *  c - 'a'.
     * @param s -- the text to translate; lower-case a..z
     * @param alpha -- the substitution alphabet, indexed by letter offset
     */
    private String substitute(String s, String alpha) {
        int n = s.length();
        char mapped[] = new char[n];
        for (int pos = 0; pos < n; pos++) {
            int letter = s.charAt(pos) - 'a';
            mapped[pos] = alpha.charAt(letter);
        }
        return new String(mapped);
    }

    /**
     * shiftText() breaks text into keywd.length() columns, shifts column k
     *  by the offset of the k-th keyword letter from 'a', and returns the
     *  re-merged columns.
     * @param text -- the text to shift
     * @param keywd -- the shift keyword; its length is the number of columns
     */
    private String shiftText(String text, String keywd) {
        String columns[] = getKColumns(text, keywd.length());
        for (int col = 0; col < columns.length; col++) {
            int amount = keywd.charAt(col) - 'a';
            columns[col] = shift(columns[col], amount);
        }
        return mergeColumns(columns);
    }
    
    /**
     * shift() shifts each character in the string s by n, assuming a 26
     *  letter alphabet and wrapping around past 'z'. Negative shifts wrap
     *  backwards past 'a': Java's % operator keeps the sign of its left
     *  operand, so the remainder is folded back into 0..25 before it is
     *  applied. Results for non-negative n are unchanged.
     * @param s -- a text of lower-case letters a..z
     * @param n -- the amount to shift each letter; may be negative
     * @return the shifted text, the same length as s
     */
    private String shift (String s, int n) {
        StringBuffer sb = new StringBuffer();
        for (int k = 0; k < s.length(); k++) {
            int offset = ((s.charAt(k) - 'a') + n) % 26;
            if (offset < 0) {
                offset += 26;       // Fold a negative remainder back into 0..25
            }
            sb.append((char)('a' + offset));
        }
        return sb.toString();
    }

    /**
     * getWords() returns the 26 shifts of s (by 0, 1, ..., 25) as one
     *  string in which every word is preceded and followed by a single
     *  space, so the result starts and ends with a space.
     * @param s -- the word to shift
     */
    private String getWords(String s) {
        StringBuffer words = new StringBuffer(" ");
        int amount = 0;
        while (amount < 26) {             // One word per possible shift
            words.append(shift(s, amount));
            words.append(" ");
            amount++;
        }
        return words.toString();
    }

    /**
     * countWrongs() returns the number of positions at which a1 and a2
     *  hold different characters. Every index of a1 is examined, so a2
     *  must be at least as long as a1.
     */
    private int countWrongs(char a1[], char a2[]) {
        int mismatches = 0;
        int pos = 0;
        while (pos < a1.length) {
            if (a1[pos] != a2[pos]) {
                mismatches++;
            }
            pos++;
        }
        return mismatches;
    }

    /**
     * countWrongChars() compares s1 and s2 position by position and returns
     *  the number of positions whose characters differ. Only the first
     *  min(s1.length(), s2.length()) positions are compared; any extra
     *  characters of the longer string are not counted.
     */
    private int countWrongChars(String s1, String s2) {
        int len1 = s1.length();
        int len2 = s2.length();
        int limit = (len1 < len2) ? len1 : len2;
        int wrong = 0;
        int pos = 0;
        while (pos < limit) {
            if (s1.charAt(pos) != s2.charAt(pos)) {
                wrong++;
            }
            pos++;
        }
        return wrong;
    }

    /**
     * getMinKeywdMatch() returns the smallest countWrongChars() distance
     *   between keywd and any of the whitespace-separated words on
     *   keywdlist; 0 means some word matched over the compared positions.
     *   The result is capped at 100, which is also what is returned when
     *   keywdlist contains no words.
     */
    private int getMinKeywdMatch(String keywd, String keywdlist) {
        int fewest = 100;
        for (java.util.StringTokenizer words = new java.util.StringTokenizer(keywdlist);
             words.hasMoreTokens(); ) {
            int wrong = countWrongChars(keywd, words.nextToken());
            fewest = Math.min(fewest, wrong);
        }
        return fewest;
    }


    /**
     * run() conducts the cryptanalysis and reports the result on System.out.
     *
     *  Steps, in order:
     *   - lower-case and clean the ciphertext (text) into cleantext;
     *   - if the solution string contains '#', parse the actual keyword and
     *     its length from a "#keyword(length)" suffix, otherwise record the
     *     keyword as "???" with length 0;
     *   - clean the solution string the same way as the ciphertext;
     *   - print N (text length) and the IC of the whole text;
     *   - estimate the keyword length with findShiftLength() and print it
     *     against the actual length (OK / OK(n) / NO(n));
     *   - print FastIC's IC and column distributions for that length;
     *   - derive a key for each column with analyzeColumn() and print it;
     *   - call calcDiffs() for column 0 against every other column (it
     *     prints the differences itself; the returned arrays are not used).
     *
     * NOTE(review): text and solution are not declared in this class; they are
     *  presumably inherited from CryptoAnalyzer and must be non-null here.
     */
    public void run() {
	nf.setMaximumFractionDigits(3);

	/********** Clean up the text and initialize things. *************/
        cleantext = TextUtilities.cleanString(text.toLowerCase());
        cleantext = TextUtilities.removeWhiteSpace(cleantext);
        int pound = this.solution.indexOf("#");
        if (pound != -1) {
	// Take everything from '#' up to, but not including, the final character of solution.
	String secretshiftkeyword = this.solution.substring(this.solution.indexOf("#"),this.solution.length()-1);
	// Keyword is the text between '#' and '('; its length is the number between '(' and ')'.
	// A suffix without '(' or ')' makes substring() throw, and a non-numeric length
	// makes Integer.parseInt() throw NumberFormatException.
	actualKeywd = secretshiftkeyword.substring(secretshiftkeyword.indexOf("#")+1,secretshiftkeyword.indexOf("("));
        actualKeywdLen = Integer.parseInt(secretshiftkeyword.substring(secretshiftkeyword.indexOf("(")+1,secretshiftkeyword.indexOf(")")));
	}
        else {
	    // No keyword information supplied with the solution.
	    actualKeywd="???";
	    actualKeywdLen=0;
	}
	//	System.out.println("Secret: " + actualKeywd + " " + actualKeywdLen + "\tsolution="+ solution);
	
	this.solution = TextUtilities.cleanString(this.solution.toLowerCase());
        this.solution = TextUtilities.removeWhiteSpace(this.solution);

	System.out.print("\tN= " + cleantext.length());
	//        System.out.println("CRYPTOGRAM begins: " + cleantext.substring(0,27) + "...");

        IC = new IndexOfCoincidence(cleantext);
        System.out.print("\tIC= " + nf.format(IC.getIOC()));
   
        /****** STEP 1: Find the length of the shift **************/
	shiftLen = findShiftLength(cleantext);


	// N/P is a floating-point division, so an unknown length (0) does not throw.
        System.out.print("\tP= " + actualKeywdLen + "\tN/P= " + nf.format(1.0*cleantext.length()/actualKeywdLen));
        if (shiftLen == actualKeywdLen)
	    System.out.print("\tOK");
	// The actual keyword length is a multiple of the length we found.
	// NOTE(review): this is also true when actualKeywdLen is 0 (keyword unknown),
	// so that case is reported as OK(n).
	else if (actualKeywdLen % shiftLen  == 0)  // The period is a multiple of the actual shift
	    System.out.print("\tOK(" + shiftLen +")");
	else
	    System.out.print("\tNO(" + shiftLen +")");

	System.out.println();

	/* Break text into columns and analyze each column with unigram analyzer */

	//	shiftLen=5;

	FastIC fastIC = new FastIC(cleantext, shiftLen);
        System.out.println("\tIC= " + nf.format(fastIC.getIC()));
	fastIC.printColumnDistributions();

/********
	for (int k = 0; k < shiftLen; k++) {
	    FrequencyRecord freqs[] = fastIC.getSortedColumn(k);
	    System.out.print("Column " + (k+1) + ": ");
	    for (int j = 0; j < freqs.length; j++)
		System.out.print(freqs[j].toString() + " ");
	    System.out.println();
	}
*********/
	//	/****
	// One candidate key per column, printed under an upper-case alphabet ruler.
	String columns[] = getKColumns(cleantext, shiftLen);
	String colkeys[] = new String[shiftLen];
	for (int k = 0; k < shiftLen; k++) {
	    //	    System.out.println("Column " + (k+1) + ": " + columns[k]);
	    colkeys[k] = analyzeColumn(columns[k]);;
	    System.out.println("ABCDEFGHIJKLMNOPQRSTUVWXYZ\n" + colkeys[k] + "\tColumn " + (k+1));
	}
	// Compare every later column's key with column 0's key; calcDiffs() prints
	// the differences and the returned array is discarded.
	for (int k = 1; k < shiftLen; k++) {
	    int diffs[] = calcDiffs(colkeys[0], colkeys[k]);
	    //	    for (int j = 0; j < diffs.length; j++) 
	    //		System.out.print("" + (char)('a' + j) + ":" + diffs[j] + " ");
	    //	    System.out.println();
	}
	//	*******/
    }

    /**
     * calcDiffs() compares where selected letters sit in two keys. For each
     *  of the letters a, e, i, n, o, r, s and t it computes
     *  (position in key1) - (position in key2), adds 26 when that is
     *  negative, stores the value at the letter's own offset in the result,
     *  and prints "letter:diff " for it. A newline is printed at the end.
     *  Entries for all other letters are left at 0.
     * @param key1 -- the first key, searched with indexOf
     * @param key2 -- the second key, searched with indexOf
     * @return a 26-entry array of position differences, indexed by letter
     */
    private int[] calcDiffs(String key1, String key2) {
        final int probes[] = {0, 4, 8, 13, 14, 17, 18, 19};
        int result[] = new int[26];
        for (int p = 0; p < probes.length; p++) {
            int letter = probes[p];
            char ch = (char)('a' + letter);
            int diff = key1.indexOf(ch) - key2.indexOf(ch);
            if (diff < 0) {
                diff += 26;
            }
            result[letter] = diff;
            System.out.print("" + ch + ":" + diff + " ");
        }
        System.out.println();
        return result;
    }

    /**
     * analyzeColumn() searches for a substitution key for one column of text
     *  by hill-climbing with random restarts, and returns the inverse of the
     *  best key found as a 26-letter string (see reverseKeyToString()).
     *
     *  The search runs 10 rounds. Each round reshuffles the current working
     *  key and then tries swapping pairs of key entries: a swap is kept when
     *  the decryption's IC.chi() score exceeds the best score so far (and the
     *  pair scan restarts), otherwise the swap is undone and the next pair is
     *  tried. The best score and best key carry over between rounds.
     *
     * NOTE(review): what FastIC.chi() and FastIC.sqrdiff() measure is not
     *  visible here; the code treats a larger chi() as better. sqrdiff is
     *  computed and recorded but never influences the search.
     * NOTE(review): if no candidate ever scores above 0, bestkey is never
     *  written and stays all zeros.
     *
     * @param text -- one column of lower-case ciphertext
     */
    private String analyzeColumn(String text) {
	FastIC IC = new FastIC(text);
	String s = "abcdefghijklmnopqrstuvwxyz";
	int key[] = new int[26];
	int bestkey[] = new int[26];

	// Start from the identity key: key[k] == k.
	for (int k = 0; k < s.length(); k++) {
	    key[k] = s.charAt(k)-'a';
	}

	double best = 0;
	double sqrdiffbest = 1.0;
	// Ten rounds; each starts by reshuffling whatever key the previous round ended with.
	for (int m = 0; m < 10; m++) {

	randomize(key,26);
	String decrypt = decrypt(text,key);
	// The freshly shuffled key is scored here but not compared with best;
	// only the swapped keys below can become the best key.
	double eval = IC.chi(decrypt);
	double sqrdiff = IC.sqrdiff(decrypt);
	//	System.out.println("Initial key = " + keyToString(key) + " " + eval + " " + IC.sqrdiff(decrypt));

	// Scan pairs (j,k) with k = 1..25 and j = 0..k; j == k is a no-op swap.
	int j = 0, k = 1;
	while (k < 26) {
            swap(key, j, k);
	    decrypt = decrypt(text,key);
	    sqrdiff = IC.sqrdiff(decrypt);
	    eval = IC.chi(decrypt);
	    if (eval > best) {
	    //	    if (sqrdiff < sqrdiffbest) {
		// Improvement: keep the swap, remember the key, restart the pair scan.
		sqrdiffbest = sqrdiff;
		best = eval;
		copy(key,bestkey);
		//		System.out.println(keyToString(key) + " " + eval + " sqrdiff= " + IC.sqrdiff(decrypt));
		j = 0; k = 1;
	    } else {
		// No improvement: undo the swap and move on to the next pair.
                swap(key,j,k);
		j++;
		if (j > k) { j = 0; k++; }
	    }
	}
	}
	//	return "ABCDEFGHIJKLMNOPQRSTUVWXYZ (" + best + ")\n" + keyToString(bestkey);
	//	return "ABCDEFGHIJKLMNOPQRSTUVWXYZ (" + best + ")\n" + reverseKeyToString(bestkey);
	return reverseKeyToString(bestkey);
    }

    
    /**
     * keyToString() renders a key as text: entry key[k] becomes the letter
     *  at that offset from 'a', in array order.
     * @param key -- letter offsets, normally 0..25
     */
    private String keyToString(int key[]) {
        int n = key.length;
        char letters[] = new char[n];
        for (int pos = 0; pos < n; pos++) {
            letters[pos] = (char)('a' + key[pos]);
        }
        return new String(letters);
    }

    /**
     * reverseKeyToString() inverts a key and renders the inverse as text:
     *  if key maps position k to value v, the inverse maps v back to k.
     * @param key -- a key whose entries are valid indices into an array of
     *  the same length (for a true inverse, a permutation of 0..length-1)
     */
    private String reverseKeyToString(int key[]) {
        int n = key.length;
        int inverse[] = new int[n];
        for (int pos = 0; pos < n; pos++) {
            int value = key[pos];
            inverse[value] = pos;
        }
        return keyToString(inverse);
    }

    /**
     * randomize() shuffles the first N entries of key in place: each
     *  position 0..N-1 in turn is swapped with a position drawn uniformly
     *  from 0..N-1 using Math.random().
     * @param key -- the array to shuffle
     * @param N -- how many leading entries take part in the shuffle
     */
    private void randomize(int key[], int N) {
        int pos = 0;
        while (pos < N) {
            int other = (int)(Math.random() * N);
            swap(key, pos, other);
            pos++;
        }
    }

     /**
      * swap() exchanges the entries at indices k and m of theArr in place.
      *  When k equals m the array is left untouched. A plain temporary is
      *  used instead of the earlier subtract/add trick; the result is the
      *  same for every pair of values.
      * @param theArr -- the array to modify
      * @param k -- index of the first entry
      * @param m -- index of the second entry
      */
     private void swap(int[] theArr, int k, int m){
         if (k == m) {
             return;
         }
         int held = theArr[k];
         theArr[k] = theArr[m];
         theArr[m] = held;
     } //swap

    /**
     * copy(arr1, arr2) copies the values of all elements of arr1 into the
     * same positions of arr2. Both arrays must already exist and arr2 must
     * be at least as long as arr1; elements of arr2 beyond arr1.length are
     * left unchanged. The copy is delegated to System.arraycopy.
     * @param arr1 -- the source array
     * @param arr2 -- the destination array
     */
    private void copy(int[] arr1, int[] arr2){
        System.arraycopy(arr1, 0, arr2, 0, arr1.length);
    } //copy

    /**
     * decrypt() translates s through key: a character at offset x from 'a'
     *  becomes the letter at offset key[x] from 'a'.
     * @param s -- the text to translate; lower-case a..z
     * @param key -- the substitution table, indexed by letter offset
     */
    private String decrypt(String s, int key[]) {
        int n = s.length();
        char plain[] = new char[n];
        for (int pos = 0; pos < n; pos++) {
            int letter = s.charAt(pos) - 'a';
            plain[pos] = (char)('a' + key[letter]);
        }
        return new String(plain);
    }


    /******
    private String analyzeColumn(String s) {
	String result="";
	try {
            Analyzer analyzer;
            analyzer = new NgramAnalyzer(params);
	    Alphabet alpha = AlphabetFactory.getInstance(AlphabetFactory.ALPH_az);
	    ((NgramAnalyzer)analyzer).setup(text+"$$$"+solution, params.book);
	    ((NgramAnalyzer)analyzer).setNgramAnalyzer(1,params.book,alpha,26,NgramAnalyzer.SIMPLESUB,100000);

	    analyzer.run();
	    result =  analyzer.getReport(); 

	} catch (Exception e) {
	    System.out.println(e.toString());
	}
	return result;
    }
    ********/

    /*****
    private String complement(String words) {
	StringTokenizer st = new StringTokenizer(words);
	StringBuffer sb = new StringBuffer();
	while (st.hasMoreTokens()) {
	    sb.append(complementWord(st.nextToken()) + " ");
	}
	return sb.toString();
    }
    *********/

    /**
     * analyzeSimpleSubstitution() creates an NgramAnalyzer, configures it
     *  with the a-z alphabet and params.book, runs it on the text, and
     *  returns its report. The analyzer is given text + "$$$" + solution.
     *  Any exception is printed (message and stack trace) and an empty
     *  string is returned instead.
     * @param text -- the text to be analyzed.
     * @return the NgramAnalyzer's report, or "" if the analysis failed.
     */
    private String analyzeSimpleSubstitution(String text) {
        try {
            NgramAnalyzer ngram = new NgramAnalyzer(params);
            Alphabet alpha = AlphabetFactory.getInstance(AlphabetFactory.ALPH_az);
            ngram.setup(text+"$$$"+solution, params.book);
            ngram.setNgramAnalyzer(4,params.book,alpha,26,NgramAnalyzer.SIMPLESUB,100000);
            ngram.run();
            return ngram.getReport();
        } catch (Exception e) {
            System.out.println("Problem in analyzeSimpleSubstitution: " + e.toString());
            e.printStackTrace();
            return "";
        }
    }


} // TypeIIAnalyzer

