/*
 * File: AlbertiAnalyzer.java
 * @author R. Morelli <ralph.morelli@trincoll.edu> 
 * 
 * Description: This class assumes that the text is
 *  encrypted with a polyalphabetic Alberti cipher. It uses the
 *  Index of Coincidence (IC) to cryptanalyze it. This version
 *  attacks the "easy" Alberti---that is, it assumes that each
 *  plaintext character is first substituted from the permutation
 *  alphabet and then shifted.  In the easy version, the IC can
 *  be used to determine when the correct set of shifts has been
 *  found, as outlined in the algorithm below.
 *
 * The "hard" version of Alberti is really the decrypt function, which
 *  first shifts each plaintext character and then substitutes
 *  for it from the permutation alphabet.  In the hard case,
 *  each plaintext character is encrypted by a composition 
 *  consisting of a substitution and a shift. This is equivalent
 *  to a double substitution. It is not possible to use IC
 *  to identify the correct set of shifts. 
 * 
 * Algorithm: A three step algorithm is used:
 *  1. Find the length of the shift keyword, k, using the IC
 *  2. Break the text into k simple substitution subtexts and use IC 
 *     to determine the shift of each subtext. These correspond to
 *     the letters in the shift keyword.
 *  3. Merge each of the k subtexts into the plaintext message.
 *
 *  To compile and run from the TestCryptAnalyzer application:
 *
 *  cd ~crypto/hcryptoj/1.4/applications/testanalyzer
 *  javac -classpath ../../classes -d ../../classes AlbertiAnalyzer.java
 *  java -classpath ../../classes:. TestCryptAnalyzer analyzers.AlbertiAnalyzer ga_paramfiles/albertiparam.txt
 */

package analyzers;

import hcrypto.analyzer.*;
import hcrypto.cipher.*;
import hcrypto.engines.*;
import hcrypto.provider.*;

import java.util.*;
import java.io.*;

/**
 * AlbertiAnalyzer attacks text that is assumed to be encrypted with a periodic
 * (polyalphabetic) shift applied on top of a simple substitution. It uses the
 * Index of Coincidence (IC) to find the period and the per-column shifts, undoes
 * the shifts, and hands the remaining simple substitution to an NgramAnalyzer.
 *
 * run() is a test harness as well as an analyzer: it expects the solution text
 * to carry a "#keyword(length)" annotation and prints how well each step did
 * against that known answer.
 *
 * All letter arithmetic in this class assumes text made only of 'a'..'z'.
 */
public class AlbertiAnalyzer extends CryptoAnalyzer implements Analyzer {
    private static final int ALPHABET_SIZE = 26;
    private static final int NO_MATCH = 100;           // Returned by getMinKeywdMatch() when the word list is empty
    private static final int COMPARE_LEN = 20;         // Number of leading characters compared in the Step 3 report
    private static final int SOLUTION_PREVIEW_LEN = 35;// Number of solution characters echoed in the report

    // Magic numbers determined experimentally. Only MAX_TRIES is referenced by
    // the current implementation; the others are kept from earlier versions.
    private static final int KEYWD_MAX = 34;
    private static final int MAX_TRIES = 7;            // Step 2
    private static final double IC_EPSILON = 0.008;    // Step 2
    private static final int MAX_RETRIES = 15;         // Step 3
    private static final double EVAL_EPSILON = 0.25;   // Step 3  How we know when text is decrypted
    private static final double EVAL_DELTA = 0.0018;   // Step 3  How we know if a retry has improved decryption

    private IndexOfCoincidence IC;
    java.text.NumberFormat nf = java.text.NumberFormat.getNumberInstance();

    private String actualKeywd = "";  // The actual secret shift keyword, parsed from the solution
    private int  actualKeywdLen;      // The keyword length declared in the solution annotation
    private String cleantext = "";    // The ciphertext, lowercased, cleaned and without white space
    private int shiftLen = 0;         // The period reported by ShiftLengthAnalyzer
    private int keyCount = 0;         // Not referenced by the current implementation

    /**
     * AlbertiAnalyzer() -- Default constructor
     */
    public AlbertiAnalyzer() {
        super();
    }

    /**
     * AlbertiAnalyzer() -- this constructor is given an object containing parameter settings
     * @param params -- an object containing param1=val1 param2=val2 ...
     */
    public AlbertiAnalyzer(GaParameters params) {
        super(params);
    }

    /**
     * setup() passes the text to the superclass and makes sure that a
     *  parameter object exists, creating a default one if necessary.
     * @param text -- the text handed to the superclass's setup()
     */
    public void setup(String text) {
        super.setup(text);
        if (params == null) {
            params = new GaParameters(); // Default GaParameters
        }
    }

    /**
     * mod26() reduces x to the range 0..25. Java's % operator keeps the sign
     *  of the dividend, so negative values have to be lifted back into range.
     */
    private static int mod26(int x) {
        int r = x % ALPHABET_SIZE;
        return (r < 0) ? r + ALPHABET_SIZE : r;
    }

    /**
     * prefix() returns the first n characters of s, or all of s if it is shorter.
     */
    private static String prefix(String s, int n) {
        return s.substring(0, Math.min(n, s.length()));
    }

    /**
     * getSumIC() concatenates the columns and returns their IC
     * @param s -- the array of column strings
     * @return -- the IC of the concatenation, as computed by IndexOfCoincidence
     */
    private double getSumIC(String[] s) {
        StringBuffer concat = new StringBuffer();
        for (int k = 0; k < s.length; k++) {
            concat.append(s[k]);
        }
        IndexOfCoincidence ioc = new IndexOfCoincidence(concat.toString());
        return ioc.getIOC();
    }

    /**
     * getKColumns() converts the text, s, into an array of k columns. Column j
     *  holds the characters at positions j, j+k, j+2k, ...  If k corresponds to
     *  the period of the Alberti cipher, then each column is a simple
     *  substitution cipher.
     * @param s -- the ciphertext
     * @param k -- the period
     * @return -- the k columns
     * @throws IllegalArgumentException if k is not positive
     */
    private String[] getKColumns(String s, int k) {
        // Validate before allocating so a negative k gets the intended exception.
        if (k <= 0) {
            throw new IllegalArgumentException("Skip size must be positive");
        }
        String columns[] = new String[k];
        for (int j = 0; j < k; j++) {
            StringBuffer sb = new StringBuffer(s.length() / k + 1);
            for (int i = j; i < s.length(); i += k) {
                sb.append(s.charAt(i));
            }
            columns[j] = sb.toString();
        }
        return columns;
    }

    /**
     * mergeColumns() merges the columns into a single string of text by
     *  reading them row by row. This is the inverse of getKColumns() and is
     *  used to reconstruct the original message. Columns that are too short
     *  to have a character in a given row are skipped for that row, so an
     *  empty array or uneven columns are handled without error.
     * @param columns -- an array of the k simple substitution subtexts
     *  of the original message.
     * @return -- the interleaved text
     */
    private String mergeColumns(String columns[]) {
        int longest = 0;
        int total = 0;
        for (int j = 0; j < columns.length; j++) {
            longest = Math.max(longest, columns[j].length());
            total += columns[j].length();
        }
        StringBuffer sb = new StringBuffer(total);
        for (int row = 0; row < longest; row++) {
            for (int j = 0; j < columns.length; j++) {
                if (row < columns[j].length()) {
                    sb.append(columns[j].charAt(row));
                }
            }
        }
        return sb.toString();
    }

    /**
     * findShiftKeyword() uses brute force search to analyze each column in the array to determine
     *   how much it is shifted from a simple substitution histogram. Every possible shift is
     *   tried and the column is re-merged into the text and the text's IOC is computed. The shift
     *   that gives the highest IC is kept for that column.
     *
     *   The columns array is modified in place: on return each column holds its
     *   best-scoring shifted version, which is what run() merges for Step 3.
     *
     *   The whole sweep is repeated up to MAX_TRIES times. Once a sweep fails to
     *   raise the best IC seen so far, exactly one more sweep is made and the
     *   search stops.
     * @param columns -- an array of the k simple substitution subtexts
     * @return -- returns the encryption keyword, the complement of the shiftword found in the search
     */
    private String findShiftKeyword(String columns[]) {
        StringBuffer shiftword = new StringBuffer(columns.length);   // Initialize shiftword to "aaaaa..."
        for (int k = 0; k < columns.length; k++) {
            shiftword.append('a');
        }
        boolean noimprovement = false;  // Variables used to control the search
        boolean lastlap = false;
        double prevmaxIC = 0;
        int tries = 1;                  // Number of tries

        while (!noimprovement && tries <= MAX_TRIES) {       // Try this repeatedly for best results
            if (lastlap) {
                noimprovement = true;                        // This sweep is the final one
            }
            double thismaxIC = 0;
            for (int k = 0; k < columns.length; k++) {       // For each subtext column
                double maxioc = 0;                           // Keep track of best IC
                int bestshift = 0;
                // Start from the unshifted column so that a column is never
                // replaced by null when no shift scores above zero.
                String beststring = columns[k];
                String candidate = columns[k];
                for (int j = 1; j <= ALPHABET_SIZE; j++) {   // For each possible shift
                    candidate = shift(candidate, 1);         // Shift it by 1 more
                    columns[k] = candidate;                  // Replace with the shifted column
                    double ioc = getSumIC(columns);          // Calculate the text's new IC
                    if (ioc > maxioc) {                      // Remember the good ones
                        maxioc = ioc;
                        bestshift = j;
                        beststring = candidate;
                    }
                }
                columns[k] = beststring;                     // Replace column k with best
                shiftword.setCharAt(k,                       // Update the shift keyword
                    (char)('a' + mod26(shiftword.charAt(k) - 'a' + bestshift)));
                if (maxioc > thismaxIC) {
                    thismaxIC = maxioc;
                }
            }
            if (thismaxIC > prevmaxIC) {
                prevmaxIC = thismaxIC;
            } else {
                lastlap = true;
            }
            ++tries;
        }
        return complementWord(shiftword.toString());  // Return the encryption keyword
    }

    /**
     * complementWord() computes the 26-ch complement of a word: each letter
     *  at offset x from 'a' is replaced by the letter at offset (26 - x) mod 26.
     */
    private String complementWord(String s) {
        StringBuffer result = new StringBuffer(s.length());
        for (int k = 0; k < s.length(); k++) {
            result.append((char)('a' + mod26('a' - s.charAt(k))));
        }
        return result.toString();
    }

    /**
     * shiftText() breaks text into keywd.length() columns,
     *  shifts each column by keywd[k], and returns the merged
     *  columns.
     * @throws IllegalArgumentException if keywd is empty
     */
    private String shiftText(String text, String keywd) {
        int period = keywd.length();
        String columns[] = getKColumns(text, period);
        for (int k = 0; k < period; k++) {
            columns[k] = shift(columns[k], keywd.charAt(k) - 'a');
        }
        return mergeColumns(columns);
    }

    /**
     * shift() shifts each character in the string s by n, assuming a 26
     *  letter alphabet. Negative values of n shift backwards.
     * @param s, a text
     * @param n -- the amount to shift each letter
     */
    private String shift (String s, int n) {
        int offset = mod26(n);      // Normalize first so negative or large n stays in a..z
        StringBuffer sb = new StringBuffer(s.length());
        for (int k = 0; k < s.length(); k++) {
            sb.append((char)('a' + mod26(s.charAt(k) - 'a' + offset)));
        }
        return sb.toString();
    }

    /**
     * getWords() returns the space-delimited string of words formed
     *  by shifting the string s by 0, 1, ..., 25. The result starts and
     *  ends with a space.
     */
    private String getWords(String s) {
        StringBuffer sb = new StringBuffer();
        sb.append(" ");
        for (int k = 0; k < ALPHABET_SIZE; k++) { // For each shift
            sb.append(shift(s, k));
            sb.append(" ");
        }
        return sb.toString();
    }

    /**
     * countWrongChars() compares s1 and s2 position by position and returns
     *  the number of characters that are not identical. Only the first
     *  min(s1.length(), s2.length()) positions are compared.
     */
    private int countWrongChars(String s1, String s2) {
        int n = Math.min(s1.length(), s2.length());
        int count = 0;
        for (int k = 0; k < n; k++) {
            if (s1.charAt(k) != s2.charAt(k)) {
                ++count;
            }
        }
        return count;
    }

    /**
     * countWrongs() compares the character arrays a1 and a2 position by
     *  position and returns the number of characters that are not identical.
     *  Only the positions present in both arrays are compared.
     */
    private int countWrongs(char a1[], char a2[]) {
        int n = Math.min(a1.length, a2.length);
        int sum = 0;
        for (int k = 0; k < n; k++) {
            if (a1[k] != a2[k]) {
                ++sum;
            }
        }
        return sum;
    }

    /**
     * getMinKeywdMatch() returns the minimum number of characters by which
     *   keywd differs from one of words on keywdlist.  If keywd exactly
     *   matches a word on keywdlist, 0 is returned. If keywdlist contains
     *   no words, 100 is returned.
     * @param keywd -- the word to look for
     * @param keywdlist -- a white-space delimited list of words
     */
    private int getMinKeywdMatch(String keywd, String keywdlist) {
        java.util.StringTokenizer st = new java.util.StringTokenizer(keywdlist);
        int min = NO_MATCH;
        while (st.hasMoreTokens()) {
            int diff = countWrongChars(keywd, st.nextToken());
            if (diff < min) {
                min = diff;
            }
        }
        return min;
    }

    /**
     * parseSecretKeyword() reads the "#keyword(length)" annotation from the
     *  raw solution text and stores the keyword and its declared length in
     *  actualKeywd and actualKeywdLen.
     * @param annotated -- the solution text before it is cleaned
     * @throws IllegalStateException if the annotation is missing or incomplete
     * @throws NumberFormatException if the text between the parentheses is not an integer
     */
    private void parseSecretKeyword(String annotated) {
        int hash = annotated.indexOf('#');
        int open = (hash < 0) ? -1 : annotated.indexOf('(', hash + 1);
        int close = (open < 0) ? -1 : annotated.indexOf(')', open + 1);
        if (close < 0) {
            throw new IllegalStateException(
                "Solution text lacks the \"#keyword(length)\" annotation needed to report results");
        }
        actualKeywd = annotated.substring(hash + 1, open);
        actualKeywdLen = Integer.parseInt(annotated.substring(open + 1, close));
    }

    /**
     * run() conducts the cryptanalysis and reports the result on System.out.
     *  The inherited solution text must contain a "#keyword(length)" annotation;
     *  it is used only to score the analysis, not to perform it.
     */
    public void run() {
        nf.setMaximumFractionDigits(3);

        /***** Step  0: Clean up the text and initialize things. *************/
        cleantext = TextUtilities.cleanString(text.toLowerCase());
        cleantext = TextUtilities.removeWhiteSpace(cleantext);
        parseSecretKeyword(this.solution);     // Must happen before the solution is cleaned below
        this.solution = TextUtilities.cleanString(this.solution.toLowerCase());
        this.solution = TextUtilities.removeWhiteSpace(this.solution);

        IC = new IndexOfCoincidence(cleantext);
        System.out.print("\tN= " + cleantext.length() + "\tIC= " + nf.format(IC.getIOC()));

        /****** STEP 1: Find the length of the shift keyword and report result **************/
        ShiftLengthAnalyzer sla = new ShiftLengthAnalyzer();
        shiftLen = sla.findShiftLength(cleantext);
        System.out.print("\tP= " + actualKeywdLen + "\tN/P= " + nf.format(1.0*cleantext.length()/actualKeywdLen));
        if (shiftLen == actualKeywdLen) {
            System.out.print("\tOK");
        } else if (shiftLen > 0 && actualKeywdLen % shiftLen == 0) {
            // The detected length evenly divides the declared keyword length.
            // The shiftLen > 0 guard avoids a division by zero.
            System.out.print("\tOK(" + shiftLen +")");
        } else {
            System.out.print("\tNO(" + shiftLen +")");
        }

        /****** STEP 2: Find the shift keyword and report result ***********/
        String columns[] = getKColumns(cleantext, shiftLen);   // Break text into columns
        String shiftKeyword = findShiftKeyword(columns);       // Also un-shifts columns in place
        int wrongChars = getMinKeywdMatch(actualKeywd, getWords(shiftKeyword));
        if (wrongChars == 0) {                               // The actualKeywd is a word on list of words
            System.out.print(" RIGHT " + wrongChars + " ");
        } else {
            System.out.print(" WRONG "+wrongChars + " ");
        }

        /******* Step 3: Merge columns and analyze simple substitution and report result **********/
        String result = analyzeSimpleSubstitution(mergeColumns(columns));
        int marker = result.indexOf("DECRYPT=");
        // An empty or marker-less report (e.g. after a failed analysis) is
        // treated as an empty decryption rather than indexed blindly.
        String decrypt = (marker < 0) ? "" : result.substring(marker + 8);
        String decryptHead = prefix(decrypt, COMPARE_LEN);
        String solutionHead = prefix(this.solution, COMPARE_LEN);
        if (decryptHead.equals(solutionHead)) {
            System.out.println(" Done\n" + result);
        } else {
            // Positions present in only one of the two strings count as wrong.
            int nWrong = countWrongChars(decryptHead, solutionHead)
                + Math.abs(decryptHead.length() - solutionHead.length());
            System.out.println(" nWrong/20=" + nWrong + "\n"  + result);
        }
    } // run()

    /**
     * analyzeSimpleSubstitution() creates an NgramAnalyzer and uses it to analyze the text.
     *  If the analysis throws, the exception is printed and an empty string is returned.
     * @param text -- the text to be analyzed.
     * @return -- the NgramAnalyzer's report followed by the first characters of the solution,
     *  or "" if the analysis failed.
     */
    private String analyzeSimpleSubstitution(String text) {
        String result = "";
        try {
            NgramAnalyzer analyzer = new NgramAnalyzer();
            Alphabet alpha = AlphabetFactory.getInstance(AlphabetFactory.ALPH_az);
            analyzer.setup(text + "$$$" + solution, params.book);
            analyzer.setNgramAnalyzer(4, params.book, alpha, 26, NgramAnalyzer.SIMPLESUB, 100000);

            analyzer.run();
            // Use a bounded prefix: a solution shorter than the preview length
            // must not throw here and discard a successful report.
            result = analyzer.getReport() + "\tSOLUTION=" + prefix(this.solution, SOLUTION_PREVIEW_LEN);
        } catch (Exception e) {
            System.out.println(e.toString());
        }
        return result;
    }
} // AlbertiAnalyzer

