/*
 * File: DigramAnalyzer.java
 * @author R. Morelli <ralph.morelli@trincoll.edu>
 * 
 * Description: This class analyzes traditional cryptograms of
 *  the sort you find in the newspaper. It assumes the text
 *  is a word-delimited cryptogram created using a simple substitution 
 *  cipher. 
 * 
 * Credits: The algorithm here is based on the algorithm reported by
 *  Thomas Jakobsen, "A fast method for the cryptanalysis of substitution
 *  ciphers," Cryptologia 19(3), 1995, pp. 265-274.
 *
 * At present this analyzer only works for the alphabet 'a' to 'z'.
 */

package hcrypto.analyzer;

import hcrypto.cipher.*;
import hcrypto.engines.*;
import hcrypto.provider.*;
import java.util.*;
import java.text.*;

//public class DigramAnalyzer extends AnalyzerFrame implements Analyzer {
/**
 * Analyzer for word-delimited cryptograms produced by a simple substitution
 * cipher over the alphabet 'a'..'z'.
 *
 * <p>Internally the word delimiter is represented by '{', the character that
 * follows 'z' in ASCII, so that every symbol maps to an index 0..26 by
 * subtracting 'a'. The analysis starts from the identity key and repeatedly
 * tries swapping two entries of the key, keeping a swap only when it lowers
 * the distance between the message's digram frequencies and the English
 * digram frequencies taken from TextStatistics.
 *
 * <p>Typical use: construct, call {@link #setup(String)}, call {@link #run()},
 * then read the result with {@link #getReport()}. Progress information is
 * written to System.out while the analysis runs.
 *
 * <p>NOTE(review): the report buffer is never cleared, so reusing one instance
 * for several runs appends the reports one after another; the reported
 * "Time Used" is measured from construction of the object, not from run().
 */
public class DigramAnalyzer implements Analyzer {
    /** Size of the working alphabet: 'a'..'z' plus '{' standing in for the blank. */
    private static final int MAX_KEY = 27;

    /** Index (relative to 'a') of '{', the stand-in for the word delimiter. */
    private static final int SPACE_INDEX = '{' - 'a';

    /** The search gives up after this many consecutive swaps that did not improve the score. */
    private static final int MAX_FAILED_SWAPS = 3000;

    /** Message used when the analyzer is used before setup(String) has been called. */
    private static final String NOT_SET_UP =
        "DigramAnalyzer: setup(text) must be called before the text can be analyzed";

    private String text;               // The raw text to be analyzed
    private String preparedText;       // Lowercased text with every non-letter replaced by '{'
    private StringBuffer resultSB;     // Accumulates the report returned by getReport()
    private FrequencyTable ft;

    private DateFormat dateformat = DateFormat.getTimeInstance(DateFormat.LONG);

    private FrequencyRecord eng_freqs[];    // Expected English character frequencies, sorted
    private FrequencyRecord crypto_freqs[]; // Character frequencies of the message, sorted

    private double english_digrams[][];   // Relative English digram frequencies
    private double crypto_digrams[][];    // Relative digram frequencies of the message
    private int map_crypt[];              // Message symbols, last frequency record first
    private int map_engl[];               // English symbols, last frequency record first
    private int d_key[];                  // d_key[c] is the plaintext index for cipher index c

    private long time0 = System.currentTimeMillis();  // Start of the "Time Used" measurement
    private TextStatistics stats = null;  // Stored by the one-argument constructor; not read in this class

    /**
     * Default constructor required for CryptoToolJ.
     */
    public DigramAnalyzer() {
        resultSB = new StringBuffer();
    }

    /**
     * Creates an analyzer associated with the given statistics object.
     * @param ts the TextStatistics object; must not be null
     * @throws NullPointerException if ts is null
     */
    public DigramAnalyzer(TextStatistics ts) throws NullPointerException {
        // Validate first so that a rejected argument never leaves partly initialized state.
        if (ts == null)
            throw new NullPointerException("DigramAnalyzer: TextStatistics object is not instantiated");
        stats = ts;
        resultSB = new StringBuffer();
    }

    /**
     * This method initializes the Analyzer: it computes the frequency
     * tables for the message, the initial key and both digram tables.
     * @param text a String pointing to the text being analyzed
     */
    public void setup(String text) {
        this.text = text;
        // Use a fixed locale: with the default locale, e.g. Turkish, 'I' would be
        // lowercased to a dotless i, which is outside 'a'..'z' and would then be
        // treated as a word delimiter.
        preparedText = text.toLowerCase(Locale.ENGLISH);

        ft = new FrequencyTable(preparedText, AlphabetFactory.ALPH_cryptogram);
        preparedText = cleanText(preparedText);

        crypto_freqs = ft.getSortedCryptogramFrequencies();
        // NOTE(review): presumably the table has 27 records in ascending order with
        // the blank last; the last record is relabelled as '{' -- confirm against FrequencyTable.
        crypto_freqs[MAX_KEY - 1].ch = '{';
        freqPrint("CRYPTO: ", crypto_freqs);

        // The map_crypt contains the crypto chars, last frequency record first
        map_crypt = makeMap(crypto_freqs);
        alphaPrint("MAP_CRYPT: ", map_crypt);

        eng_freqs = makeEnglishDigramTable(ft);
        freqPrint("ENGLISH: ", eng_freqs);
        // The map_engl contains the English chars, last frequency record first
        map_engl = makeMap(eng_freqs);
        alphaPrint("MAP_ENGL: ", map_engl);

        d_key = initKey(map_crypt, map_engl);
        initDigramTables(eng_freqs, crypto_freqs);
    }

    /**
     * This method replaces every character other than 'a'..'z' (the blank
     * included) with '{', which is the character following 'z' in ASCII.
     * This lets every symbol be turned into an array index by subtracting 'a'.
     * @param text the lowercased message
     * @return a String of the same length containing only 'a'..'z' and '{'
     */
    private String cleanText(String text) {
        StringBuffer sb = new StringBuffer(text.length());
        for (int k = 0; k < text.length(); k++) {
            char ch = text.charAt(k);
            if (ch >= 'a' && ch <= 'z')
                sb.append(ch);
            else
                sb.append('{');
        }
        return sb.toString();
    }

    /**
     * This method initializes both the English- and Crypto- digram tables.
     * Data for the english table are taken from TextStatistics; the crypto
     * table is counted from the message as decrypted with the current key.
     * @param eng_freqs the English frequency records (currently not used)
     * @param crypto_freqs the message frequency records (currently not used)
     */
    private void initDigramTables(FrequencyRecord eng_freqs[], FrequencyRecord crypto_freqs[]) {
        int total_chars = TextStatistics.digram_chars;
        int digram_data[][] = TextStatistics.digram_data;

        crypto_digrams = new double[MAX_KEY][MAX_KEY];
        english_digrams = new double[MAX_KEY][MAX_KEY];

        // Turn the raw English digram counts into relative frequencies.
        for (int j = 0; j < MAX_KEY; j++)
            for (int k = 0; k < MAX_KEY; k++)
                english_digrams[j][k] = 1.0 * digram_data[j][k]/total_chars;

        // The decrypted text has exactly one character per character of the
        // prepared text, so the helper uses the same increment as before.
        initCryptoDigrams(decrypt(false));
    }

    /**
     * This methods creates an array of chars (represented as 0..26)
     *  by reading the given frequency records from last to first.
     *  Entry 0 is then forced to be the blank.
     * @param freqs the frequency records, expected to hold 27 entries
     * @return the symbol indices, last record first, with the blank at index 0
     */
    private int[] makeMap(FrequencyRecord freqs[]) {
        int map[] = new int[MAX_KEY];
        int j = 0;
        for (int k = freqs.length-1; k >= 0; k--)
            map[j++] = freqs[k].ch - 'a';
        map[0] = SPACE_INDEX; // Space
        return map;
    }

    /**
     * This method builds the expected English character counts for a
     * message of the same size as the one being analyzed, one record for
     * the blank and one for each of 'a'..'z', and sorts them.
     * (Despite its name it produces single-character records, not digrams.)
     * @param ft the FrequencyTable for this message
     * @return the sorted array of 27 frequency records
     */
    private FrequencyRecord[] makeEnglishDigramTable(FrequencyTable ft) {
        FrequencyRecord freqs[] = new FrequencyRecord[MAX_KEY];

        int count = ft.getCharCount();
        // The blank is given a quarter of the character count.
        freqs[0] = new FrequencyRecord('{', (int)Math.round(0.25 * count));
        for (char ch = 'a'; ch <= 'z'; ch++) {
            // NOTE(review): englishFrequency is indexed by the character itself,
            // not by ch - 'a' -- confirm against TextStatistics.
            freqs[ch - 'a' + 1] =
                new FrequencyRecord(ch, (int)Math.round(TextStatistics.englishFrequency[ch] * count));
        }
        java.util.Arrays.sort(freqs);
        return freqs;
    }

    /**
     * This method creates the initial key, which is the identity mapping
     * (every symbol maps to itself), and prints it.
     * @param map_c the message symbols by frequency (currently not used)
     * @param map_e the English symbols by frequency (currently not used)
     * @return the initial key of 27 entries
     */
    private int[] initKey(int map_c[], int map_e[]) {
        int key[] = new int[MAX_KEY];
        for (int k = 0; k < MAX_KEY; k++)
            key[k] = k;
        alphaPrint("KEY: ", key);
        return key;
    }

    /**
     * This method prints the int array as if it contained a..z.
     * @param prompt an informative String
     * @param arr an array of integers, each an offset from 'a'
     */
    private void alphaPrint(String prompt, int arr[]) {
        System.out.print(prompt);
        for (int k = 0; k < arr.length; k++)
            System.out.print((char)('a'+ arr[k]));
        System.out.println();
    }

    /**
     * This method prints an array of FrequencyRecords.
     * @param prompt an informative String
     * @param f an array of frequency records
     */
    private void freqPrint(String prompt, FrequencyRecord f[]) {
        System.out.println(prompt);
        for (int k = 0; k < f.length; k++)
            System.out.print(f[k].ch + ":" + f[k].count + ", ");
        System.out.println();
        System.out.println();
    }

    /**
     * This method is part of the Analyzer interface. It runs the analysis.
     * setup(String) must have been called first.
     */
    public void run() {
        resultSB.append(dateformat.format(new Date()) + "\n");
        doAnalysis();
    }

    /**
     * This method is part of the Analyzer interface. It returns the report
     * generated by the analysis.
     */
    public String getReport() {
        return toString();
    }

    /**
     * This method returns the report generated by the analysis.
     */
    public String toString() {
        return resultSB.toString();
    }

    /**
     * This method clears the crypto digram table and recounts it from the
     * given text. Each adjacent pair of characters adds 1/(length-1), so the
     * entries sum to 1 for any text with at least two characters.
     * @param msg text containing only 'a'..'z' and '{'
     */
    private void initCryptoDigrams(String msg) {
        for (int j = 0; j < crypto_digrams.length; j++)
            for (int k = 0; k < crypto_digrams[j].length; k++)
                crypto_digrams[j][k] = 0;

        if (msg.length() < 2)
            return;                      // No digrams to count

        double incr = 1.0/(msg.length()-1);
        for (int k = 1; k < msg.length(); k++)
            crypto_digrams[msg.charAt(k-1)-'a'][msg.charAt(k)-'a'] += incr;
    }

    /**
     * This method scores the current key: the sum over all symbol pairs of
     * the absolute difference between the message's digram frequency and the
     * English digram frequency of the pair the key maps it to. Lower is better.
     * @return the score of the current key
     */
    private double getScore() {
        double sum = 0;
        for (int j = 0; j < crypto_digrams.length; j++)
            for (int k = 0; k < crypto_digrams.length; k++)
                sum += Math.abs(crypto_digrams[j][k] - english_digrams[d_key[j]][d_key[k]]);
        return sum;
    }

    /**
     * This method decrypts the prepared text with the current key.
     * @param replace_brace if true each '{' is written as a blank,
     *   otherwise it is left as '{'
     * @return the decrypted text, one character per character of the prepared text
     * @throws NullPointerException if setup(String) has not been called
     */
    public String decrypt(boolean replace_brace) {
        if (preparedText == null || d_key == null)
            throw new NullPointerException(NOT_SET_UP);

        StringBuffer sb = new StringBuffer(preparedText.length());
        for (int k = 0; k < preparedText.length(); k++) {
            char ch = preparedText.charAt(k);
            if (ch == '{') {
                // The delimiter is never run through the key.
                if (replace_brace)
                    sb.append(' ');
                else
                    sb.append(ch);
            }
            else {
                sb.append((char)('a' + d_key[ch - 'a']));
            }
        }
        return sb.toString();
    }

    /**
     * This method exchanges the two entries of arr that hold the values
     * a and b. Both values must be present in arr.
     * @param arr the array to modify
     * @param a the first value to look for
     * @param b the second value to look for
     */
    private void swap(int arr[], int a, int b) {
        int ka = 0;
        while (arr[ka] != a)  // Find a and b in arr
            ka++;
        int kb = 0;
        while (arr[kb] != b)
            kb++;
        int temp = arr[ka];
        arr[ka] = arr[kb];
        arr[kb] = temp;
    }

    /**
     *  This method performs an analysis of the text. It assumes the text
     *  is a word-delimited cryptogram created using a simple substitution cipher.
     *
     *  The search picks the symbols at positions a and a+b of map_crypt and
     *  swaps them in the key. A swap that lowers the score is kept and the
     *  search restarts at a = b = 1; otherwise the swap is undone and the next
     *  pair is tried, widening the distance b once a has covered the array.
     *  Position 0 of map_crypt, the blank, is never selected. The search stops
     *  when b reaches 26 or after 3000 consecutive swaps without improvement.
     *  The outcome is appended to the report.
     *  @throws NullPointerException if setup(String) has not been called
     */
    public void doAnalysis() {
        if (crypto_digrams == null || d_key == null || map_crypt == null)
            throw new NullPointerException(NOT_SET_UP);

        double bestscore = getScore();   // Initial score
        double score = 0;
        int count = 0;                   // Consecutive swaps that did not help
        int swap_count = 0;              // Swaps that were kept
        System.out.println("SCORE= " + bestscore);
        int a = 1, b = 1;
        int alpha = 0, beta = 0;

        while (count < MAX_FAILED_SWAPS && b < MAX_KEY - 1) {
            alpha = map_crypt[a];
            beta = map_crypt[a+b];
            swap(d_key, alpha, beta);
            score = getScore();

            if (score < bestscore) {
                System.out.print("SCORE= " + score
                                   + " alpha=" + (char)(alpha + 'a')
                                   + " beta=" + (char)(beta + 'a'));
                alphaPrint("  KEY: " , d_key);
                a = 1; b = 1;
                bestscore = score;
                count = 0;
                ++swap_count;
            }
            else {
                swap(d_key, alpha, beta);   // Undo the swap
                ++count;
                a = a + 1;
                if (a + b > MAX_KEY - 1) {  // a+b must stay a valid index into map_crypt
                    a = 1;
                    b = b + 1;
                }
            }
        }
        alphaPrint("MAP_CRYPT: ", map_crypt);
        alphaPrint("KEY:", d_key );
        resultSB.append("DONE:  swap_count= " + swap_count + " count= " + count + " b= " + b + "\n");
        resultSB.append("TEXT: \n");
        resultSB.append(text + "\n");
        resultSB.append("DECRYPT: \n");
        resultSB.append(decrypt(true) + "\n");
        resultSB.append("Time Used = " + (System.currentTimeMillis() - time0) + "ms");
    }

}
