/**
 * File: VigenereAnalyzer.java
 * @author R. Morelli <ralph.morelli@trincoll.edu>
 * 
 * Description: This class assumes that the text it is passed
 *  is encrypted with a Vigenere cipher. It performs both
 *  the Kasiski Test and the Superposition Analysis.

 * <P>Copyright: This program is in the public domain. You can modify it as you 
 *  see fit as long as you properly acknowledge its original author.
 *  It would also be nice if you forwarded your changes to 
 *  <A HREF= "mailto:ralph.morelli@trincoll.edu">ralph.morelli@trincoll.edu</A> so 
 *  they can possibly be added to the "official" version.
 */

package hcrypto.analyzer; 

import hcrypto.cipher.*;
import hcrypto.provider.*;
import hcrypto.engines.*;

import java.io.*;

/**
 * Analyzer for text that is assumed to be encrypted with a Vigenere cipher.
 * It estimates the keyword length in two ways -- from the factors of the
 * distances between repeated bigrams (Kasiski test) and from the displacements
 * that give an unusually high number of coincidences when the text is
 * superposed on itself -- then estimates a keyword for each length and
 * appends a trial decryption to the report.
 */
public class VigenereAnalyzer extends AnalyzerFrame implements ExpertAnalyzer {

    /** Capacity of the table of repeated bigrams. */
    public static final int MAX_BIGRAMS = 10000;
    /** Size of the factor-count tables; only factors below this value are counted. */
    public static final int MAX_FACTORS = 10000;
    /** Largest displacement tried by the superposition analysis. */
    public static final int MAX_KEYWORD = 200;
    /** Dimension of the bigram contact table (one row/column per letter a..z). */
    public static final int MAX_CONTACT = 26;
    /** Coincidence percentage a displacement must exceed to be recorded. */
    public static final double SUPERPOSNUMBER = 6.0;

    /**
     * Most recent position of each bigram, indexed by [first letter][second letter].
     * A value of 0 means "not seen yet"; a bigram last seen at position k is
     * stored as -(k + 1) so that position 0 is distinguishable from "unseen".
     */
    protected int[][] contacts = new int[MAX_CONTACT][MAX_CONTACT];

    protected int keywordLengthEstStat = 0;
    protected int keywordLengthEstKasiski = 0;
    protected int nRepeatedBigrams = 0;
    protected int nSuperDisplacements = 0;

    /** kasiskiFactors[f] counts how often f was counted as a factor of a bigram distance. */
    protected int[] kasiskiFactors = new int[MAX_FACTORS];
    /** superFactors[f] counts how often f was counted as a factor of a recorded displacement. */
    protected int[] superFactors = new int[MAX_FACTORS];
    /** Displacements, in increasing order, whose coincidence rate exceeded SUPERPOSNUMBER. */
    protected int[] superDispl = new int[MAX_KEYWORD];
    private KasiskiRecord[] bigrams = new KasiskiRecord[MAX_BIGRAMS];

    private Alphabet alphabet;
    private String text;            // the input with non-alphabetic characters removed
    private String originalText;    // the input exactly as supplied
    private StringBuffer resultSB;  // accumulates the report

    private String keyword;

    private TextStatistics stats = null;

    public VigenereAnalyzer() {}

    /**
     * Creates an analyzer around an existing statistics object.
     * @param ts the statistics for the text to be analyzed
     * @throws NullPointerException if ts is null
     */
    public VigenereAnalyzer(TextStatistics ts) throws NullPointerException {
        if (ts == null) {
            throw new NullPointerException("VigenereAnalyzer: TextStatistics object is not instantiated");
        }
        stats = ts;
    }

    /**
     * Prepares the analyzer for the given text without running the analysis.
     * @param s the cryptogram to analyze
     */
    public void setup(String s) {
        initialize(s);
    }

    /** Runs the analysis on the text given to setup() and appends the report to the display. */
    public void run() {
        doAnalysis();
        display.append(getReport());
    }

    /**
     * @return the report accumulated so far, or an empty string if no text
     *  has been set up yet
     */
    public String getReport() {
        return (resultSB == null) ? "" : resultSB.toString();
    }

    /**
     *  This method inserts a bigram into a table of repeated bigrams
     *  where ch1 is the first character of the bigram, ch2 is
     *  the second character, and displ is the displacement from the
     *  previous occurrence of this bigram. The bigram is silently
     *  dropped once the table holds MAX_BIGRAMS entries.
     */
    protected void insertBigram(char ch1, char ch2, int displ) {
        if (nRepeatedBigrams < MAX_BIGRAMS) {
            bigrams[nRepeatedBigrams] = new KasiskiRecord("" + ch1 + ch2, displ);
            ++nRepeatedBigrams;
        }
    }

    /**
     *  This method retrieves the kth bigram from the table of repeated
     *  bigrams, formatted as the bigram followed by its displacement.
     *  An out-of-range k yields a diagnostic string instead.
     */
    protected String retrieveBigram(int k) {
        if (k >= 0 && k < nRepeatedBigrams) {
            return bigrams[k].bigram + " " + bigrams[k].displ;
        }
        return "RetrieveBigramK: Invalid value of k " + k;
    }

    /**
     *  This method performs a superposition test. It places the
     *  text on one line and superposes a copy of the text, displaced by
     *  1..MAX_KEYWORD positions, under it. For each displacement it computes
     *  the percentage of positions at which the two lines coincide and
     *  records, in superDispl, every displacement whose percentage exceeds
     *  SUPERPOSNUMBER.
     * @param s the text to superpose on itself; when null, the text given
     *  to setup() is used
     */
    public void computeSuperpositionAnalysis(String s) {
        String source = (s != null) ? s : text;
        if (source == null) {
            return;
        }
        int length = source.length();

        // A displacement of length or more leaves nothing to compare.
        for (int displ = 1; displ <= MAX_KEYWORD && displ < length; displ++) {
            int nComparisons = length - displ;
            int nCoincidences = 0;
            for (int k = displ; k < length; k++) {
                if (source.charAt(k) == source.charAt(k - displ)) {
                    ++nCoincidences;
                }
            }
            double percent = 100. * nCoincidences / nComparisons;
            if (percent > SUPERPOSNUMBER && nSuperDisplacements < superDispl.length) {
                superDispl[nSuperDisplacements] = displ;
                ++nSuperDisplacements;
            }
        }
    }

    /**
     * Scans s for repeated bigrams. Each time a bigram of two letters a..z
     * (case is ignored) recurs, the distance from its previous occurrence is
     * inserted into the table of repeated bigrams. Pairs containing any other
     * character are skipped.
     * @param s the text to scan
     */
    public void computeBigramDistances(String s) {
        for (int k = 0; k < s.length() - 1; k++) {
            char ch1 = s.charAt(k);
            char ch2 = s.charAt(k + 1);
            if (!Character.isLetter(ch1) || !Character.isLetter(ch2)) {
                continue;
            }
            ch1 = Character.toLowerCase(ch1);
            ch2 = Character.toLowerCase(ch2);
            // isLetter() also accepts letters outside a..z, which have no
            // slot in the 26 x 26 contact table.
            if (!isLowerAz(ch1) || !isLowerAz(ch2)) {
                continue;
            }

            int row = ch1 - 'a';
            int col = ch2 - 'a';
            int marker = contacts[row][col];
            if (marker < 0) {
                // marker == -(previous position + 1)
                insertBigram(ch1, ch2, k + marker + 1);
            }
            contacts[row][col] = -(k + 1);     // remember this occurrence
        }
    }

    /** @return the keyword estimated from the Kasiski key length, or null before any analysis */
    public String getKeywordString() {
        return keyword;
    }

    /**
     * Sets up the given text and analyzes it (ExpertAnalyzer interface).
     * @param s the cryptogram to analyze
     */
    public void doAnalysis(String s) {
        initialize(s);
        doAnalysis();
    }

    /**
     * Analyzes the text given to setup(): runs the Kasiski and superposition
     * tests, estimates a keyword for each resulting key length and appends
     * the findings and trial decryptions to the report. All counters and
     * tables are cleared first, so repeated calls do not accumulate results.
     * @throws IllegalStateException if no text has been set up
     */
    public void doAnalysis() {
        if (text == null) {
            throw new IllegalStateException("VigenereAnalyzer: setup(String) must be called before doAnalysis()");
        }
        resetAnalysisState();

        computeBigramDistances(text);
        resultSB.append("\nVigenerAnalyzer: There are " + nRepeatedBigrams + " repeated bigrams\n");
        for (int k = 0; k < nRepeatedBigrams; k++) {
            computeFactors(kasiskiFactors, bigrams[k].displ);
        }
        int keyLengthKasiski = estimateKeywordLength(kasiskiFactors);
        keywordLengthEstKasiski = keyLengthKasiski;

        computeSuperpositionAnalysis(text);
        resultSB.append("\nVigenerAnalyzer: There are " + nSuperDisplacements +
            " that give more than " + SUPERPOSNUMBER + " percent coincidences.");
        for (int k = 0; k < nSuperDisplacements; k++) {
            computeFactors(superFactors, superDispl[k]);
        }
        // The smallest displacement with a high coincidence rate is taken as
        // the key length; 0 when no displacement qualified.
        int keyLengthSuperpos = (nSuperDisplacements > 0) ? superDispl[0] : 0;

        String keywd1 = estimateKeyword(keyLengthKasiski);
        String keywd2 = estimateKeyword(keyLengthSuperpos);
        keyword = keywd1;
        resultSB.append("\nThe Kasiski test suggests the keyword length is " + keyLengthKasiski);
        resultSB.append("\n A possible keyword of that length is " + keywd1);
        resultSB.append("\nThe Superposition test suggests the keyword length is " + keyLengthSuperpos);
        resultSB.append("\n A possible keyword of that length is " + keywd2);

        decrypt(keywd1);
        decrypt(keywd2);
    }

    /**
     * Appends to the report a decryption of (at most the first DECIPHER_LIMIT
     * characters of) the original text under the given keyword. Decryption is
     * skipped, with a note in the report, when the keyword is empty.
     */
    private void decrypt(String keyword) {
        if (keyword == null || keyword.length() == 0) {
            resultSB.append("\n\nNo keyword estimate is available, so no decryption was attempted.\n");
            return;
        }
        try {
            Provider.addProvider(new DefaultProvider("Default"));
            Cipher cipher = Cipher.getInstance("Vigenere");
            VigenereKey key = (VigenereKey)HistoricalKey.getInstance("Vigenere", cipher.getProvider());
            resultSB.append("\n\nHere's a partial decryption with keyword " + keyword + " \n");
            key.init(keyword, AlphabetFactory.getInstance(AlphabetFactory.ALPH_az),
                     AlphabetFactory.getInstance(AlphabetFactory.ALPH_az));
            cipher.init(key);

            // The limit is applied to the string that is actually decrypted.
            String sample = originalText;
            if (sample.length() > DECIPHER_LIMIT) {
                sample = sample.substring(0, DECIPHER_LIMIT);
            }
            resultSB.append(cipher.decrypt(sample) + "\n");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Estimates the keyword length as the most frequently counted factor,
     * 3 or larger, in the given factor table; ties go to the smaller factor.
     * As a side effect, when text statistics are available, a formula-based
     * estimate computed from the coincidence index is stored in
     * keywordLengthEstStat.
     * @param factors factors[f] is the number of times f was counted
     * @return the most frequent factor, or 0 if no factor of 3 or more was counted
     */
    public int estimateKeywordLength(int factors[]) {
        if (stats != null) {
            int totalChars = stats.getCharCount();
            double ci = stats.getCoincidenceIndex();
            double num = 0.027 * totalChars;
            double den = (ci * (totalChars - 1.0)) -
                   (TextStatistics.KAPPA_R * totalChars) + TextStatistics.KAPPA_P;
            // A zero denominator would otherwise round an infinity into a garbage int.
            keywordLengthEstStat = (den != 0.0) ? (int)Math.round(num / den) : 0;
        }

        int pMaxFactor = 0;
        int vMaxFactor = 0;
        for (int k = 3; k < factors.length; k++) {   // scan the whole table
            if (factors[k] > vMaxFactor) {
                vMaxFactor = factors[k];
                pMaxFactor = k;
            }
        }
        return pMaxFactor;
    }

    /**
     *  To find the keyword we break the cryptogram into
     *  keyLen monoalphabetic cryptograms, each of which is obtained
     *  by a Caesar shift from the standard alphabet. So for each
     *  letter in the keyword, find its shift.
     * @param keyLen the assumed keyword length
     * @return a keyword of keyLen letters; empty when keyLen is 0 or less
     */
    public String estimateKeyword(int keyLen) {
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < keyLen; k++) {
            sb.append((char)('a' + getOptimalShift(k, keyLen)));
        }
        return sb.toString();
    }

    /**
     * Counts the factors of displ in the given table and returns a listing of
     * its distinct prime factors. The factors counted are every power of each
     * prime that divides displ and, when displ is even, twice every odd prime
     * power. Factors that do not fit in the table are not counted.
     * @param factors the table of factor counts to update
     * @param displ the number to factor
     * @return the distinct prime factors of displ, each followed by a comma;
     *  a displ of 1 or less is not factored
     */
    protected String computeFactors(int factors[], int displ) {
        StringBuilder sb = new StringBuilder();
        boolean isEven = false;

        if ((displ % 2) == 0 && displ != 0) {   // check if 2 is a factor
            isEven = true;
            sb.append("2,");

            int power = 2;                      // 2, 4, 8, 16, ...
            do {                                // remove all 2 factors
                countFactor(factors, power);
                displ = displ / 2;
                power = power * 2;
            } while ((displ % 2) == 0);
        }

        // now we're left with an odd number
        for (int k = 3; k <= displ; k += 2) {   // check 3, 5, 7, ...
            if ((displ % k) == 0) {
                sb.append(k).append(',');
                int power = k;                  // k, k^2, k^3, ...
                do {
                    countFactor(factors, power);
                    if (isEven) {
                        countFactor(factors, 2 * power);
                    }
                    displ = displ / k;
                    power = power * k;
                } while ((displ % k) == 0);
            }
        }
        if (displ != 1) {
            sb.append(displ);
        }
        return sb.toString();
    }

    /**
     * This method performs a Chi-Square test to find the optimal Caesar shift
     *  on a polyalphabetic text. It assumes that every _displ_ character
     *  starting at _firstChar_ belongs to the same alphabet. It is used primarily
     *  for analyzing Vigenere-like and Caesar-shift cryptograms.
     * @param firstChar - the location of the first character in the cryptotext
     * @param displ - the displacement (keyword length) or cycle length
     * @return the shift, 0..25, with the smallest Chi-Square value
     * @throws IllegalArgumentException if firstChar is negative or displ is
     *  not positive
     */
    public int getOptimalShift (int firstChar, int displ) {
        if (firstChar < 0) {
            throw new IllegalArgumentException("getOptimalShift: firstChar must not be negative: " + firstChar);
        }
        if (displ < 1) {
            // a step of 0 would never advance through the text
            throw new IllegalArgumentException("getOptimalShift: displ must be positive: " + displ);
        }

        String cipherText = this.text;
        double tChiSqrs[] = new double[26];     // one Chi-Square value per shift
        int tFreqs[] = new int[26];             // letter counts for this alphabet

        // Count the letters at firstChar, firstChar + displ, ...
        for (int k = firstChar; k < cipherText.length(); k += displ) {
            char ch = cipherText.charAt(k);
            if (Character.isLetter(ch)) {
                ch = Character.toLowerCase(ch);
                if (isLowerAz(ch)) {            // letters outside a..z have no slot
                    ++tFreqs[ch - 'a'];
                }
            }
        }

        // Compute the Chi-square values for each possible shift
        // NOTE(review): tFreqs holds raw counts; confirm that
        // TextStatistics.getEnglishFrequency() is on the same scale.
        for (int j = 0; j < 26; j++) {              // for each possible shift j = 0..25
            for (char k = 'a'; k <= 'z'; k++) {     // for each character
                int index = ((k - 'a') + j) % 26;
                double expected = TextStatistics.getEnglishFrequency(k);
                double diff = expected - tFreqs[index];
                tChiSqrs[j] = tChiSqrs[j] + (diff * diff) / expected;
            }
        }

        // Return the shift with the minimum Chi-square value
        return TextStatistics.getIndexOfMinimum(tChiSqrs);
    }

    /** Loads the text, its statistics and a fresh report buffer; shared by setup() and doAnalysis(String). */
    private void initialize(String s) {
        stats = new TextStatistics(s, true);
        originalText = s;
        try {
            alphabet = AlphabetFactory.getInstance(AlphabetFactory.ALPH_az);
        } catch (Exception e) {
            e.printStackTrace();
        }
        text = TextStatistics.removeNonAlphabetics(s, alphabet);
        resultSB = new StringBuffer();
        display.setText("Vigenere Analyzer: Begin Analysis\n");
    }

    /** Clears every counter, table and the report so that an analysis starts from scratch. */
    private void resetAnalysisState() {
        nRepeatedBigrams = 0;
        nSuperDisplacements = 0;
        for (int row = 0; row < contacts.length; row++) {
            java.util.Arrays.fill(contacts[row], 0);
        }
        java.util.Arrays.fill(kasiskiFactors, 0);
        java.util.Arrays.fill(superFactors, 0);
        java.util.Arrays.fill(superDispl, 0);
        java.util.Arrays.fill(bigrams, null);
        if (resultSB == null) {
            resultSB = new StringBuffer();
        } else {
            resultSB.setLength(0);
        }
    }

    /** Increments factors[factor] when that index exists in the table. */
    private static void countFactor(int[] factors, int factor) {
        if (factor >= 0 && factor < factors.length) {
            ++factors[factor];
        }
    }

    /** Tells whether ch is one of the 26 letters a..z. */
    private static boolean isLowerAz(char ch) {
        return ch >= 'a' && ch <= 'z';
    }

}
