package hcrypto.analyzer;

 /*
 * File: NgramClimber.java
 * @author R.Walde <rwalde@nyc.rr.com>
 *
 *  Description: This class uses an NgramArray with frequencies of
 *  N = 2, 3, or 4  (bigrams, trigrams, or  tetragrams) of a language
 *  to cryptanalyze simple substitution ciphers or cryptograms in particular.
 *  Using the sum of the inverses of the frequencies as an evaluation of how
 *  accurately a substitution matches the usual N-gram frequencies in a
 *  language is an idea described by Alex Griffing the developer of the
 *  "Automatic Cryptogram Solver".
 *
 * <P>Copyright: This program is in the public domain. You can modify it as you
 *  see fit as long as you properly acknowledge its original author.
 *  It would also be nice if you forwarded your changes to
 *  <A HREF= "mailto:ralph.morelli@trincoll.edu">ralph.morelli@trincoll.edu</A> so
 *  they can possibly be added to the "official" version.
 */

import hcrypto.cipher.*;
import java.io.*;
import java.text.NumberFormat;
import java.lang.Math;

/**
 * Cryptanalyzes simple substitution ciphers by random-restart hill climbing
 * over decrypting substitutions.  Each candidate decryption is scored with
 * {@code NgramArray.recipDist()}; throughout this class a smaller score is
 * treated as a better fit.
 */
public class NgramClimber implements Analyzer{

    private final Alphabet alphabet; // Defines which chars occur in N-grams.
    private final int alphSize; // The size of the alphabet (alphabet.getSize()).
    private final int NN;  // The N of the N-gram = sequence of N chars.
    private final NgramArray ngramArr; // Scores int-encoded text via recipDist().

    private String cryptoText; // The cryptotext to cryptanalyze
    private String plainText; // The current best plaintext found.
    private int randSize; // Randomize the first randSize chars of alphabet.
    private int cnumsSize; // The number of alphabet chars in cryptoText.
    private int[] cryptoCnums; // Stores the int array of cryptoText.
    private int[] bestCnums; // Stores the int array of current best solution.
    private int[] bestDecrypt; //Decrypting substitution for best plaintext.
    private float bestValue; // ngramArr.recipDist(bestCnums)

    /**
     * Creates a climber for N-grams of the given size.
     *
     * @param N        the size of the N-gram, stored in NN and passed to NgramArray
     * @param fileName passed to the NgramArray constructor; per the original
     *                 documentation, a file containing a large text typical of
     *                 the language of the cryptotext
     * @param alph     describes which letters occur in the N-grams
     * @throws Exception declared because the NgramArray construction is not
     *                   guarded here; whatever it throws propagates to the caller
     */
    public NgramClimber(int N, String fileName, Alphabet alph) throws Exception{
        NN = N;
        alphabet = alph;
        alphSize = alphabet.getSize();
        ngramArr = new NgramArray(NN, fileName, alphabet);
    }  //NgramClimber(N, fileName, alphabet)

    /**
     * From the Analyzer interface.
     *
     * @return the same text as toString()
     */
    public String getReport() {
        return toString();
    }

    /**
     * From the Analyzer interface.  Intentionally empty:
     * everything is done in the constructor or calcPlaintext().
     */
    public void run() {
    }

    /**
     * Returns the report produced by the underlying NgramArray.
     */
    public String toString(){
        // String.valueOf keeps the old StringBuffer.append() result ("null") for a null report.
        return String.valueOf(ngramArr.getReport());
    }//toString()

    /**
     * Prints toString() to standard output.
     */
    public void print(){
        System.out.println(toString());
    } //print()

    /**
     * charToString(ch) returns ch itself as a one-character string when it is
     * a letter or digit; otherwise a backslash followed by its decimal code.
     */
    protected String charToString(char ch) {
        if (Character.isLetterOrDigit(ch)) {
            return ch + "";
        }
        return "\\" + (int)ch;
    }  //charToString()

    /**
     * @return the size of the alphabet given to the constructor
     */
    public int getAlphSize() {
        return alphSize;
    } //getAlphSize()

    /**
     * @return the current cryptotext, or "" if none has been set
     */
    public String getCryptoText(){
        return (cryptoText != null) ? cryptoText : "";
    } // getCryptoText()

    /**
     * @return the best plaintext found so far, or "" if none has been computed
     */
    public String getPlainText(){
        return (plainText != null) ? plainText : "";
    } // getPlainText()

    /**
     * calcPlaintext(cText, rSize, mLoops) attempts to cryptanalyze the
     * cryptoText cText by finding the best N-gram fit.  It is assumed that the
     * encryption permuted the first rSize alphabet elements only.  The integer
     * mLoops is used as the max number of attempts at finding a better solution
     * without success before stopping.
     *
     * Progress is printed to standard output.  Any exception raised while
     * searching is reported there as well instead of being propagated.
     *
     * @param cText  the cryptotext; characters outside the alphabet are kept
     *               in the plaintext but ignored for scoring
     * @param rSize  how many leading alphabet positions may be permuted; values
     *               outside 0..getAlphSize() are clamped into that range
     * @param mLoops consecutive non-improving restarts allowed before stopping
     */
    public void calcPlaintext(String cText, int rSize, int mLoops) {
        // Clamp so that the shuffle and the pairwise swaps can never index
        // past the alphSize-long substitution arrays.
        randSize = Math.max(0, Math.min(rSize, alphSize));
        setup(cText);  //Just assign cText to instance variable

        try{
            cryptoCnums = textToCnums(cryptoText);
            cnumsSize = cryptoCnums.length;

            bestCnums = new int[cnumsSize];
            bestDecrypt = new int[alphSize];
            int[] testCnums = new int[cnumsSize];
            int[] testDecrypt = new int[alphSize];
            for (int m = 0; m < alphSize; m++){    //start from the identity substitution
                bestDecrypt[m] = m;
                testDecrypt[m] = m;
            }//for

            randomizeDecrypt(bestDecrypt);
            decryptCnums(cryptoCnums, bestDecrypt, bestCnums);
            bestValue = ngramArr.recipDist(bestCnums);
            plainText = decryptString(cryptoText, bestDecrypt);
            System.out.println("Init bestValue = " + bestValue + "\n");

            int loopCount = 0;
            while (loopCount < mLoops) {
                loopCount++;
                randomizeDecrypt(testDecrypt);  //New random guess
                float testValue = climbHill(testDecrypt, testCnums);
                if (testValue < bestValue){
                    bestValue = testValue;
                    copy(testDecrypt, bestDecrypt);
                    decryptCnums(cryptoCnums, bestDecrypt, bestCnums);
                    plainText = decryptString(cryptoText, bestDecrypt);
                    System.out.print("New bestValue = " + bestValue + " ");
                    System.out.println(iArrayString(bestDecrypt));
                    System.out.println(plainText +"\n");
                    loopCount = 0;  //an improvement resets the patience counter
                }//if
                else {
                    System.out.print("Low testValue = " + testValue + " ");
                    System.out.println(iArrayString(testDecrypt));
                    System.out.println(decryptString(cryptoText, testDecrypt) +"\n");
                }//else
            }//while
        } //try
        catch(Exception exc){
            System.out.println("In Ngram Climber calcPlaintext() - " + exc.toString());
        } //catch
    } //calcPlaintext()

    /**
     * setup(cText) - from the Analyzer interface.
     * Just assigns the text to an instance variable.
     */
    public void setup(String cText){
        cryptoText = cText;
    } // setup()

    // The following private methods are used in calcPlaintext()

    /*
     * textToCnums(text) returns the alphabet index of every character of
     * text that is in the alphabet, in order; other characters are skipped.
     */
    private int[] textToCnums(String text) throws Exception {
        int len = text.length();
        int[] buffer = new int[len];  //upper bound: every char is in the alphabet
        int count = 0;
        for (int m = 0; m < len; m++){
            char ch = text.charAt(m);
            if (alphabet.isInAlphabet(ch)){
                buffer[count] = alphabet.charToInt(ch);
                count++;
            } //if
        } //for
        int[] cnums = new int[count];
        System.arraycopy(buffer, 0, cnums, 0, count);
        return cnums;
    } //textToCnums()

    /*
     * climbHill(testDecrypt, testCnums) climbs one hill: it repeatedly tries
     * swapping pairs (j, k), j < k < randSize, of testDecrypt, keeps a swap
     * only when it lowers the recipDist score, and restarts the pair scan
     * after every kept swap.  Returns the score of the relative minimum
     * reached; testDecrypt is left holding the corresponding substitution.
     * testCnums is scratch space of length cryptoCnums.length.
     */
    private float climbHill(int[] testDecrypt, int[] testCnums) throws Exception {
        decryptCnums(cryptoCnums, testDecrypt, testCnums);
        float testValue = ngramArr.recipDist(testCnums);
        int j = 0;
        int k = 1;
        while (k < randSize) {
            swap(testDecrypt, j, k);
            decryptCnums(cryptoCnums, testDecrypt, testCnums);
            float currValue = ngramArr.recipDist(testCnums);
            if (currValue < testValue){
                testValue = currValue; //and leave testDecrypt changed
                j = 0;
                k = 1;  //start the pair scan over
            }//if
            else{
                swap(testDecrypt, j, k); //restore testDecrypt
                j++;
                if (j >= k){
                    j = 0;
                    k++;
                }
            }//else
        } //while
        return testValue;
    } //climbHill()

    /*
     * decryptCnums(inCnums,aDecrypt,outCnums) applies the substitution
     * aDecrypt to inCnums and stores the result in outCnums.
     */
    private void decryptCnums(int[] inCnums, int[] aDecrypt, int[] outCnums){
        for (int k = 0; k < inCnums.length; k++) {
            outCnums[k] = aDecrypt[inCnums[k]];
        }
    } //decryptCnums()

    /*
     * randomizeDecrypt(aDecrypt) randomly permutes the first randSize
     * elements of the substitution aDecrypt in place.  It walks backwards
     * and swaps each position with a random position at or before it
     * (Fisher-Yates), so every ordering is equally likely; swapping each
     * position with a random position from the whole range is biased.
     */
    private void randomizeDecrypt(int[] aDecrypt){
        for (int k = randSize - 1; k > 0; k--) {
            int m = (int)(Math.random() * (k + 1));  // 0 <= m <= k
            swap(aDecrypt, k, m);
        } //for
    } //randomizeDecrypt()

    /*
     * swap(theArr, k, m) swaps theArr[k] with theArr[m]; a no-op when k == m.
     */
    private void swap(int[] theArr, int k, int m){
        int tmp = theArr[k];
        theArr[k] = theArr[m];
        theArr[m] = tmp;
    } //swap

    /**
     * copy(arr1, arr2) copies values of the elements of the array arr1
     * to the elements of the array arr2.  It is assumed that the arrays
     * have been constructed and that arr2 is at least as long as arr1.
     */
    private void copy(int[] arr1, int[] arr2){
        System.arraycopy(arr1, 0, arr2, 0, arr1.length);
    } //copy

    /**
     * decryptString(inText, decryptArr) builds the text obtained by
     * replacing every alphabet character of inText through the substitution
     * decryptArr (indexed by the character's alphabet index).  Characters
     * not in the alphabet are copied unchanged.  If an exception occurs it
     * is reported on standard output and the text built so far is returned.
     */
    private String decryptString(String inText, int[] decryptArr){
        StringBuffer sb = new StringBuffer();
        int len = inText.length();
        try{
            for (int k = 0; k < len; k++){
                char ch = inText.charAt(k);
                if (alphabet.isInAlphabet(ch)){
                    int x = alphabet.charToInt(ch);
                    sb.append(alphabet.intToChar(decryptArr[x]));
                } //if
                else {
                    sb.append(ch);
                } //else
            } //for
        } //try
        catch(Exception exc){
            System.out.println("In NgramClimber DecryptString -" + exc.toString());
        } //catch
        return sb.toString();
    } //decryptString()

    /**
     * iArrayString(intArr) - creates a string of the alphabet characters
     * corresponding to the int values in the array intArr. The values are
     * assumed to be in the range 0 to (alphSize - 1).  Returns "error"
     * (after reporting on standard output) if the conversion fails.
     */
    private String iArrayString(int[] intArr){
        StringBuffer sb = new StringBuffer();
        try{
            for (int k = 0; k < intArr.length; k++){
                sb.append(alphabet.intToChar(intArr[k]));
            } //for
            return sb.toString();
        }//try
        catch(Exception exc){
            System.out.println("In NgramClimber iArrayString) - " + exc.toString());
            return "error";
        } //catch
    } //iArrayString()

    /**
     * Temporary test hook: prints an identity substitution before and after
     * randomizeDecrypt().  Uses whatever randSize the last calcPlaintext()
     * call left behind, so nothing is shuffled if it has not been called.
     */
    public void tempTester(){
        int[] tDecrypt = new int[alphSize];
        for (int k = 0; k < alphSize; k++) {
            tDecrypt[k] = k;
        }
        System.out.println("initialized decrypt = " + iArrayString(tDecrypt));
        randomizeDecrypt(tDecrypt);
        System.out.println("randomized decrypt = " + iArrayString(tDecrypt));
    }//tempTester

    /**
     * Demo driver: builds a tetragram climber from "book.txt", prints its
     * report, and runs it on a sample cryptogram.
     */
    public static void main(String[] args){

        try{
            char[] arr = {'a','z',' ',' '}; //Test 1 and Test 2
//        char[] arr = {'a','z'};  //Test 3
            Alphabet alph = new Alphabet(arr);
            NgramClimber ngc = new NgramClimber(4,"book.txt",alph);
            System.out.println(ngc.getReport());

//      Test 1 - cryptotext 27 chars (a-z + space) with space preserved
            String test = "WKVHDG XWLNA BLDDLIS FI DWA XONN, \n";
            test = test + "UJFPA YFXI OIY DWAI UASOI DF UOXN, \n";
            test = test + "UAZOKBA BFVA DGPA, ONN BDOJJG AGAY, \n";
            test = test + "WOY HOLIDAY ZFNFJB FI WLB WLYA.";

//      Test 2 - cryptotext 27 chars (a-z + space) with space NOT preserved
//        String test = "WKV DGHXWLNAHBLDDLISHFIHDWAHXONNH\n";
//        test = test + "UJFPAHYFXIHOIYHDWAIHUASOIHDFHUOXNH\n";
//        test = test + "UAZOKBAHBFVAHDGPAHONNHBDOJJGHAGAYH\n";
//        test = test + "WOYH OLIDAYHZFNFJBHFIHWLBHWLYA";

//      Test 3 - cryptotext 26 chars (a-z) with word boundaries removed.
//        String test = "WKVHDGXWLNABLDDLISFIDWAXONN\n";
//        test = test + "UJFPAYFXIOIYDWAIUASOIDFUOXN\n";
//        test = test + "UAZOKBABFVADGPAONNBDOJJGAGAY\n";
//        test = test + "WOYHOLIDAYZFNFJBFIWLBWLYA";
            test = test.toLowerCase();
            System.out.println("test = " + test);
            ngc.calcPlaintext(test,26,5);  //Test 1  and Test 3
//        ngc.calcPlaintext(test,27,5);    //Test 2
            System.out.println(ngc.getPlainText());

            // ngc.tempTester(); //Could be used to test methods.

        }  //try
        catch(Exception exc){
            System.out.println("In NgramClimber main() - " + exc.toString());
        } //catch
    }//main()

} // NgramClimber class


