 /*
 * File: NgramAnalyzer.java
 * @author R.Walde <rwalde@nyc.rr.com>
 * @author R.Morelli <ralph.morelli@trincoll.edu>

 *
 *  Description: This class uses an NgramArray with frequencies of
 *  N = 2, 3, or 4 (bigrams, trigrams, or tetragrams) of a language
 *  to cryptanalyze transposition and substitution ciphers.
 *  Using the sum of the inverses of the frequencies as an evaluation of how
 *  accurately a decrypt matches the usual N-gram frequencies in a
 *  language is an idea described by Alex Griffing the developer of the
 *  "Automatic Cryptogram Solver".  This program is based on R. Walde's
 *  NgramClimber.java.
 *
 * <P>Copyright: This program is in the public domain. You can modify it as you
 *  see fit as long as you properly acknowledge its original author.
 *  It would also be nice if you forwarded your changes to
 *  <A HREF= "mailto:ralph.morelli@trincoll.edu">ralph.morelli@trincoll.edu</A> so
 *  they can possibly be added to the "official" version.
 *
 *  To compile: javac -classpath hcryptoclasses -d hcryptoclasses NgramAnalyzer.java
 *  To run main(): java -classpath hcryptoclasses NgramAnalyzer
 *
 *  To compile and run from the TestAnalyzer application:
 *
 *  cd ~crypto/hcryptoj/1.4/applications/testanalyzer
 *  javac -classpath ../../classes -d ../../classes ../../source/hcrypto/analyzer/NgramAnalyzer.java
 *  java -classpath ../../classes:. TestAnalyzer analyzers.NgramAnalyzer ga_paramfiles ngram.cgrams.txt
 *
 */

package analyzers;

import hcrypto.analyzer.*;
import hcrypto.cipher.*;
import hcrypto.provider.*;
import java.io.*;
import java.text.NumberFormat;
import java.lang.Math;

public class NgramAnalyzer extends CryptoAnalyzer {

    // Constants are static: their values are identical for every instance.
    /** Not referenced anywhere in this class; presumably a block-size limit -- TODO confirm. */
    private static final int MAX_BLOCKSIZE = 8;
    /** Initial value of maxKeys until initGa() or setNgramAnalyzer() overrides it. */
    private static final int MAX_KEYS = 100000;

    // Cipher type codes accepted by setNgramAnalyzer() and reported by getCipherType().
    public static final int SIMPLESUB = 0;
    public static final int PERMUTATION = 1;
    public static final int RAILFENCE = 2;
    public static final int PLAYFAIR = 3;

    private Alphabet alphabet; // Defines which chars occur in N-grams.
    private int randSize;      // The portion of the subst alphabet to permute
    private int alphSize;      // The size of the alphabet.
    private int cipherType;    // The type of cipher (one of the codes above)
    private int NN;            // The N of the N-gram = sequence of N chars.
    private NgramArray ngramArr; //inverse of frequency of each N-gram

    private String plainText = "";  // The current best plaintext found.
    private String book = null;       // Default Book used for NgramArray

    private double bestValue;            // Lowest evaluation found so far by doHillClimb()
    private int maxKeys = MAX_KEYS;      // Max number of keys to examine in one search
    private NgramDecrypter decrypter;    // Created in setNgramAnalyzer()
    private int hillCount = 0;           // Number of hills climbed in the current search
    private int newStarts = 0;           // Number of improving steps in the current search
    

    /**
     * NgramAnalyzer() -- Default constructor
     */
    public NgramAnalyzer() {
	super();
    } 

    /**
     * NgramAnalyzer() -- this constructor is given an object containing parameter settings.
     * The object is passed unchanged to the superclass constructor; the
     * individual settings are copied into this class later by initGa().
     * @param params -- an object containing param1=val1 param2=val2 ...
     */
    public NgramAnalyzer(GaParameters params) {
	super(params);
    }

    /**
     * setup(cText) - from the Analyzer interface.
     * Hands the cryptotext to the superclass, loads the search settings
     * with initGa(), and then builds the N-gram array and the decrypter by
     * calling setNgramAnalyzer() with those settings.
     * An exception thrown while building them is printed and not rethrown.
     * @param cText -- the cryptotext to be analyzed
     */
     public void setup(String cText){
	 super.setup(cText);       
	 initGa();
	 // num is not declared in this file -- presumably an inherited NumberFormat.
	 num.setMaximumFractionDigits(2);
	 try {
	     setNgramAnalyzer(NN, book, alphabet, randSize, cipherType, maxKeys);
	 } catch (Exception e) {
	     e.printStackTrace();
	 }
     } // setup()

     /**
      * setup(cText, book) - variant of setup() that names the book to use.
      * The book is stored before initGa() runs, so a non-null book given
      * here takes precedence over params.book.
      * Unlike setup(cText), this method does NOT call setNgramAnalyzer();
      * the caller must do so before run() (main() does).
      * @param cText -- the cryptotext to be analyzed
      * @param book -- the book passed on to NgramArray by setNgramAnalyzer()
      */
     public void setup(String cText, String book){
	 this.book = book;
	 initGa();
	 super.setup(cText);       
     } // setup()


    /**
     * initGa() - Copies the search settings out of the parameter object
     * into this analyzer's own fields.
     * If no parameter object has been supplied, a default GaParameters
     * is created first.
     * The book named in the parameters is used only when no book has
     * been set already.
     */
    public void initGa() {
	if (params == null) {
	    params = new GaParameters();
	}
	if (book == null) {
	    book = params.book;
	}
	alphabet = params.alphabet;
	cipherType = params.cipherType;
	randSize = params.randSize;
	maxKeys = params.nkeys;
	NN = params.NN;
    }

    /**
     * setNgramAnalyzer interfaces with main(). It is also called from setup().
     * Stores the given settings, builds the NgramArray from the book and
     * alphabet, asks NgramDecrypterFactory for the decrypter that matches
     * the cipher type, and appends a header line to the report.
     * @param N -- the N of the N-grams
     * @param book -- the book passed to NgramArray
     * @param alpha -- the alphabet of the N-grams; must not be null
     * @param rSize -- given to the decrypter's setRandSize() when cType is SIMPLESUB
     * @param cType -- SIMPLESUB, PERMUTATION, RAILFENCE or PLAYFAIR; any
     *   other value selects a "Substitution" decrypter
     * @param mkeys -- max number of keys to examine in run()
     * @throws Exception whatever NgramArray or the factory throws
     */
    public void setNgramAnalyzer(int N, String book, Alphabet alpha, int rSize, int cType, int mkeys) throws Exception { 
	this.NN = N;
	this.book = book;
	this.alphabet = alpha;
	this.randSize = rSize;
	this.cipherType = cType;
	this.maxKeys = mkeys;
	alphSize = alphabet.getSize();
	ngramArr = new NgramArray(NN, book, alphabet);

	String decrypterName;
	switch (cipherType) {
	case PERMUTATION:
	    decrypterName = "Permutation";
	    break;
	case RAILFENCE:
	    decrypterName = "Railfence";
	    break;
	case PLAYFAIR:
	    decrypterName = "Playfair";
	    break;
	default:            // SIMPLESUB and every unrecognized type
	    decrypterName = "Substitution";
	    break;
	}
	decrypter = NgramDecrypterFactory.getInstance(decrypterName, text, alphabet, ngramArr);

	// Only an explicit SIMPLESUB request configures the random portion size.
	if (cipherType == SIMPLESUB) {
	    ((SubstitutionNgramDecrypter)decrypter).setRandSize(randSize);
	}
	resultSB.append("ANALYZER: NGramAnalyzer N= " + NN + " ALPHA= " + alphSize);
    }  


    /**
     * From the Analyzer interface.
     * Returns the report text, which is whatever toString() returns.
     * @return the report accumulated so far
     */
    public String getReport() {
        return toString();
    }

    /**
     * From the CryptoAnalyzer interface.
     * Runs the hill-climbing search on the cryptotext, examining up to
     * maxKeys keys, and then appends the decrypt key and the first
     * characters of the best plaintext to the report.
     * The decrypter must already have been created by setNgramAnalyzer().
     */
    public void run() {
	final int previewLength = 35;  // Max number of plaintext chars shown in the report

	stopThread = false;           // stopThread declared in superclass
	doHillClimb(text, maxKeys);

	// Only a substitution decrypter is known to expose a decrypt key, so
	// check the type (as setInitialSubstitution() does) rather than cast blindly.
	String key = "n/a";
	if (decrypter instanceof SubstitutionNgramDecrypter) {
	    key = ((SubstitutionNgramDecrypter)decrypter).getDecryptKey();
	}

	// The plaintext may be shorter than the preview (short cryptograms, or
	// no improving hill found), so clamp the end index instead of assuming 35 chars.
	String decrypt = (plainText == null) ? "" : plainText;
	String preview = decrypt.substring(0, Math.min(previewLength, decrypt.length()));

	resultSB.append(" PERM_KEY= " + key + "\tDECRYPT=" + preview);

	if (display != null) {
	    display.append("\nFinished");
	}
    }

    /**
     * Lets the substitution key start from a specific alphabet
     *  instead of a random one.  Introduced for hard Alberti.
     *  Does nothing unless the current decrypter is a
     *  SubstitutionNgramDecrypter.
     * @param s -- the key handed to the decrypter's setDecryptKey()
     */
    public void setInitialSubstitution(String s) {
	if (!(decrypter instanceof SubstitutionNgramDecrypter)) {
	    return;
	}
	SubstitutionNgramDecrypter substitution = (SubstitutionNgramDecrypter)decrypter;
	substitution.setDecryptKey(s);
    }

    /**
     * Returns the report text accumulated in resultSB, i.e. the lines
     * appended by setNgramAnalyzer() and run().
     * @return the current contents of resultSB
     */
    public String toString(){
	return resultSB.toString();
    } // toString()

    /**
     * Writes the report, as returned by toString(), to standard output.
     */
    public void print(){
        String report = toString();
        System.out.println(report);
    } // print()

   /**
    * Returns the alphabet size recorded by the last call to
    * setNgramAnalyzer() (0 if it has not been called yet).
    */
   public int getAlphSize() {
        return alphSize;
   } //getAlphSize()

   /**
    * Returns the cryptotext, or the empty string when none has been set.
    */
   public String getCryptoText(){
       return (text == null) ? "" : text;
   } // getCryptoText()

    /**
     * Returns the decrypt key of the current decrypter.
     * A key is available only from a SubstitutionNgramDecrypter; for any
     * other decrypter, or when none has been created yet, the empty
     * string is returned (matching getCryptoText() and getPlainText())
     * instead of failing on the cast.
     * @return the decrypt key, or "" if none is available
     */
    public String getDecryptKey() {
	if (decrypter instanceof SubstitutionNgramDecrypter) {
	    return ((SubstitutionNgramDecrypter)decrypter).getDecryptKey();
	}
	return "";
    }
   /**
    * Returns the best plaintext found so far, or the empty string when
    * there is none.
    */
   public String getPlainText(){
       return (plainText == null) ? "" : plainText;
   } // getPlainText()

    /**
     * Returns a printable name for the current cipher type code.
     * @return the name, or "UNKNOWN CIPHER TYPE" for an unrecognized code
     */
    public String getCipherType() {
	if (cipherType == SIMPLESUB) {
	    return "SimpleSubstitution";
	}
	if (cipherType == PERMUTATION) {
	    return "Permutation";
	}
	if (cipherType == RAILFENCE) {
	    return "Railfence";
	}
	if (cipherType == PLAYFAIR) {
	    return "Playfair";
	}
	return "UNKNOWN CIPHER TYPE";
    }

    /**
     * doHillClimb(cText, mKeys) searches for the key whose decryption gives
     * the lowest N-gram evaluation, using the decrypter created by
     * setNgramAnalyzer().  The search repeatedly starts from a random key and
     * climbs until the decrypter reports no more hill; the best result over
     * all hills is kept in bestValue and plainText.
     * The search ends when mkeys keys have been evaluated or the thread is
     * stopped.  The key limit is checked only between hills.
     * Any exception is printed and ends the search.
     * @param cText -- not used; the decrypter already holds the cryptotext
     * @param mkeys -- the max number of keys to look at in the entire search
     */
    public void doHillClimb(String cText, int mkeys) {
	hillCount = 0;
	keyCount = 0;
	newStarts = 0;
	try {
	    // The value of one random key seeds the best-so-far score.
	    decrypter.randomizeClimb();
	    bestValue = decrypter.currentEval();
	    keyCount++;

	    while (keyCount < mkeys && !threadIsStopped()) {
		double hillValue = climbOneHill();
		hillCount++;

		if (!(hillValue < bestValue)) {
		    continue;                                   // This hill is no better
		}

		// Best hill so far: remember its value, key state and decryption.
		bestValue = hillValue;
		decrypter.saveState();
		plainText = decrypter.getPlainText();
		// The count itself is not used; the call is kept so behavior is unchanged.
		TextUtilities.countInCorrectChars(solution,plainText);
		if (params.verbose) {
		    System.out.println("Finished Hill Value = " + bestValue + " Hillcount = " + hillCount + " NewStarts = " + newStarts + " KeyCount = " + keyCount);
		    System.out.println(plainText);
		    if (display != null) {
			display.setText(plainText);
		    }
		}
	    }
	} catch (Exception e) {
	    System.out.println("In Ngram Climber doHillClimb() - " + e.toString());
	    e.printStackTrace();
	}
    }

    /**
     * Climbs one hill: starts from a random key and keeps every step that
     * lowers the evaluation, undoing every step that does not, until the
     * decrypter reports no more hill.  Updates keyCount and newStarts.
     * @return the lowest evaluation reached on this hill
     */
    private double climbOneHill() throws Exception {
	decrypter.randomizeClimb();
	double lowest = decrypter.currentEval();
	keyCount++;
	decrypter.initClimb();

	while (decrypter.hasMoreHill()) {
	    decrypter.climb();
	    double candidate = decrypter.currentEval();
	    keyCount++;
	    if (candidate < lowest) {
		lowest = candidate;
		decrypter.initClimb();                          // Improved: start a new climb
		newStarts++;
	    } else {
		decrypter.undostep();                           // Not improved: take the step back
	    }
	}
	return lowest;
    }

   /**
    * Command-line test driver.
    * Usage: java NgramAnalyzer TestNum N book mKeys
    * TestNum selects one of the built-in cryptograms (0-10; any other
    * number runs test 1), N is the N-gram size, book is passed on as the
    * book for the N-gram frequencies, and mKeys is the max number of keys
    * to examine.  The report is printed to standard output.
    * Any exception, including a non-numeric argument, is printed rather
    * than propagated.
    */
   public static void main(String[] args){
      try {
          if (args.length != 4) {
              System.out.println("Usage: java NgramAnalyzer TestNum N book mKeys");
              return;
          }
          int testNum = Integer.parseInt(args[0]);
          int ngramSize = Integer.parseInt(args[1]);
          String file = args[2];
          int mKeys = Integer.parseInt(args[3]);

          int randSize = 26;
          int cipherNum = SIMPLESUB;
          String test;
          char[] arr;   // Character ranges handed to the Alphabet constructor

          switch (testNum) {
          //  Test 0 - cryptotext 27 chars (a-z + space) with space preserved
          case 0:  // TEST 0: Substitution cipher gadsby, no letter 'e'
              cipherNum = SIMPLESUB;
              test = "PKJI OEXN AGNXN X GH TJXIT OJ NEJR \n"
                  + "VJP EJR G APIDE JY AMXTEO VJPIT YJFCN SXS YXIS \n"
                  + "G DEGHKXJI; G HGI RXOE AJVN GIS TXMFN JY EXN JRI. \n";
              arr = new char[] {'a', 'z', ' ', ' '};
              randSize = 26;
              break;

          //  Test 2 - cryptotext 27 chars (a-z + space) with space NOT preserved
          case 2: // TEST 2: Substitution with SPACE not preserved
              cipherNum = SIMPLESUB;
              test = "WKV DGHXWLNAHBLDDLISHFIHDWAHXONNH\n"
                  + "UJFPAHYFXIHOIYHDWAIHUASOIHDFHUOXNH\n"
                  + "UAZOKBAHBFVAHDGPAHONNHBDOJJGHAGAYH\n"
                  + "WOYH OLIDAYHZFNFJBHFIHWLBHWLYA";
              arr = new char[] {'a', 'z', ' ', ' '};
              randSize = 27;
              break;

          //  Test 3 - cryptotext 26 chars (a-z) with word boundaries removed.
          case 3: // TEST 3: Substitution without word breaks
              cipherNum = SIMPLESUB;
              test = "WKVHDGXWLNABLDDLISFIDWAXONN\n"
                  + "UJFPAYFXIOIYDWAIUASOIDFUOXN\n"
                  + "UAZOKBABFVADGPAONNBDOJJGAGAY\n"
                  + "WOYHOLIDAYZFNFJBFIWLBWLYA";
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          //  Test 4 - permutation cipher -- gadsby 40312
          case 4: // TEST 4: Permutation
              cipherNum = PERMUTATION;
              test = "pntouibashiiassginomtshogwouyooabwhnhocu\n"
                  + "ribfhyotgnfogukdislfndidcamhainaopawinmhoybt\n"
                  + "ndasilsrgfishowccno";
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          //  Test 5 - permutation cipher -- tobe 40312
          case 5: // TEST 5: Permutation
              cipherNum = PERMUTATION;
              test = "oeobtnttorbtheotstiaeueqhtonis";
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          //  Test 6 -- railfence cipher tobe 40312
          case 6: // TEST 6: Railfence
              cipherNum = RAILFENCE;
              test = "othtentroahsettsuoboeiqionbtet";
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          //  Test 7 -- playfair cipher thisisatestofplayfairwithjoytotheworld
          //	      test =               "skbpbpcqrtqepvodvkbaoxcpfbewqeskoyredndd";
          case 7: // TEST 7: Playfair with 6-letter keyword
              cipherNum = PLAYFAIR;
              test = "skbpbpcqrtqekpkrumcwgibmdasfledgcufbprrtqbkobpelodpcymezmrfhdd";
              //              test = test + "skbpbpcqrtqekpkrumcwgibmdasfledgcufbprrtqbkobpelodpcymezmrfh";
              //              test = test + "skbpbpcqrtqekpkrumcwgibmdasfledgcufbprrtqbkobpelodpcymezmrfhdd";
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          case 8: // TEST 8: Playfair with 12-letter keyword
              cipherNum = PLAYFAIR;
              test = "vuwdwdixscvmcqrdzcswebsgcoabqavuwdolddinmsdcswemzerknu";
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          case 9: // TEST 9: Playfair with 9-letter keyword
              cipherNum = PLAYFAIR;
              //	      test ="hkpypygzryklcyagfqipodpsqmsrahtihsqrmlhhleqosxrefc";
              /*********************
              test = "hkpy is clshg kl be a xoes lrkg olyygte";
              test = test + "hkgz uyrs aoiagysm cpsqmr ushk a kyfvrsn";
              test = test + "of hsqr llqqlsd we sle krw qlanshg unlqqmr";
              test = test + "hke mltdhk of hke olyygte tis staqubtd to";
              test = test + "cr wahh hke gygk ttit hke tdlsm stgaaxle";
              test = test + "drls krt kpkwlefl b";
              ****************/

              test = "ocoyfolbvnpiasakopvygeskovmufguwmlnooedrncforsocvmtuuty" +
                  "erpfolbvnpiasakopvivkyeocnkoccaricvvltsocoytrfdvcvooueg" +
                  "kpvooyvkthzscvmbtwtrhpnklrcuegmslnvlzscansckopormzckizu" +
                  "slccvfdlvorthzscleguxmifolbimvivkiuayvuufvwvccbovovpfrh" +
                  "cacsfgeolckmocgeumohuebrlxrhemhpbmpltvoedrncforsgisthog" +
                  "ilcvaioamvzirrlniiwusgewsrhcaugimforskvzmgclbcgdrnkcvcp" +
                  "yuxlokfyfolbvcckdokuuhavococlciusycrgufhbevkroicsvpftuq" +
                  "umkigpecemgcgpggmoqusyefvgfhralauqolevkroeokmuqirxccbcv" +
                  "maodclanoynkbmvsmvcnvroedrncgeskysysluuxnkgegmzgrsonlcv" +
                  "agebglbimordprockinankvcnfolbceumnkptvktcgefhokpdulxsue" +
                  "opclanoynkvkbuoyodorsnxlckmglvcvgrmnopoyofocvkocvkvwofc" +
                  "lanyefvuavnrpncwmipordgloshimocnmlccvgrmnopoyhxaifoouep" +
                  "gchk";

              /***********************
              test = "hkpypyclshtzmclyxoesmrtdolyygte" +
                  "hkgzwiryaoiagysmbpikleushkptyfvrsn" +
                  "odqshlmlqqlsdxrslrqrvqlanshdzqmhkle" +
                  "tqmmltdhkechkloryyifltiyiqshkshtzo" +
                  "crushkhkyqpghhtihhqmtdlslitseaxlr" +
                  "crrykrkgrkxoldedd";
              ****************/
              arr = new char[] {'a', 'z'};
              randSize = 26;
              break;

          //  Test 10 - cryptotext 27 chars (a-z + space) very short
          case 10:  // TEST 10: to be or not to be...
              cipherNum = SIMPLESUB;
              test = "sgzs ptdrshnm vgdsgdq hs hr adssdq sn ad nq mns sn ad ";
              arr = new char[] {'a', 'z', ' ', ' '};
              randSize = 26;
              break;

          //  Test 1 - cryptotext 27 chars (a-z + space) with space preserved
          case 1:  // TEST 1: Substitution cipher with word breaks
          default: // Any unrecognized test number also runs TEST 1
              cipherNum = SIMPLESUB;
              test = "WKVHDG XWLNA BLDDLIS FI DWA XONN, \n"
                  + "UJFPA YFXI OIY DWAI UASOI DF UOXN, \n"
                  + "UAZOKBA BFVA DGPA, ONN BDOJJG AGAY, \n"
                  + "WOY HOLIDAY ZFNFJB FI WLB WLYA.";
              arr = new char[] {'a', 'z', ' ', ' '};
              randSize = 26;
              break;
          }

          // Locale.ROOT keeps the result inside a-z under every default locale
          // (a Turkish locale would otherwise map 'I' to a dotless i).
          test = test.toLowerCase(java.util.Locale.ROOT);
          System.out.println("Testing = " + testNum + "\n" + test + "\n");
          Alphabet alph = new Alphabet(arr);
          NgramAnalyzer ngc = new NgramAnalyzer();
          ngc.setup(test, file);
          // setup(text, book) does not build the decrypter, so build it here.
          ngc.setNgramAnalyzer(ngramSize, file, alph, randSize, cipherNum, mKeys);
          ngc.run();
          System.out.println("\n" + ngc.getReport());
      } catch (Exception exc) {
          System.out.println("In NgramAnalyzer main() - " + exc.toString());
          exc.printStackTrace();
      }
    }//main()

} // NgramAnalyzer class


