/*
 * File: TextUtilities.java
 * @author R. Morelli <ralph.morelli@trincoll.edu>
 * 
 * Description: This class contains useful constants and static methods for computing 
 *  statistics about text files. 
 *
 */
 
package hcrypto.analyzer;

import hcrypto.cipher.*;
import java.util.*;
import java.io.*;

public final class TextUtilities {
    /** Selector for getPatternDictionary(int): requests the evaluation dictionary. */
    public static final int EVAL_DICT = 0;
    /** Selector for getPatternDictionary(int): this, like any value other than EVAL_DICT, yields the default dictionary. */
    public static final int SEED_DICT = 1;
    /** Number of random pairwise swaps performed by shuffle(String). */
    public static final int N_SHUFFLES = 20;


    // Default pattern dictionary: returned by getPatternDictionary() and queried by
    // countWordsForPattern(). No assignment to this field is visible in this file
    // (its former initializer is commented out), so it is null unless set elsewhere.
    private static PatternDictionary dict;//  = new PatternDictionary(PatternDictionary.KUCERA_340);
    //    private static PatternDictionary dict = new PatternDictionary(PatternDictionary.KUCERA_340);
    //    private static PatternDictionary eval_dict = new PatternDictionary(PatternDictionary.KUCERA_3500);
    // Evaluation dictionary: returned by getPatternDictionary(EVAL_DICT). As with dict,
    // no assignment is visible in this file.
    private static PatternDictionary eval_dict; // = new PatternDictionary(PatternDictionary.KUCERA_3500);

    //    private static Dictionary big_eval_dict = new Dictionary("/home/crypto/hcryptoj/1.4/applications/testanalyzer/dicts/words.txt", true);
    // Referenced only from commented-out code in this file; never assigned here.
    private static Dictionary big_eval_dict; // = new Dictionary("/home/crypto/hcryptoj/1.4/applications/testanalyzer/dicts/words.txt", true);

    // The class exposes only static members, so construction is blocked.
    private TextUtilities() { } // Can't be instantiated

    /**
     * Obsolete accessor kept only so that existing callers still compile.
     * Each call prints a stack trace (identifying the caller) to standard
     * error and returns null.
     *
     * @return always null
     * @deprecated use {@link #getPatternDictionary()} instead.
     */
    public static Dictionary getDictionary() {
        // The exception is created purely for its stack trace; it is never thrown.
        // (The previous version threw it, caught it and returned from a finally
        // block, which would also have silently discarded any Error raised.)
        new Exception("getDictionary() is no longer used. Use getPatternDictionary()").printStackTrace();
        return null;
    }
    /**
     * Returns the default pattern dictionary held by this class.
     *
     * @return the value of the static default dictionary field; this is null
     *         if nothing has assigned it
     */
    public static PatternDictionary getPatternDictionary() {
        return TextUtilities.dict;
    }

    /**
     * Returns one of the two pattern dictionaries held by this class.
     *
     * @param which EVAL_DICT selects the evaluation dictionary; every other
     *              value (including SEED_DICT) selects the default dictionary
     * @return the selected dictionary field, which is null if nothing has assigned it
     */
    public static PatternDictionary getPatternDictionary(int which) {
        return (which == EVAL_DICT) ? eval_dict : dict;
    }

    /**
     * Reports whether the given dictionary contains the word.
     *
     * @param dict the dictionary to consult (this parameter shadows the static
     *             field of the same name)
     * @param word the token to look up
     * @return whatever dict.contains(word) reports
     */
    public static boolean isAWord(Dictionary dict, String word) {
        final boolean known = dict.contains(word);
        return known;
    }

    /**
     * countTokens() counts the tokens (words) in the text. Tokens are
     * separated by the default StringTokenizer delimiters (space, tab,
     * newline, carriage return, form feed).
     *
     * @param text -- a whitespace-delimited string; may be null
     * @return the number of tokens in the string, or 0 when text is null
     */
    public static int countTokens(String text) {
        // Treat a missing text like an empty one, matching countWords().
        if (text == null) {
            return 0;
        }
        return new StringTokenizer(text).countTokens();
    }

    /**
     * countWordsForPattern() asks the default pattern dictionary how many
     * of its words match the pattern.
     *
     * @param pattern -- a string of the form 1231, 1223, etc.
     * @return the count reported by the default dictionary
     */
    public static int countWordsForPattern(String pattern) {
        // NOTE(review): the default dictionary has no visible initializer in
        // this file; if it is still null this call throws NullPointerException.
        PatternDictionary patterns = dict;
        return patterns.countWordsForPattern(pattern);
    }

    /**
     * removeDuplicates() keeps the first occurrence of every
     * whitespace-delimited token and drops later repeats.
     *
     * The result always starts with a single space and every kept token is
     * followed by a single space (so an input without tokens yields " ").
     *
     * @param s the text whose tokens are filtered
     * @return the distinct tokens in first-seen order, space padded as described
     */
    public static String removeDuplicates (String s) {
        // The leading blank lets every stored token be found as " token ".
        StringBuffer unique = new StringBuffer(" ");
        for (StringTokenizer tokens = new StringTokenizer(s); tokens.hasMoreTokens(); ) {
            String token = tokens.nextToken();
            String padded = " " + token + " ";
            if (unique.indexOf(padded) < 0) {
                unique.append(token).append(" ");
            }
        }
        return unique.toString();
    }

    /**
     * Lists the lowercase letters 'a'..'z' that do not occur in the string.
     * The comparison is case sensitive: an uppercase letter does not count
     * as a use of its lowercase form.
     *
     * @param s the text to inspect
     * @return the missing letters in alphabetical order (possibly empty)
     */
    public static String getUnusedLetters(String s) {
        StringBuffer missing = new StringBuffer();
        char letter = 'a';
        while (letter <= 'z') {
            if (s.indexOf(letter) < 0) {
                missing.append(letter);
            }
            letter++;
        }
        return missing.toString();
    }

    /**
     * countWords() counts how many whitespace-delimited tokens of the text
     * are contained in the supplied dictionary. Repeated tokens are counted
     * each time they occur.
     *
     * @param eval_dict the dictionary each token is looked up in (shadows the
     *                  static field of the same name)
     * @param text the text to scan; null or "" gives 0 without touching the dictionary
     * @return the number of tokens found in the dictionary
     */
    public static int countWords(Dictionary eval_dict, String text) {
        if (text == null || text.equals("")) {
            return 0;
        }
        int found = 0;
        for (StringTokenizer words = new StringTokenizer(text); words.hasMoreTokens(); ) {
            if (eval_dict.contains(words.nextToken())) {
                found++;
            }
        }
        return found;
    }

    /**
     * remove() returns a copy of the string without any occurrence of ch.
     *
     * @param s -- the source String
     * @param ch -- the char to strip out
     * @return s with every ch deleted; the remaining chars keep their order
     */
    public static String remove(String s, char ch) {
        char[] chars = s.toCharArray();
        StringBuffer kept = new StringBuffer();
        for (int i = 0; i < chars.length; i++) {
            if (chars[i] == ch) {
                continue;
            }
            kept.append(chars[i]);
        }
        return kept.toString();
    }


    /**
     * shuffle() randomly shuffles the characters of a StringBuffer in place.
     *
     * The previous version assigned a new buffer to its own parameter, which
     * the caller never sees, so the caller's buffer was left untouched.
     *
     * @param sb the buffer whose contents are replaced by a shuffled version
     *           of themselves; buffers shorter than two chars are left as is
     */
    public static void shuffle(StringBuffer sb) 
    {
        if (sb.length() < 2) {
            return; // nothing to permute
        }
        String shuffled = shuffle(sb.toString());
        // Overwrite the caller's buffer; rebinding the parameter would have no effect.
        sb.replace(0, sb.length(), shuffled);
    }

    /**
     * shuffle() returns a copy of the String in which N_SHUFFLES randomly
     * chosen pairs of positions have been swapped. A pair may consist of the
     * same position twice, so the result can equal the input.
     *
     * @param s the text to shuffle
     * @return the shuffled text; strings shorter than two chars are returned
     *         unchanged (the empty string used to throw
     *         StringIndexOutOfBoundsException)
     */
    public static String shuffle(String s) 
    {
        if (s.length() < 2) {
            return s; // no two positions to exchange
        }
        StringBuffer sb = new StringBuffer(s);
        for (int k = 0; k < N_SHUFFLES; k++) 
        {
            int a = (int)(Math.random() * sb.length());
            int b = (int)(Math.random() * sb.length());
            char ch = sb.charAt(a);
            sb.setCharAt(a, sb.charAt(b));
            sb.setCharAt(b, ch);
        }
        return sb.toString();
    }

    /**
     * shuffle() randomly permutes the integers of an array in place using the
     * Fisher-Yates algorithm, so every ordering is equally likely.
     *
     * The previous version drew both indices from 0..length-2, which meant the
     * last element could never move (and a two-element array never changed).
     *
     * @param arr -- an array of ints; empty and one-element arrays are left unchanged
     */
    public static void shuffle(int arr[]) 
    {
        // Walk down from the end, swapping each slot with a random slot at or below it.
        for (int k = arr.length - 1; k > 0; k--) 
        {
            int m = (int)(Math.random() * (k + 1));
            int temp = arr[k];
            arr[k] = arr[m];
            arr[m] = temp;
        }
    }


    /******************* COMMENTED OUT
     * countWords() counts the number of words in the text
     *  by looking up each token in the current dictionary. This
     *  version also returns the letters that are not used in any
     *  found words.
    public static int countWords(String text, StringBuffer unusedLetters) {
	//        System.out.println("TEXT COUNT WORDS " + text + "\nUNUSED " + unusedLetters);
        String alphabet = "abcdefghijklmnopqrstuvwxyz";
        StringBuffer sb = new StringBuffer(alphabet);
        StringTokenizer st = new StringTokenizer(text);
        int count = 0;
        while (st.hasMoreTokens()) {
	    String s = st.nextToken();
	    if (dict.contains(s)) {
		//		System.out.print(s + " ");
		//		count += s.length();
		count++;
		for (int k = 0; k < s.length(); k++) {
		    int index = alphabet.indexOf(s.charAt(k));
		    if (index != -1)
			sb.setCharAt(index, '*');
		}
            }
        }
        for (int k = 0; k < sb.length(); k++)
            if (sb.charAt(k) != '*')
                unusedLetters.append(sb.charAt(k));
        return count;
    }
    ********************     */
    /******************************

    public static double evaluate(String text, StringBuffer usedLetters) {
	//        System.out.println("EVALUATE: TEXT= " + text + "\nUSED " + usedLetters);
	//        String alphabet = "abcdefghijklmnopqrstuvwxyz";
	//        StringBuffer sb = new StringBuffer(alphabet);

	double A = 1.0, B = 0.01, C = 2.0;             // Coefficients

        StringTokenizer st = new StringTokenizer(text);
        int count = 0;             // Number of words found
	double freqs = 0;          // Sum of frequecies
	double chars = 0;          // Total chars
	double charsInWords = 0;   // Chars in words found
        while (st.hasMoreTokens()) {
	    String s = st.nextToken();
	    chars += s.length();
	    if (eval_dict.contains(s)) {
	    //	    if (big_eval_dict.contains(s)) {
		//		System.out.print(s + " ");
		count++;
		//		freqs += big_eval_dict.getFreq(s);
		freqs += eval_dict.getFreq(s);
		charsInWords += s.length();
		usedLetters.append(s);
            }
        }
        return A * count + B * charsInWords/chars + C * freqs;
    }
    ****************************/

    /**
     * This method removes the whitespace from a String. Every char for which
     * Character.isWhitespace() is true is dropped -- the same test
     * cleanString() uses -- so carriage returns and form feeds are removed
     * along with spaces, tabs and newlines.
     *
     * @param s the text to strip
     * @return s without any whitespace chars
     */
    public static String removeWhiteSpace(String s) {
        StringBuffer sb = new StringBuffer(s.length());
        for (int k = 0; k < s.length(); k++) {
            char ch = s.charAt(k);
            if (!Character.isWhitespace(ch)) {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /**
     * This method keeps only letters and whitespace, converting every kept
     * char to lowercase. Punctuation, digits and all other chars are dropped;
     * whitespace chars are copied through as they are.
     *
     * @param s the raw text
     * @return the lowercased text containing only letters and whitespace
     */
    public static String cleanString(String s) {
        StringBuffer cleaned = new StringBuffer();
        char[] chars = s.toCharArray();
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            boolean keep = Character.isLetter(c) || Character.isWhitespace(c);
            if (!keep) {
                continue;
            }
            cleaned.append(Character.toLowerCase(c));
        }
        return cleaned.toString();
    }

    /**
     * This method keeps only those chars of the string that the given
     * alphabet accepts.
     *
     * @param s the text to filter
     * @param a the alphabet whose isInAlphabet() test decides which chars stay
     * @return the accepted chars of s in their original order
     */
    public static String removeNonAlphabetics(String s, Alphabet a) {
        StringBuffer accepted = new StringBuffer();
        int length = s.length();
        for (int pos = 0; pos < length; pos++) {
            char candidate = s.charAt(pos);
            if (!a.isInAlphabet(candidate)) {
                continue;
            }
            accepted.append(candidate);
        }
        return accepted.toString();
    }

    /**
     * This method returns the letter pattern of a word: each distinct letter
     * is replaced, in order of first appearance, by the next symbol of
     * "123456789ABC...Z", and a repeated letter reuses the symbol of its first
     * occurrence. For example "there" gives "12343" and "appendectomy" gives
     * "12234536789A". The word is lowercased first, and only its first 35
     * chars (the number of available symbols) are encoded.
     *
     * @param s the word to encode
     * @return the pattern string, at most 35 chars long
     */
    public static String makePattern(String s) {
        final String symbols = "123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        final String lowered = s.toLowerCase();
        final int limit = Math.min(lowered.length(), symbols.length());
        StringBuffer pattern = new StringBuffer();
        int nextSymbol = 0;
        for (int pos = 0; pos < limit; pos++) {
            // The char at pos exists, so the first occurrence is at pos or earlier.
            int firstSeen = lowered.indexOf(lowered.charAt(pos));
            if (firstSeen < pos) {
                pattern.append(pattern.charAt(firstSeen));
            } else {
                pattern.append(symbols.charAt(nextSymbol));
                nextSymbol++;
            }
        }
        return pattern.toString();
    }

    /**
     * Compares two texts token by token and reports the percentage of tokens
     * of s1 that equal the token at the same position in s2.
     *
     * Positions for which s2 has no token count as mismatches (they used to
     * raise NoSuchElementException); surplus tokens in s2 are ignored.
     *
     * @param s1 the text whose token count is the denominator
     * @param s2 the text compared against s1
     * @return a value from 0 to 100; 0 when s1 contains no tokens (previously NaN)
     */
    public static double percentWords(String s1, String s2) {
        StringTokenizer st1 = new StringTokenizer(s1);
        StringTokenizer st2 = new StringTokenizer(s2);
        int nWords = st1.countTokens();
        if (nWords == 0) {
            return 0.0; // avoid 0/0
        }
        int matches = 0;
        while (st1.hasMoreTokens()) {
            String word = st1.nextToken();
            if (st2.hasMoreTokens() && word.equals(st2.nextToken())) {
                matches++;
            }
        }
        return 100.0 * matches / nWords;
    }

    /**
     * Rewrites the text so that dictionary words stand out: every
     * whitespace-delimited token found in the dictionary is written in
     * UPPERCASE, every other token in lowercase. Each token in the result is
     * followed by one space, including the last.
     *
     * @param eval_dict the dictionary used to recognise words (shadows the
     *                  static field of the same name)
     * @param s the text to mark
     * @return the re-cased tokens, each followed by a single space
     */
    public static String markWords(Dictionary eval_dict, String s) {
        StringBuffer marked = new StringBuffer();
        for (java.util.StringTokenizer words = new java.util.StringTokenizer(s); words.hasMoreTokens(); ) {
            String word = words.nextToken();
            boolean known = TextUtilities.isAWord(eval_dict, word);
            marked.append(known ? word.toUpperCase() : word.toLowerCase());
            marked.append(" ");
        }
        return marked.toString();
    }

  /**
   * Counts the positions at which a decryption differs from the known
   * solution. Both strings are lowercased and trimmed first, and only the
   * positions present in both (the shorter length) are compared; chars
   * beyond that are not counted.
   *
   * The previous version only set the comparison bound when the lengths
   * differed, so two strings of equal length always scored 0.
   *
   * @param solution the correct plaintext
   * @param decrypt the candidate decryption
   * @return the number of compared positions whose chars differ
   */
  public static int countInCorrectChars(String solution, String decrypt) {
        String actual = decrypt.toLowerCase().trim();
        String expected = solution.toLowerCase().trim();
        int bound = Math.min(expected.length(), actual.length());

        int count = 0;
        for (int k = 0; k < bound; k++) {
            if (actual.charAt(k) != expected.charAt(k)) {
                ++count;
            }
        }
        return count;
    } //countInCorrectChars()


   /*  *****Added by REW 7/14/2003
   Temporary hacks to create random substitutions in writeCiphertextFiles()
   and to eliminate sequences of spaces
   */
   /**
    * Scrambles the first maxChanged entries of the substitution table and
    * then encodes the text with it.
    *
    * The table is modified in place, so the caller's array is changed. Each of
    * the first maxChanged slots is swapped with a randomly chosen slot among
    * those same maxChanged slots. Every char of inText is then converted to
    * an index with alph.charToInt(), looked up in the table and converted back
    * with alph.intToChar().
    *
    * If anything in the encoding loop throws, the exception is printed to
    * standard output and the text encoded so far is returned.
    *
    * @param inText the text to encode
    * @param alph the alphabet used for the char/index conversions
    * @param substitution the substitution table; scrambled as a side effect
    * @param maxChanged how many leading table entries take part in the scramble
    * @return the encoded text (possibly partial, see above)
    */
   public static String randomizeSub(String inText, Alphabet alph,
          int[] substitution, int maxChanged){

       for (int from = 0; from < maxChanged; from++) {
           int to = (int)(Math.random()*maxChanged);
           int saved = substitution[from];
           substitution[from] = substitution[to];
           substitution[to] = saved;
       }

       StringBuffer encoded = new StringBuffer();
       try {
           int pos = 0;
           while (pos < inText.length()) {
               int plainIndex = alph.charToInt(inText.charAt(pos));
               encoded.append(alph.intToChar(substitution[plainIndex]));
               pos++;
           }
       }
       catch (Exception exc) {
           System.out.println(exc.toString());
       }
       return encoded.toString();
   }//randSubstitute

   /**
    * Collapses every run of consecutive spaces into a single space.
    *
    * Only the ' ' char is treated as a space. The first char is always
    * copied. The final char of any input longer than one char is NOT copied.
    *
    * @param inText the text to compress
    * @return the compressed text; "" for an empty input (which used to throw
    *         StringIndexOutOfBoundsException)
    */
   public static String singleSpaces(String inText){

        if (inText.length() == 0) {
            return ""; // nothing to copy, and charAt(0) below would throw
        }
        StringBuffer outText = new StringBuffer(inText.length());
        char lastChar = inText.charAt(0);
        outText.append(lastChar);
        // NOTE(review): the loop stops one short of the end, so the last char is
        // dropped. writeCiphertextFiles() hands over text that ends in a space,
        // so this looks like a deliberate trim; confirm before changing the bound.
        for ( int j = 1; j < inText.length()-1; j++){
            char thisChar = inText.charAt(j);
            if (!((thisChar == ' ') && (lastChar == ' ')) ){
              outText.append(thisChar);
              lastChar = thisChar;
            }//if
        }//for
        return outText.toString();
   }//singleSpaces()

 /**
  * writeCiphertextFiles() creates a collection of cryptogram files in a
  * directory named outRootName+"dir".
  *
  * There are numEachSize files for each of a series of message sizes. The
  * sizes start at minMessSize and grow by gapMessSize until they exceed
  * maxMessSize. Each file is named outRootName + size + "_" + number + ".txt"
  * and holds the cryptogram on one line, "$$$" on the next line and the
  * plaintext on a third line.
  *
  * A message is built from lowercased source lines. Chars accepted by alph
  * are copied and counted; any other char except the apostrophe is replaced
  * by a space. The message ends at the first space read from the source once
  * the count has reached the current size. Between messages a random number
  * (0..49) of lines is skipped, and the source file is reopened whenever its
  * end is reached.
  *
  * All exceptions are caught and reported on standard output; nothing is
  * propagated to the caller. The reader and each writer are closed even when
  * an exception occurs.
  *
  * @param sourceFile path of the text file the plaintext is taken from
  * @param outRootName prefix of the output directory and of every file name
  * @param minMessSize size of the smallest messages
  * @param gapMessSize increment between message sizes; if it is not positive
  *        the size never passes maxMessSize and the loop does not terminate
  * @param maxMessSize upper limit for the message size
  * @param numEachSize number of files written per size
  * @param alph alphabet deciding which chars count and used for encoding;
  *        its size must be at least 26 because randomizeSub() is asked to
  *        scramble 26 table entries
  */
  public static void writeCiphertextFiles(String sourceFile, String outRootName,
      int minMessSize, int gapMessSize, int maxMessSize, int numEachSize, Alphabet alph){

      BufferedReader inStream = null; // declared here so that finally can close it
      try{
          StringBuffer pText = new StringBuffer(); //the plaintext message
          String pString; //Will store the cleaned up version of pText
          String cText; //the ciphertext message
          String line = null;
          int len = 0; // will store the length of line.
          int pos = 0; // position of char in line
          char ch; // Will point to char read from file.
          int chNum = 0; // Will count the number of acceptable chars.

          boolean done = false;
          boolean messDone = false;
          File outDir = new File(outRootName + "dir");
          if (outDir.mkdir())  System.out.println("Directory " + outRootName + "dir created");
          else System.out.println("Directory " + outRootName + "dir either already exists or failed");
          File outFile; //Used to reference files created.
          inStream = new BufferedReader(new FileReader(sourceFile));
          System.out.println("sourceFile= " + sourceFile + " opened");
          int curMessSize =  minMessSize;
          int curMessNum = 0;
          int lineCount = (int)(Math.random()*50); // lines to skip before the first message
          // Start from the identity substitution; randomizeSub() scrambles it further
          // for every file, so the scrambling accumulates from file to file.
          int[] substitution = new int[alph.getSize()];
          for (int j = 0; j < alph.getSize(); j++){
             substitution[j] = j;
          }//for

          while (!done){
              line = inStream.readLine();
              if(line == null){   // If eof then close and reopen
                  inStream.close();
                  inStream = new BufferedReader(new FileReader(sourceFile));
                  System.out.println("sourceFile= " + sourceFile + " opened");
                  line = inStream.readLine();
                  //if there still is no line throw exception
                  if (line == null) {
                      throw new Exception("Bad source file in TextUtilities.writeCiphertextFiles()");
                  }//if
              }//if
              lineCount--;
              len = line.length();
              if ((len > 0)&&(lineCount < 0)){
                  line = line.toLowerCase();
                  pos = 0;
                  while ((pos < len) && !messDone){
                      ch  = line.charAt(pos);
                      if (alph.isInAlphabet(ch)){
                            chNum++;
                            pText.append(ch);
                      }//if  ch is in alphabet
                      else{ //if not in the alphabet it still might separate words
                           if (ch != '\'') pText.append(' ');
                      }//else
                      // readLine() strips line terminators, so the '\n' test cannot
                      // succeed here; a message therefore always ends on a space.
                      if ( (chNum >= curMessSize) && ((ch ==' ') || (ch == '\n')) ){
                          messDone = true;
                          outFile = new File(outDir, outRootName+curMessSize+"_"+curMessNum+".txt");
                          FileWriter outStream = new FileWriter(outFile);
                          try {
                              pString = singleSpaces(pText.toString());
                              cText = randomizeSub(pString,alph,substitution,26);
                              outStream.write(cText);
                              outStream.write("\n$$$\n");
                              outStream.write(pString);
                          } finally {
                              // Release the file handle even if encoding or writing fails.
                              outStream.close();
                          }
                      }//if message is long enough
                      pos++;
                      // A line break inside a message separates words like a space.
                      if ((pos == len) && (messDone == false))  pText.append(' ');
                  }//while pos in line
              }//if a useable line
              if (messDone){ // reset variables before reading next line
                  messDone = false;
                  pText = new StringBuffer();
                  chNum = 0;
                  lineCount = (int)(Math.random()*50);
                  curMessNum++;
                  if (curMessNum >= numEachSize){
                      curMessNum = 0;
                      curMessSize += gapMessSize;
                  }//if
                  if (curMessSize > maxMessSize){
                      done = true;
                  }//if
              } //if message wasdone
              // NOTE(review): this line is read and then discarded, because the top of
              // the loop reads again. Every second source line is skipped as a result.
              // Kept as is to preserve the existing output; confirm whether intended.
              line = inStream.readLine();

           } //while not done
           System.out.println("Finished writing Files." );
        }//try
        catch(Exception exc){
            System.out.println("In TextUtilities.writeCiphertextFiles() - " +exc.toString());
            exc.printStackTrace();
        } //catch
        finally {
            // Close the source reader on every path, including failures.
            if (inStream != null) {
                try {
                    inStream.close();
                } catch (IOException closeExc) {
                    System.out.println("In TextUtilities.writeCiphertextFiles() - " + closeExc.toString());
                }
            }
        } //finally
  }//writeCiphertextFiles

  /**
   * Command-line entry point: builds an Alphabet from a fixed char array and
   * generates cryptogram files from "oliver.txt" with the root name "oliver",
   * sizes 25 to 125 in steps of 25, and 20 files per size. Any exception is
   * reported on standard output.
   *
   * @param args ignored
   */
  public static void main(String[] args){
    try {
      // NOTE(review): presumably two (low, high) char ranges, a-z and space; confirm against Alphabet.
      final char[] alphabetSpec = {'a','z',' ',' '};
      final Alphabet alph = new Alphabet(alphabetSpec);
      writeCiphertextFiles("oliver.txt", "oliver", 25, 25, 125, 20, alph);
    }
    catch (Exception exc) {
      System.out.println(exc.toString());
    }
  }//main()

} //TextUtilities class
