hcrypto.analyzer
Class TextUtilities

java.lang.Object
  |
  +--hcrypto.analyzer.TextUtilities

public final class TextUtilities
extends java.lang.Object


Method Summary
static java.lang.String cleanString(java.lang.String s)
          This method removes punctuation and returns a space delimited string.
static int countTokens(java.lang.String text)
          countTokens() counts the tokens (words) in the text.
static int countWords(java.lang.String text)
          countWords() counts the number of words in the text by looking up each token in the current dictionary.
static int countWordsForPattern(java.lang.String pattern)
          countWordsForPattern() counts the number of words in the current dictionary that match the pattern.
static double evaluate(java.lang.String text, java.lang.StringBuffer usedLetters)
          countWords() counts the number of words in the text by looking up each token in the current dictionary.
static Dictionary getDictionary()
          This method returns the current dictionary.
static PatternDictionary getPatternDictionary()
           
static java.lang.String getUnusedLetters(java.lang.String s)
           
static boolean isAWord(java.lang.String word)
           
static java.lang.String makePattern(java.lang.String s)
          This method returns a pattern of the string.
static java.lang.String removeDuplicates(java.lang.String s)
          removeDuplicates() removes duplicate tokens from a string.
static java.lang.String removeNonAlphabetics(java.lang.String s, Alphabet a)
          This method removes all characters not in the alphabet from the string.
static java.lang.String removeWhiteSpace(java.lang.String s)
          This method removes the whitespace from a String.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Method Detail

getDictionary

public static Dictionary getDictionary()
This method returns the current dictionary.

getPatternDictionary

public static PatternDictionary getPatternDictionary()

isAWord

public static boolean isAWord(java.lang.String word)

countTokens

public static int countTokens(java.lang.String text)
countTokens() counts the tokens (words) in the text.
Parameters:
text - a space-delimited string

countWordsForPattern

public static int countWordsForPattern(java.lang.String pattern)
countWordsForPattern() counts the number of words in the current dictionary that match the pattern.
Parameters:
pattern - a string of the form 1231, 1223, etc.

removeDuplicates

public static java.lang.String removeDuplicates(java.lang.String s)
removeDuplicates() removes duplicate tokens from a string.

getUnusedLetters

public static java.lang.String getUnusedLetters(java.lang.String s)

countWords

public static int countWords(java.lang.String text)
countWords() counts the number of words in the text by looking up each token in the current dictionary.

evaluate

public static double evaluate(java.lang.String text,
                              java.lang.StringBuffer usedLetters)
countWords() counts the number of words in the text by looking up each token in the current dictionary. This version also returns the letters that are not used in any found words.

Note: the description above and the code below appear to refer to an earlier version of this method, countWords(String text, StringBuffer unusedLetters); the documented signature is evaluate(String text, StringBuffer usedLetters).

    public static int countWords(String text, StringBuffer unusedLetters) {
        // System.out.println("TEXT COUNT WORDS " + text + "\nUNUSED " + unusedLetters);
        String alphabet = "abcdefghijklmnopqrstuvwxyz";
        StringBuffer sb = new StringBuffer(alphabet);
        StringTokenizer st = new StringTokenizer(text);
        int count = 0;
        while (st.hasMoreTokens()) {
            String s = st.nextToken();
            if (dict.contains(s)) {
                // System.out.print(s + " ");
                // count += s.length();
                count++;
                for (int k = 0; k < s.length(); k++) {
                    int index = alphabet.indexOf(s.charAt(k));
                    if (index != -1)
                        sb.setCharAt(index, '*');
                }
            }
        }
        for (int k = 0; k < sb.length(); k++)
            if (sb.charAt(k) != '*')
                unusedLetters.append(sb.charAt(k));
        return count;
    }

removeWhiteSpace

public static java.lang.String removeWhiteSpace(java.lang.String s)
This method removes the whitespace from a String.

cleanString

public static java.lang.String cleanString(java.lang.String s)
This method removes punctuation and returns a space delimited string.

removeNonAlphabetics

public static java.lang.String removeNonAlphabetics(java.lang.String s,
                                                    Alphabet a)
This method removes all characters not in the alphabet from the string.

makePattern

public static java.lang.String makePattern(java.lang.String s)
This method returns a pattern of the string. For example, if the word is "there" the pattern would be 12343. Words with more than 9 distinct letters continue the pattern using UPPERCASE letters. For example, the word "appendectomy" would have the pattern "12234536789A".