/*
    RE.java - Regular Expression Class in Java Package RegularExpression
    Copyright (C) 2001, 2002  Brian Westphal

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

package RegularExpression;

import java.util.*;

/**
 * The RE class handles standard PERL-type regular expression operations.<BR>
 * <BR>
 * <PRE>RE pattern matching and escape characters list:
 * /d        ANY DIGIT
 * /D        ANY NON-DIGIT
 * /s        ANY TYPE OF WHITESPACE
 * /S        ANY TYPE OF NON-WHITESPACE
 * /w        ANY TYPE OF ALPHANUMERIC
 * /W        ANY TYPE OF NON-ALPHANUMERIC
 * .         ANY NON NEWLINE CHARACTER
 * 
 * #x[0-9A-F]+ HEX REPRESENTATION OF A CHARACTER
 * 
 * /?        WHERE ? IS ANY OTHER CHARACTER YIELDS THAT CHARACTER
 * 
 * *         0 OR MORE TIMES
 * +         1 OR MORE TIMES
 * ?         0 OR 1 TIMES
 * {M}       M TIMES
 * {M,}      AT LEAST M TIMES
 * {M,N}     AT LEAST M AND AT MOST N TIMES (N >= M)
 * 
 * [al-z]    CHARACTER LIST, INDIVIDUAL CHARACTERS OR CHARACTER RANGES</PRE>
 *
 * @author  Brian Westphal
 * @version 1.10
 * @since   JDK1.3.1
 */
public class RE
{
	/**
	 * A flag denoting case-sensitivity.  When false, pattern characters are
	 * folded to lower case while the automaton is built and input
	 * characters are folded to lower case while matching.
	 */
	private boolean casesensitive;

	/**
	 * The automaton used to process regular expression operation.  It is
	 * assigned by the two-argument constructor only; it has no initializer,
	 * so it stays null when the empty constructor is used.
	 */
	private NFA automaton;

	/**
	 * The string used to build the regular expression automaton.  Also
	 * echoed by toString.
	 */
	private String re;

	/**
	 * Alphabet symbol denoting the set of numeric characters (pattern /d).
	 * The SET_ constants are pseudo-characters with values 256-262; the
	 * matcher feeds them to the automaton next to the real input character.
	 * NOTE(review): an input character whose own value is 256-262 looks
	 * identical to one of these symbols - confirm this is acceptable.
	 */
	private static final char SET_DIGIT           = (char) 256;

	/**
	 * Alphabet symbol denoting the set of non-numeric characters (pattern /D).
	 */
	private static final char SET_NONDIGIT        = (char) 257;

	/**
	 * Alphabet symbol denoting the set of whitespace characters (pattern /s).
	 */
	private static final char SET_WHITESPACE      = (char) 258;

	/**
	 * Alphabet symbol denoting the set of non-whitespace characters
	 * (pattern /S).
	 */
	private static final char SET_NONWHITESPACE   = (char) 259;

	/**
	 * Alphabet symbol denoting the set of alpha-numeric characters
	 * (pattern /w).
	 */
	private static final char SET_ALPHANUMERIC    = (char) 260;

	/**
	 * Alphabet symbol denoting the set of non-alpha-numeric characters
	 * (pattern /W).
	 */
	private static final char SET_NONALPHANUMERIC = (char) 261;

	/**
	 * Alphabet symbol denoting the set of non-newline characters
	 * (pattern '.').
	 */
	private static final char SET_NONNEWLINE      = (char) 262;

	/**
	 * The char value of the first dynamic character set symbol.  The
	 * character set stored at position i of charsets is represented in the
	 * automaton by the symbol charsetbase + i.
	 */
	private char charsetbase = (char) 263;

	/**
	 * The list of character sets (CharacterSet objects, in the order they
	 * were encountered in the pattern).  Used whenever a character set
	 * (i.e. [al-z]) is needed.
	 */
	private Vector charsets = new Vector ();

	/**
	 * Empty constructor.  No pattern is compiled: the automaton and the
	 * pattern string stay null, so getAutomaton returns null and the
	 * matching methods, which dereference the automaton, cannot be used on
	 * an instance created this way.
	 */
	public RE ()
	{
	}

	/**
	 * Compiles the given pattern into an automaton that is used by all
	 * matching operations of this object.
	 *
	 * Example: <CODE>RE regexp = new RE ("ab*c", true);</CODE>
	 *
	 * @param re the regular expression string.
	 * @param casesensitive true for case-sensitive matching, false to
	 *  fold both pattern and input to lower case.
	 *
	 * @throws Exception if the pattern cannot be compiled (propagated from
	 *  the automaton builder).
	 */
	public RE (String re, boolean casesensitive) throws Exception
	{
		//The flag has to be in place before compiling: the builder reads it.
		this.casesensitive = casesensitive;
		this.re            = re;

		//Compile the pattern, then let the automaton trim its storage.
		this.automaton = buildNFA (re);
		this.automaton.shrinkfit ();
	}

	/**
	 * Gives access to the automaton compiled from the pattern.
	 *
	 * @return the automaton held by this object (the same instance that
	 *  the matching methods operate on, not a copy).
	 */
	public NFA getAutomaton ()
	{
		return this.automaton;
	}

	/**
	 * Returns the next token and delimiter in a specified string from the
	 * specified offset.  The delimiter is the first (leftmost) non-empty
	 * portion of text that matches the regular expression; among matches
	 * starting at the same position the longest one is chosen.  The token
	 * is the text between the offset and the delimiter.
	 *
	 * @param input the string to be parsed.
	 * @param offset the offset from the beginning of the input string.
	 *
	 * @return an Object array with three elements (Integer, String, String):
	 *  the index of the character following the delimiter (the next
	 *  offset), the token string, and the delimiter string.  If no
	 *  delimiter exists the rest of the input is the token, the delimiter
	 *  is null and the next offset is input.length () + 1.  Returns null
	 *  only if the offset is greater than the input length; an offset equal
	 *  to the input length yields an empty token with a null delimiter.
	 */
	public Object [] nextTokenAndDelim (String input, int offset)
	{
		int input_length = input.length ();

		//If past the end of input string no next token/delim exists.
		if (offset > input_length) return null;

		for (int beginning = offset; beginning < input_length; beginning++)
		{
			//Searching for first/longest delim: candidates shrink from the right.
			for (int length = input_length - beginning; length > 0; length--)
			{
				String teststring = input.substring (beginning, beginning + length);
				Object [] value = matchesWithFailPoint (teststring);
				boolean success = ((Boolean) value[0]).booleanValue ();

				//If delim is found return token and delim.
				if (success)
				{
					Object [] output = new Object[3];
					output[0] = new Integer (beginning + length);
					output[1] = input.substring (offset, beginning);
					output[2] = teststring;

					return output;
				}

				//The character at failpoint was rejected, so every candidate
				//containing it fails too: continue with the failpoint
				//characters before it (the loop decrement yields failpoint).
				int failpoint = ((Integer) value[1]).intValue ();
				if (failpoint < length) length = failpoint + 1;
				//Otherwise the whole candidate was consumed without reaching an
				//accepting state.  A shorter candidate may still be accepted
				//(e.g. "a(bc)?" against "ab"), so only the last character is
				//dropped by the loop decrement.
			}
		}

		//If no delim exists return token only.
		Object [] output = new Object[3];
		output[0] = new Integer (input_length + 1);
		output[1] = input.substring (offset);
		output[2] = null;

		return output;
	}

	/**
	 * Tells whether some prefix of the specified string matches the
	 * regular expression.
	 *
	 * @param input the string to be tested.
	 *
	 * @return true if a matching prefix was found, false otherwise.
	 */
	public boolean beginningMatches (String input)
	{
		//Delegates to the detailed variant and keeps only the success flag.
		Boolean matched = (Boolean) beginningMatchesWithLength (input)[0];
		return matched.booleanValue ();
	}

	/**
	 * Searches for the longest prefix of the specified string (possibly the
	 * empty prefix) that matches the regular expression.
	 *
	 * @param input the string to be tested.
	 *
	 * @return an Object array with two elements (Boolean, Integer): the
	 *  value specifying whether the beginning matched or not, and the
	 *  length of the matched prefix (null if no prefix matched).
	 */
	public Object [] beginningMatchesWithLength (String input)
	{
		//Searching for longest match from beginning: prefixes shrink from the right.
		for (int length = input.length (); length >= 0; length--)
		{
			String teststring = input.substring (0, length);
			Object [] value = matchesWithFailPoint (teststring);
			boolean success = ((Boolean) value[0]).booleanValue ();

			//If a match is found return true and length.
			if (success)
			{
				Object [] output = new Object[2];
				output[0] = new Boolean (true);
				output[1] = new Integer (length);

				return output;
			}

			//The character at failpoint was rejected, so every prefix
			//containing it fails too: continue with the failpoint characters
			//before it (the loop decrement yields failpoint).
			int failpoint = ((Integer) value[1]).intValue ();
			if (failpoint < length) length = failpoint + 1;
			//Otherwise the whole prefix was consumed without reaching an
			//accepting state.  A shorter prefix may still be accepted
			//(e.g. "a(bc)?" against "ab"), so only the last character is
			//dropped by the loop decrement.
		}

		//If no match exists return false and null.
		Object [] output = new Object[2];
		output[0] = new Boolean (false);
		output[1] = null;

		return output;
	}

	/**
	 * Tells whether the specified string, taken as a whole, matches the
	 * regular expression.  This is the primary matches function, used by
	 * end-user programmers.
	 *
	 * @param input the string to be tested.
	 *
	 * @return true if the whole string matched, false otherwise.
	 */
	public boolean matches (String input)
	{
		//Delegates to the detailed variant and keeps only the success flag.
		Boolean matched = (Boolean) matchesWithFailPoint (input)[0];
		return matched.booleanValue ();
	}

	/**
	 * Returns a boolean value specifying whether the specified string
	 * matches the regular expression or not and if the string does not
	 * match it returns the position at which the string first failed.  This
	 * is the secondary matches function that is used by developers looking
	 * to expand the RegularExpression package.
	 *
	 * For every input character the automaton is fed the character itself
	 * (lower-cased when matching is case-insensitive) together with the
	 * symbol of every predefined set and every dynamic character set the
	 * character belongs to.
	 *
	 * @param input the string to be tested.
	 *
	 * @return an Object array with two elements (Boolean, Integer): the
	 *  value specifying whether the string matched or not, and the fail
	 *  point.  The fail point is the index of the first character that the
	 *  automaton rejected, or input.length () when every character was
	 *  consumed (whether or not the final state is accepting).
	 */
	public Object [] matchesWithFailPoint (String input)
	{
		Object [] output = new Object[2];

		//Resetting automaton and feeding the initial epsilon transitions.
		automaton.reset ();
		automaton.input (automaton.epsilon);

		int charsets_size = charsets.size ();

		//Symbols fed per input character: the character itself, at most four
		//predefined set symbols (one per complementary pair plus non-newline)
		//and one symbol per dynamic character set.
		char [] chararray = new char[charsets_size + 5];
		int charsUsed;

		//Detects whether the automaton uses any predefined set symbol (256-262).
		boolean specialInAlphabetEnabled = false;
		for (int index = 0; index < automaton.specialInAlphabet.length && !specialInAlphabetEnabled; index++)
		{
			if (automaton.specialInAlphabet[index]) specialInAlphabetEnabled = true;
		}

		//Feeding input string characters one at a time.
		int input_length = input.length ();
		for (int index = 0; index < input_length; index++)
		{
			charsUsed = 0;

			//Getting input string character.
			char character = input.charAt (index);
			if (!casesensitive) character = Character.toLowerCase (character);

			chararray[charsUsed++] = character;

			//A set symbol is fed only if the character really belongs to the
			//set AND the automaton uses the symbol.  Membership is decided
			//first, so that a negated set (e.g. /W) is not fed for a member
			//of the positive set merely because the positive symbol (/w) is
			//absent from the pattern.
			if (specialInAlphabetEnabled)
			{
				if (Character.isLetterOrDigit (character))
				{
					if (automaton.specialInAlphabet[SET_ALPHANUMERIC - 256]) chararray[charsUsed++] = SET_ALPHANUMERIC;
				}
				else if (automaton.specialInAlphabet[SET_NONALPHANUMERIC - 256]) chararray[charsUsed++] = SET_NONALPHANUMERIC;

				if (Character.isDigit (character))
				{
					if (automaton.specialInAlphabet[SET_DIGIT - 256]) chararray[charsUsed++] = SET_DIGIT;
				}
				else if (automaton.specialInAlphabet[SET_NONDIGIT - 256]) chararray[charsUsed++] = SET_NONDIGIT;

				if (Character.isWhitespace (character))
				{
					if (automaton.specialInAlphabet[SET_WHITESPACE - 256]) chararray[charsUsed++] = SET_WHITESPACE;
				}
				else if (automaton.specialInAlphabet[SET_NONWHITESPACE - 256]) chararray[charsUsed++] = SET_NONWHITESPACE;

				if (automaton.specialInAlphabet[SET_NONNEWLINE - 256] && character != '\n') chararray[charsUsed++] = SET_NONNEWLINE;
			}

			//Creating list of dynamic character sets that the input string character belongs to.
			//The character is already lower-cased when matching is case-insensitive.
			for (int csindex = 0; csindex < charsets_size; csindex++)
			{
				CharacterSet charset = (CharacterSet) charsets.get (csindex);
				if (charset.inSet (character)) chararray[charsUsed++] = (char) (charsetbase + csindex);
			}

			//Feeding all character possibilities simultaneously.
			//Return false and failure position if no character is accepted.
			if (!feedpattern (automaton, chararray, charsUsed))
			{
				output[0] = new Boolean (false);
				output[1] = new Integer (index);
				return output;
			}

			//Feeding epsilon transitions.
			automaton.input (automaton.epsilon);
		}

		//Returns the acceptingness and current position (after all input).
		output[0] = new Boolean (automaton.accepting ());
		output[1] = new Integer (input_length);
		return output;
	}

        /**
         * Finds where the first match of the regular expression starts, at
         * or after the offset.
         *
         * @param input the input string.
         * @param offset the offset of the input string.
         *
         * @return the start index of the match, or -1 if there is none.
         */
        public int indexOf (String input, int offset)
        {
                Object [] found = nextTokenAndDelim (input, offset);

                //Offset beyond the input: nothing to search.
                if (found == null) return -1;

                //Only a trailing token was found, no delimiter.
                String delimiter = (String) found[2];
                if (delimiter == null) return -1;

                //The match ends right before the next offset.
                int next = ((Integer) found[0]).intValue ();
                return next - delimiter.length ();
        }

	/**
	 * Replaces all matching substrings in a specified string with a
	 * specified replacement string.  The input is split into tokens and
	 * the tokens are joined again with the replacement between each pair
	 * of neighbours.
	 *
	 * @param input the string to be manipulated.
	 * @param replacement the string to replace matching portions.
	 *
	 * @return the string after all replacements have been made.
	 */
	public String replace (String input, String replacement)
	{
		//Splits input string into tokens.
		String [] value = split (input);

		//A buffer avoids re-copying the whole result on every append.
		//(StringBuffer rather than StringBuilder: the file targets JDK 1.3.)
		StringBuffer output = new StringBuffer ();

		//Inserts replacement values between tokens.
		for (int index = 0; index < value.length; index++)
		{
			if (index > 0) output.append (replacement);
			output.append (value[index]);
		}

		return output.toString ();
	}

	/**
	 * Splits the specified string into an array of tokens, using the
	 * matches of the regular expression as delimiters.  A delimiter at the
	 * very end of the input produces a trailing empty token.
	 *
	 * @param input the string to be parsed.
	 *
	 * @return an array of tokens.
	 */
	public String [] split (String input)
	{
		LinkedList outputlist = new LinkedList ();

		int offset = 0;
		Object [] value;
		//Adds all tokens from an input string into a linked list.
		while ((value = nextTokenAndDelim (input, offset)) != null)
		{
			offset = ((Integer) value[0]).intValue ();
			outputlist.add (value[1]);
		}

		//Converts linked list into String array in a single pass
		//(indexed get on a linked list would walk the list for every element).
		return (String []) outputlist.toArray (new String[outputlist.size ()]);
	}

	/**
	 * Renders the pattern between two slashes, e.g. /ab*c/.
	 *
	 * @return the pattern string wrapped in slashes.
	 */
	public String toString ()
	{
		String delimiter = "/";
		return delimiter + re + delimiter;
	}

	/**
	 * Builds the automaton described by the specified regular expression.
	 * The pattern is scanned from left to right; every element (group,
	 * character set, hex character, escaped or plain character) becomes an
	 * automaton fragment, a modifier that immediately follows is applied to
	 * it, and the fragment is concatenated to the result.  Groups and the
	 * right-hand side of a union are built by recursive calls.
	 *
	 * Hex characters (#x followed by the digits 0-9, A-F) are decoded by
	 * replacing them in the local copy of the pattern with the character
	 * they denote; that character is then taken literally.
	 *
	 * @param re the regular expression that describes the automaton.
	 *
	 * @return the automaton described by the regular expression, or null
	 *  if the expression contains no element (e.g. the empty string).
	 *
	 * @throws Exception if a group is not closed, if '#' is not followed by
	 *  'x', or if "#x" is not followed by at least one hex digit.
	 */
	private NFA buildNFA (String re) throws Exception
	{
		NFA output = null;

		//Used to denote special case characters.
		//escape:       the previous character was an unescaped '/'.
		//hexcharacter: the current character is the result of decoding "#x..."
		//              and must be taken literally.
		boolean escape       = false;
		boolean hexcharacter = false;

		//Building NFA one element at a time.
		//re_length is kept in sync with re, which changes when hex characters are decoded.
		int re_length = re.length ();
		for (int index = 0; index < re_length; index++)
		{
			//Handling parentheses (groups).
			if (!hexcharacter && !escape && re.charAt (index) == '(')
			{
				int level   = 1;
				int closing = -1;
				boolean internalescape = false;

				//Locating the matching closing parenthesis (honoring nesting and escapes).
				for (int gindex = index + 1; gindex < re_length && closing == -1; gindex++)
				{
					char current = re.charAt (gindex);
					if (!internalescape && current == '/') internalescape = true;
					else
					{
						if      (!internalescape && current == '(') level++;
						else if (!internalescape && current == ')') level--;

						//If back at original parenthesis level.
						if (level == 0) closing = gindex;
						internalescape = false;
					}
				}

				if (closing == -1) throw new Exception ("Malformed regular expression");

				//Get NFA for group (text between the parentheses).
				String group = re.substring (index + 1, closing);
				NFA concatnfa = buildNFA (group);

				//Continue at the closing parenthesis.
				index = closing;

				//Add any modifiers.
				Object [] values = addModifiers (concatnfa, index, re);
				index += ((Integer) values[0]).intValue ();
				concatnfa = (NFA) values[1];

				//Concatenate NFA to output.
				if (output == null) output = concatnfa;
				else output.concat (concatnfa);
			}
			//Handling pipes (union).
			else if (!hexcharacter && !escape && re.charAt (index) == '|')
			{
				//Get NFA for everything else.
				//Union a|b|c can be treated as a|(b|c).
				NFA unionnfa = buildNFA (re.substring (index + 1));
				index = re.length ();

				//Unioning NFA with output.  With nothing on the left-hand side
				//the right-hand side is the result; it must not be unioned with itself.
				if (output == null) output = unionnfa;
				else output.union (unionnfa);
			}
			//Handling brackets (character sets).
			else if (!hexcharacter && !escape && re.charAt (index) == '[')
			{
				//Creates dynamic character set.
				CharacterSet charset = new CharacterSet ();

				boolean innerescape       = false;
				boolean innerhexcharacter = false;

				//Adding characters and character lists to character set.
				for (int cindex = index + 1; cindex < re_length; cindex++)
				{
					//If caret set inversion property (only directly after the bracket).
					//[^ab] means any character but a or b.
					if (!innerhexcharacter && re.charAt (cindex) == '^' && cindex == index + 1) charset.inverse = true;
					else if (!innerhexcharacter && !innerescape && re.charAt (cindex) == '/') innerescape = true;
					//If closing bracket found: continue the outer scan there and leave this loop.
					else if (!innerhexcharacter && !innerescape && re.charAt (cindex) == ']')
					{
						index = cindex;
						cindex = re.length ();
					}
					//If hex character signal found.
					//Format: #x???...? where ? is a hex digit (at least one hex digit).
					else if (!innerhexcharacter && !innerescape && re.charAt (cindex) == '#')
					{
						if (cindex < re.length () - 1 && re.charAt (cindex + 1) == 'x')
						{
							//Converting hex representation into character representation.
							int lastfound = lastHexDigit (re, cindex + 2);
							int value = HEX2int (re.substring (cindex + 2, lastfound + 1));

							//Replacing value in string and re-reading the decoded character.
							re = re.substring (0, cindex) + (char) value + re.substring (lastfound + 1);
							re_length = re.length ();
							cindex--;
						} else throw new Exception ("Invalid representation type after #");

						innerhexcharacter = true;
					}
					//If normal character or special character.
					else
					{
						//If character range.
						if (!innerescape && cindex < re.length () - 2 && re.charAt (cindex + 1) == '-')
						{
							//If range ends with hex character.
							if (re.charAt (cindex + 2) == '#')
							{
								if (cindex < re.length () - 3 && re.charAt (cindex + 3) == 'x')
								{
									//Converting hex representation into character representation.
									int lastfound = lastHexDigit (re, cindex + 4);
									int value = HEX2int (re.substring (cindex + 4, lastfound + 1));

									//Replacing value in string.
									re = re.substring (0, cindex + 2) + (char) value + re.substring (lastfound + 1);
									re_length = re.length ();
								} else throw new Exception ("Invalid representation type after #");
							}

							//Adding range to dynamic character set.
							if (!casesensitive) charset.addRange (Character.toLowerCase (re.charAt (cindex)), Character.toLowerCase (re.charAt (cindex + 2)));
							else charset.addRange (re.charAt (cindex), re.charAt (cindex + 2));
							cindex += 2;
						}
						//If single character.
						else
						{
							//Adding character to dynamic character set.
							if (!casesensitive) charset.addCharacter (Character.toLowerCase (re.charAt (cindex)));
							else charset.addCharacter (re.charAt (cindex));
						}

						innerescape       = false;
						innerhexcharacter = false;
					}
				}

				//The symbol of the new set: its future position in the character set list.
				char setsymbol = (char) (charsetbase + charsets.size ());

				//Adding any modifiers.
				Object [] values = addModifiers (setsymbol, index, re);
				int charactersUsed = ((Integer) values[0]).intValue ();
				index += charactersUsed;
				NFA concatnfa = (NFA) values[1];

				//Concatenating NFA to output.
				//Without a modifier the returned automaton is not used; the bare symbol is concatenated instead.
				if (output == null)
				{
					if (charactersUsed > 0)
					{
						output = concatnfa;
					}
					else
					{
						output = new NFA ();
						output.concat (setsymbol);
					}
				}
				else if (charactersUsed > 0) output.concat (concatnfa);
				else output.concat (setsymbol);

				//Adding character set to dynamic character set list.
				charsets.add (charset);
			}
			//Handling scores (hex characters).
			else if (!hexcharacter && !escape && re.charAt (index) == '#')
			{
				if (index < re.length () - 1 && re.charAt (index + 1) == 'x')
				{
					//Converting hex representation into character representation.
					int lastfound = lastHexDigit (re, index + 2);
					int value = HEX2int (re.substring (index + 2, lastfound + 1));

					//Replacing value in string.
					re = re.substring (0, index) + (char) value + re.substring (lastfound + 1);
					re_length = re.length ();

					//The decoded character is read by the next iteration.
					index--;
				} else throw new Exception ("Invalid representation type after #");

				hexcharacter = true;
			}
			//Handling slashes (escape characters).
			else if (!hexcharacter && !escape && re.charAt (index) == '/')
			{
				escape = true;
			}
			//Handling normal characters and special characters.
			else
			{
				char symbol;

				//If normal character (or period).
				if (!escape || hexcharacter)
				{
					//If period character (matches any non-newline character)
					if (!hexcharacter && re.charAt (index) == '.') symbol = SET_NONNEWLINE;
					else
					{
						//Creating alphabet with single character
						if (casesensitive) symbol = re.charAt (index);
						else symbol = Character.toLowerCase (re.charAt (index));
					}
				}
				//If escape character.
				else
				{
					if      (re.charAt (index) == 'd') symbol = SET_DIGIT;
					else if (re.charAt (index) == 'D') symbol = SET_NONDIGIT;
					else if (re.charAt (index) == 's') symbol = SET_WHITESPACE;
					else if (re.charAt (index) == 'S') symbol = SET_NONWHITESPACE;
					else if (re.charAt (index) == 'w') symbol = SET_ALPHANUMERIC;
					else if (re.charAt (index) == 'W') symbol = SET_NONALPHANUMERIC;
					//If normal escape character (no special meaning or canceling meaning)
					else
					{
						//Creating alphabet with single character
						if (casesensitive) symbol = re.charAt (index);
						else symbol = Character.toLowerCase (re.charAt (index));
					}
				}

				//Adding any modifiers.
				Object [] values = addModifiers (symbol, index, re);
				int charactersUsed = ((Integer) values[0]).intValue ();
				index += charactersUsed;
				NFA concatnfa = (NFA) values[1];

				//Concatenating NFA to output.
				//Without a modifier the returned automaton is not used; the bare symbol is concatenated instead.
				if (output == null)
				{
					if (charactersUsed > 0)
					{
						output = concatnfa;
					}
					else
					{
						output = new NFA ();
						output.concat (symbol);
					}
				}
				else if (charactersUsed > 0) output.concat (concatnfa);
				else output.concat (symbol);

				//Both flags apply to this single character only.  Leaving
				//hexcharacter set would make every later '(', '|', '[', '#',
				//'/' and '.' be taken literally.
				escape       = false;
				hexcharacter = false;
			}
		}

		return output;
	}

	/**
	 * Returns the index of the last character of the run of hex digits
	 * (0-9 and upper-case A-F) that starts at the specified position.
	 *
	 * @param re the regular expression string.
	 * @param start the index at which the first hex digit is expected.
	 *
	 * @return the index of the last hex digit of the run.
	 *
	 * @throws Exception if the character at the start position is not a
	 *  hex digit or the start position is past the end of the string.
	 */
	private int lastHexDigit (String re, int start) throws Exception
	{
		int lastfound = -1;

		//Collecting all following hex digits.
		for (int character = start; character < re.length (); character++)
		{
			char use = re.charAt (character);
			if (use >= '0' && use <= '9' || use >= 'A' && use <= 'F') lastfound = character;
			else break;
		}

		if (lastfound == -1) throw new Exception ("Missing hex digits after #x");
		return lastfound;
	}

	/**
	 * Tells whether the specified character starts a modifier, i.e. is one
	 * of '*', '+', '?' or '{'.
	 *
	 * @param character the character to be tested.
	 *
	 * @return true if the character starts a modifier, false otherwise.
	 */
	private boolean isModifier (char character)
	{
		switch (character)
		{
			case '*':
			case '+':
			case '?':
			case '{':
				return true;
			default:
				return false;
		}
	}

	/**
	 * Applies the modifier that immediately follows a single-symbol
	 * element, if there is one, and reports how many pattern characters the
	 * modifier occupied.
	 *
	 * @param character the symbol to add modifiers to.
	 * @param offset the index of the element's last character in the
	 *  regular expression string; the modifier is looked for at offset + 1.
	 * @param re the regular expression string.
	 *
	 * @return an Object array with two elements (Integer, NFA): the number
	 *  of modifier characters used, and the automaton after applying the
	 *  modifier.  When no modifier follows, the count is 0 and the
	 *  automaton is a freshly constructed NFA that does not contain the
	 *  symbol.
	 */
	private Object [] addModifiers (char character, int offset, String re)
	{
		//Result when no modifier follows: a fresh automaton, nothing consumed.
		NFA result   = new NFA ();
		int consumed = 0;

		int total    = re.length ();
		int modindex = offset + 1;

		//A modifier must immediately follow the pattern (a+ not a +).
		if (modindex < total && isModifier (re.charAt (modindex)))
		{
			consumed = 1;

			switch (re.charAt (modindex))
			{
				case '*':
					result = NFA.star (character);
					break;
				case '+':
					result = NFA.plus (character);
					break;
				case '?':
					result = NFA.maybe (character);
					break;
				case '{':
				{
					//Repetition pattern:
					//{m} exactly m times.
					//{m,} at least m times.
					//{m,n} between (inclusive) m and n times.
					int commaindex = -1;
					int endindex   = -1;

					//Scanning up to the closing brace, remembering the last comma seen.
					int scan = modindex + 1;
					while (scan < total && endindex == -1)
					{
						char current = re.charAt (scan);
						if      (current == ',') commaindex = scan;
						else if (current == '}') endindex   = scan;
						scan++;
					}

					if (commaindex == -1)
					{
						//No comma: exact count.
						int count = Integer.parseInt (re.substring (modindex + 1, endindex));
						result = NFA.repeat (character, count, count);
					}
					else
					{
						int lowvalue = Integer.parseInt (re.substring (modindex + 1, commaindex));

						//Comma directly before the brace: no upper bound (-1).
						if (endindex == commaindex + 1) result = NFA.repeat (character, lowvalue, -1);
						else
						{
							int hivalue = Integer.parseInt (re.substring (commaindex + 1, endindex));
							result = NFA.repeat (character, lowvalue, hivalue);
						}
					}

					//Everything from the opening to the closing brace was used.
					consumed += endindex - modindex;
					break;
				}
			}
		}

		Object [] output = new Object[2];
		output[0] = new Integer (consumed);
		output[1] = result;
		return output;
	}

	/**
	 * Applies the modifier, if any, that immediately follows a definition
	 * in the regular expression string to the supplied automaton.<BR>
	 * <BR>
	 * The character examined is the one at <CODE>offset + 1</CODE>; a
	 * modifier must directly follow the definition it modifies (a+ not a +).
	 * At most one modifier is consumed per call.  The automaton is modified
	 * in place and the same instance is returned.
	 *
	 * @param automaton the automaton to add modifiers to.
	 * @param offset the index in the regular expression string of the last
	 *  character of the definition; the search for a modifier starts at the
	 *  following index.
	 * @param re the regular expression string.
	 *
	 * @return an Object array with two elements (Integer, NFA): the number of
	 *  modifier characters used (0 when no modifier follows), and the
	 *  automaton after applying the modifier.
	 *
	 * @throws StringIndexOutOfBoundsException if a repetition modifier is
	 *  opened with '{' but never closed with '}'.
	 * @throws NumberFormatException if a repetition bound is not a valid
	 *  integer.
	 */
	private Object [] addModifiers (NFA automaton, int offset, String re)
	{
		int re_length = re.length ();
		int modindex  = offset + 1;
		int modifiers = 0;

		//Only look for a modifier when the definition is not the end of the pattern.
		if (modindex < re_length)
		{
			char modifier = re.charAt (modindex);

			if (isModifier (modifier))
			{
				//The modifier character itself is always consumed.
				modifiers = 1;

				if      (modifier == '*') automaton.star ();
				else if (modifier == '+') automaton.plus ();
				else if (modifier == '?') automaton.maybe ();
				//Repetition pattern.
				//{m} exactly m times.
				//{m,} at least m times.
				//{m,n} between (inclusive) m and n times.
				else if (modifier == '{')
				{
					int commaindex = -1;
					int endindex   = -1;

					//Searching for comma and/or closing brace (stops at the first closing brace).
					for (int charindex = modindex + 1; charindex < re_length && endindex == -1; charindex++)
					{
						char current = re.charAt (charindex);
						if      (current == ',') commaindex = charindex;
						else if (current == '}') endindex   = charindex;
					}

					//Failing early keeps the automaton untouched and names the real problem
					//instead of surfacing as an obscure substring index error.
					if (endindex == -1)
					{
						throw new StringIndexOutOfBoundsException ("Unterminated repetition modifier '{' at index " + modindex + " in regular expression: " + re);
					}

					if (commaindex == -1)
					{
						//No comma: exactly matching.
						int value = Integer.parseInt (re.substring (modindex + 1, endindex));
						automaton.repeat (value, value);
					}
					else
					{
						int lowvalue = Integer.parseInt (re.substring (modindex + 1, commaindex));

						//Comma next to close brace: at least matching (-1 means no upper bound).
						if (endindex == commaindex + 1)
						{
							automaton.repeat (lowvalue, -1);
						}
						//Otherwise low and high values: between matching.
						else
						{
							int hivalue = Integer.parseInt (re.substring (commaindex + 1, endindex));
							automaton.repeat (lowvalue, hivalue);
						}
					}

					//Everything after the opening brace up to and including the closing brace was used.
					modifiers += endindex - modindex;
				}
			}
		}

		return new Object [] {new Integer (modifiers), automaton};
	}

        /**
         * Simulates the simultaneous feeding of multiple characters into an
         * automaton.  Each character is fed starting from the automaton's
         * current set of states; afterwards the automaton's current states are
         * the union (without duplicates) of the states reached by every
         * accepted character.
         *
         * @param automaton the automaton to be fed.
         * @param patternchar the list of characters to feed simultaneously.
         * @param charsUsed the number of leading entries of patternchar to feed.
         *
         * @return a boolean specifying whether or not any of the characters were accepted.
         */
	private boolean feedpattern (NFA automaton, char [] patternchar, int charsUsed)
	{
		boolean found = false;

		//Snapshot of the states every character is fed from.
		Vector startstates = (Vector) automaton.getCurrentStates ().clone ();

		//Union of all states reached, and a membership flag per node number
		//so that each state is stored only once.
		Vector savestate = new Vector ();
		boolean [] inSaveState = new boolean[automaton.Q];

		//Feeding each possible character from array.
		for (int index = 0; index < charsUsed; index++)
		{
			//If input is accepted, merge the reached states into the union.
			if (automaton.input (patternchar[index]))
			{
				found = true;

				Vector reached = automaton.getCurrentStates ();
				int reached_size = reached.size ();
				for (int subindex = 0; subindex < reached_size; subindex++)
				{
					NFANode node = (NFANode) reached.get (subindex);
					if (!inSaveState[node.nodeNumber])
					{
						savestate.add (node);
						inSaveState[node.nodeNumber] = true;
					}
				}
			}

			//Resetting machine to the states it was in before this character.
			automaton.setCurrentStates (startstates);
		}

		automaton.setCurrentStates (savestate);
		return found;
	}

        /**
         * Converts from hexadecimal to int.  Digits 0-9, A-F and a-f are
         * recognised; any other character is treated as a zero digit.  Values
         * that do not fit in an int wrap around silently.
         *
         * @param hexvalue the value of a number in hexadecimal notation.
         *
         * @return the int value from the conversion.
         */
	private int HEX2int (String hexvalue)
	{
		int value = 0;

		//Accumulating one digit at a time, most significant digit first.
		//0-9 =  0- 9.
		//A-F = 10-15 (either case).
		int hexvalue_length = hexvalue.length ();
		for (int index = 0; index < hexvalue_length; index++)
		{
			char use = hexvalue.charAt (index);

			int digit = 0;
			if      (use >= '0' && use <= '9') digit = use - '0';
			else if (use >= 'A' && use <= 'F') digit = 10 + use - 'A';
			else if (use >= 'a' && use <= 'f') digit = 10 + use - 'a';

			value = value * 16 + digit;
		}

		return value;
	}
}

/**
 * The NFA class is an implementation for handling non-deterministic finite
 * state automata.  This class simulates determinism in real-time rather than
 * converting to a DFA.<BR>
 * <BR>
 * The initial state is always the first entry of the node list.  The
 * {@link #input} method follows only transitions labelled with the character
 * it is given, so epsilon moves are taken by feeding {@link #epsilon}.
 *
 * @author  Brian Westphal
 * @version 1.10
 * @since   JDK1.3.1
 */
class NFA implements Cloneable
{
	/**
	 * The number of states in the NFA (kept equal to the size of the node list).
	 */
	public int Q;

	/**
	 * Epsilon constant.
	 */
	protected static final char epsilon = (char) 0;

	/**
	 * Flags denoting whether special characters (256 to 262) are in the alphabet.
	 */
	protected boolean [] specialInAlphabet; //256 to 262

	/**
	 * The collection of nodes; element 0 is the initial state.
	 */
	private Vector nodes;

	/**
	 * The collection of current nodes (null until reset or setCurrentStates is called).
	 */
	private Vector currentNodes;

	/**
	 * The collection of final state nodes.
	 */
	private Vector finalStateNodes;

	/**
	 * Flag denoting whether node numbers have been assigned (used in reset).
	 * Cleared again whenever the structure of the automaton changes.
	 */
	private boolean nodeNumbersAssigned = false;

	/**
	 * Constructs a new non-deterministic finite state automaton that
	 * consists of a single state which is both initial and final.
	 */
	public NFA ()
	{
		NFANode onlyState = new NFANode (true);

		nodes = new Vector ();
		nodes.add (onlyState);
		Q = 1;

		finalStateNodes = new Vector ();
		finalStateNodes.add (onlyState);

		specialInAlphabet = new boolean[7];
	}

	/**
	 * Assigns node numbers (used for optimization) to each node.  The number
	 * of a node is its index in the node list.
	 */
	private void assignNodeNumbers ()
	{
		int count = nodes.size ();
		for (int index = 0; index < count; index++)
		{
			((NFANode) nodes.get (index)).nodeNumber = index;
		}
	}

	/**
	 * Records a character in the special character flags if it is one of
	 * the special characters (256 to 262).
	 *
	 * @param character the character that is being added to the alphabet.
	 */
	private void recordSpecial (char character)
	{
		if (character >= (char) 256 && character <= (char) 262)
		{
			specialInAlphabet[(int) character - 256] = true;
		}
	}

	/**
	 * Merges the special character flags of another automaton into this one.
	 *
	 * @param other the automaton whose flags are merged in.
	 */
	private void mergeSpecial (NFA other)
	{
		for (int index = 0; index < specialInAlphabet.length; index++)
		{
			if (other.specialInAlphabet[index]) specialInAlphabet[index] = true;
		}
	}

	/**
	 * Adds an epsilon transition to the (single) final state of an automaton
	 * from every node whose index is at least the given index.
	 *
	 * @param automaton the automaton to modify.
	 * @param firstIndex the index of the first node to link.
	 */
	private static void linkToFinal (NFA automaton, int firstIndex)
	{
		NFANode finalNode = (NFANode) automaton.finalStateNodes.get (0);
		for (int index = firstIndex; index < automaton.Q; index++)
		{
			NFANode node = (NFANode) automaton.nodes.get (index);
			node.branches.add (new NFATransition (epsilon, finalNode));
		}
	}

	/**
	 * Replaces the contents of this automaton with a single state that is
	 * both initial and final, so that only the empty string is accepted.
	 */
	private void becomeEmptyString ()
	{
		NFANode onlyState = new NFANode (true);

		nodes = new Vector ();
		nodes.add (onlyState);
		Q = 1;

		finalStateNodes = new Vector ();
		finalStateNodes.add (onlyState);

		specialInAlphabet = new boolean[specialInAlphabet.length];
		nodeNumbersAssigned = false;
	}

	/**
	 * Performs a deep copy of the automaton.  Nodes and transitions are
	 * duplicated; the set of current states is not copied.
	 *
	 * @return a deep copy of the automaton.
	 */
	public Object clone ()
	{
		//Node numbers map each original node onto its copy.
		assignNodeNumbers ();

		int count = nodes.size ();

		//Copying nodes.
		Vector copiedNodes = new Vector (count);
		Vector copiedFinalStateNodes = new Vector ();
		for (int index = 0; index < count; index++)
		{
			NFANode original = (NFANode) nodes.get (index);
			NFANode copy = new NFANode (original.finalState);
			copiedNodes.add (copy);
			if (original.finalState) copiedFinalStateNodes.add (copy);
		}

		//Copying transitions.
		for (int index = 0; index < count; index++)
		{
			NFANode original = (NFANode) nodes.get (index);
			NFANode copy = (NFANode) copiedNodes.get (index);
			int branchCount = original.branches.size ();
			for (int subindex = 0; subindex < branchCount; subindex++)
			{
				NFATransition transition = (NFATransition) original.branches.get (subindex);
				NFANode target = (NFANode) copiedNodes.get (transition.outNode.nodeNumber);
				copy.branches.add (new NFATransition (transition.inChar, target));
			}
		}

		//Creating new NFA.
		NFA output = new NFA ();
		output.Q = Q;
		output.nodes = copiedNodes;
		output.finalStateNodes = copiedFinalStateNodes;
		System.arraycopy (specialInAlphabet, 0, output.specialInAlphabet, 0, specialInAlphabet.length);

		return output;
	}

	/**
	 * Concatenates a single character.
	 *
	 * @param character the character to be concatenated.
	 */
	public void concat (char character)
	{
		recordSpecial (character);

		//Creating new final state.
		NFANode newFinalState = new NFANode (true);

		//Creating transitions from previous final states to new final state.
		int finalCount = finalStateNodes.size ();
		for (int index = 0; index < finalCount; index++)
		{
			NFANode node = (NFANode) finalStateNodes.get (index);
			node.branches.add (new NFATransition (character, newFinalState));
			node.finalState = false;
		}

		Q++;
		nodes.add (newFinalState);
		finalStateNodes.clear ();
		finalStateNodes.add (newFinalState);
		nodeNumbersAssigned = false;
	}

	/**
	 * Concatenates an automaton.  The nodes of RHS become part of this
	 * automaton, so RHS should not be used on its own afterwards.
	 *
	 * @param RHS the automaton to be concatenated.
	 */
	public void concat (NFA RHS)
	{
		NFANode rhsInitialState = (NFANode) RHS.nodes.get (0);

		//Creating transitions from final states to initial state of RHS.
		int finalCount = finalStateNodes.size ();
		for (int index = 0; index < finalCount; index++)
		{
			NFANode node = (NFANode) finalStateNodes.get (index);
			node.branches.add (new NFATransition (epsilon, rhsInitialState));
			node.finalState = false;
		}

		Q += RHS.Q;
		nodes.addAll (RHS.nodes);

		//Copying (rather than sharing) the list so later changes to either
		//automaton's final state list cannot affect the other.
		finalStateNodes = new Vector (RHS.finalStateNodes);

		//Checking for special characters in RHS alphabet.
		mergeSpecial (RHS);
		nodeNumbersAssigned = false;
	}

	/**
	 * Stars a character.
	 *
	 * @param character the character to be starred.
	 *
	 * @return an NFA that is the star of the character.
	 */
	public static NFA star (char character)
	{
		NFA output = new NFA ();
		output.recordSpecial (character);

		//A single state, both initial and final, looping on the character.
		NFANode node = (NFANode) output.nodes.get (0);
		node.branches.add (new NFATransition (character, node));

		return output;
	}

	/**
	 * Stars an automaton.<BR>
	 * <BR>
	 * A fresh state is added that is both the new initial state and the only
	 * final state.  It leads to the old initial state by an epsilon move and
	 * every old final state leads back to it.  The old initial state is
	 * deliberately not made final itself: it may have incoming transitions
	 * (for instance in (a*b)*), and marking it final would accept strings
	 * that merely loop back to it.
	 */
	public void star ()
	{
		NFANode oldInitialState = (NFANode) nodes.get (0);

		NFANode newInitialState = new NFANode (true);
		newInitialState.branches.add (new NFATransition (epsilon, oldInitialState));

		//Adding branches from final states to the new initial state.
		int finalCount = finalStateNodes.size ();
		for (int index = 0; index < finalCount; index++)
		{
			NFANode node = (NFANode) finalStateNodes.get (index);
			node.branches.add (new NFATransition (epsilon, newInitialState));
			node.finalState = false;
		}

		Q++;
		nodes.add (0, newInitialState);
		finalStateNodes.clear ();
		finalStateNodes.add (newInitialState);
		nodeNumbersAssigned = false;
	}

	/**
	 * Pluses a character.
	 *
	 * @param character the character to be plused.
	 *
	 * @return an NFA that is the plus of the character.
	 */
	public static NFA plus (char character)
	{
		NFA output = new NFA ();
		output.concat (character);

		//The final state loops on the character to allow further repetitions.
		NFANode finalNode = (NFANode) output.finalStateNodes.get (0);
		finalNode.branches.add (new NFATransition (character, finalNode));

		return output;
	}

	/**
	 * Pluses an automaton.
	 */
	public void plus ()
	{
		//Duplicating NFA.
		NFA clonedNFA = (NFA) clone ();

		//Building NFA using concat and star.
		clonedNFA.star ();
		concat (clonedNFA);
	}

	/**
	 * Maybies a character.
	 *
	 * @param character the character to be maybied.
	 *
	 * @return an NFA that is the maybe of the character.
	 */
	public static NFA maybe (char character)
	{
		NFA output = new NFA ();
		output.concat (character);

		//Creating new final state reachable from both ends of the character transition.
		NFANode headNode = (NFANode) output.nodes.get (0);
		NFANode tailNode = (NFANode) output.finalStateNodes.get (0);
		NFANode newFinalState = new NFANode (true);

		headNode.branches.add (new NFATransition (epsilon, newFinalState));
		tailNode.branches.add (new NFATransition (epsilon, newFinalState));

		//The tail is no longer a final state; leaving it flagged would make
		//it accept on its own once something is concatenated afterwards.
		tailNode.finalState = false;

		output.Q++;
		output.nodes.add (newFinalState);
		output.finalStateNodes.clear ();
		output.finalStateNodes.add (newFinalState);

		return output;
	}

	/**
	 * Maybies an automaton.<BR>
	 * <BR>
	 * A new final state is appended and a fresh initial state is added that
	 * leads by epsilon moves to both the old initial state and the new final
	 * state.  The bypass is not attached to the old initial state because
	 * that state may have incoming transitions, which would allow the
	 * automaton to be left half way through.
	 */
	public void maybe ()
	{
		//Appending a new final state behind the previous final states.
		concat (epsilon);
		NFANode newFinalState = (NFANode) finalStateNodes.get (0);

		NFANode oldInitialState = (NFANode) nodes.get (0);
		NFANode newInitialState = new NFANode (false);
		newInitialState.branches.add (new NFATransition (epsilon, oldInitialState));
		newInitialState.branches.add (new NFATransition (epsilon, newFinalState));

		Q++;
		nodes.add (0, newInitialState);
		nodeNumbersAssigned = false;
	}

	/**
	 * Repeats a character.<BR>
	 * <PRE>a{m}        //REPEAT EXACTLY M TIMES (N IS IMPLIED = M)
	 * a{m,}            //REPEAT AT LEAST M TIMES (N IS IMPLIED = -1)
	 * a{m,n}           //REPEAT BETWEEN (INCLUSIVE) M AND N TIMES</PRE>
	 *
	 * @param character the character to repeat.
	 * @param lower the value of m, the lower bound.
	 * @param upper the value of n, the upper bound (-1 for no upper bound).
	 *
	 * @return the result of the repeating.
	 */
	public static NFA repeat (char character, int lower, int upper)
	{
		if (lower == 0)
		{
			//At least zero times is a plain star.
			if (upper == -1) return star (character);

			//Between zero and upper times: a chain of upper characters in
			//which every state may skip straight to the final state.
			NFA chain = new NFA ();
			for (int index = 0; index < upper; index++)
			{
				chain.concat (character);
			}
			linkToFinal (chain, 0);
			return chain;
		}

		//Adding lower limit.
		NFA output = new NFA ();
		output.concat (character);
		for (int index = 1; index < lower; index++)
		{
			output.concat (character);
		}

		if (upper == -1)
		{
			//Adding at least: the final state loops on the character.
			NFANode finalNode = (NFANode) output.finalStateNodes.get (0);
			finalNode.branches.add (new NFATransition (character, finalNode));
		}
		else
		{
			//Adding upper limit: states from the end of the mandatory part
			//onwards may skip to the final state.
			int firstOptional = output.Q - 1;
			for (int index = lower; index < upper; index++)
			{
				output.concat (character);
			}
			linkToFinal (output, firstOptional);
		}

		return output;
	}

	/**
	 * Repeats an automaton in place.<BR>
	 * <PRE>a{m}        //REPEAT EXACTLY M TIMES (N IS IMPLIED = M)
	 * a{m,}            //REPEAT AT LEAST M TIMES (N IS IMPLIED = -1)
	 * a{m,n}           //REPEAT BETWEEN (INCLUSIVE) M AND N TIMES</PRE>
	 * An upper bound smaller than a non-zero lower bound is ignored, so the
	 * automaton is then repeated exactly lower times.
	 *
	 * @param lower the value of m, the lower bound.
	 * @param upper the value of n, the upper bound (-1 for no upper bound).
	 */
	public void repeat (int lower, int upper)
	{
		//Exactly zero repetitions accepts only the empty string.
		if (lower == 0 && upper == 0)
		{
			becomeEmptyString ();
			return;
		}

		//Duplicating NFA.
		NFA clonedNFA = (NFA) clone ();

		//Adding lower limit.
		for (int index = 1; index < lower; index++)
		{
			concat ((NFA) clonedNFA.clone ());
		}

		//Adding at least.
		if (upper == -1)
		{
			if (lower == 0)
			{
				star ();
			}
			else
			{
				clonedNFA.star ();
				concat (clonedNFA);
			}
			return;
		}

		//Adding upper limit: every repetition beyond the lower limit is optional.
		int firstOptional = lower;
		if (lower == 0)
		{
			//The automaton itself is the first optional repetition.
			maybe ();
			firstOptional = 1;
		}
		for (int index = firstOptional; index < upper; index++)
		{
			NFA tempNFA = (NFA) clonedNFA.clone ();
			tempNFA.maybe ();
			concat (tempNFA);
		}
	}

	/**
	 * Unions an automaton.  The nodes of RHS become part of this automaton,
	 * so RHS should not be used on its own afterwards.
	 *
	 * @param RHS the automaton to be unioned with.
	 */
	public void union (NFA RHS)
	{
		//A new initial state branches to the initial states of both sides.
		NFANode newInitialState = new NFANode (false);
		newInitialState.branches.add (new NFATransition (epsilon, (NFANode) nodes.get (0)));
		newInitialState.branches.add (new NFATransition (epsilon, (NFANode) RHS.nodes.get (0)));

		Q += 1 + RHS.Q;
		nodes.add (0, newInitialState);
		nodes.addAll (RHS.nodes);
		finalStateNodes.addAll (RHS.finalStateNodes);

		mergeSpecial (RHS);
		nodeNumbersAssigned = false;
	}

	/**
	 * Function not currently implemented.  Intended to allow further
	 * optimization of NFA by removing unnecessary complication.
	 */
	public void shrinkfit ()
	{
	}

	/**
	 * Resets the automaton by setting its set of current states to include
	 * only the initial state (node 0).  Node numbers are (re)assigned first
	 * if the structure changed since they were last assigned.
	 */
	public void reset ()
	{
		if (!nodeNumbersAssigned)
		{
			assignNodeNumbers ();
			nodeNumbersAssigned = true;
		}

		currentNodes = new Vector (Q);
		currentNodes.add (nodes.get (0));
	}

	/**
	 * Returns the collection of current states.  The collection itself is
	 * returned, not a copy.
	 *
	 * @return the collection of current states.
	 */
	public Vector getCurrentStates ()
	{
		return currentNodes;
	}

	/**
	 * Sets the collection of current states.  The collection is stored as
	 * given, not copied.
	 *
	 * @param currentNodes the new collection of current states.
	 */
	public void setCurrentStates (Vector currentNodes)
	{
		this.currentNodes = currentNodes;
	}

	/**
	 * Takes a character as input into the automaton.  Only transitions
	 * labelled with the given character are followed.  When the character is
	 * epsilon, the current states are kept and epsilon transitions are
	 * followed repeatedly until no new state is reached.
	 *
	 * @param inputchar the character to input into the automaton.
	 *
	 * @return a boolean value specifying whether the character was accepted or not.
	 */
	public boolean input (char inputchar)
	{
		boolean closing = (inputchar == epsilon);

		//If input is epsilon, start from the current nodes (epsilon cannot fail).
		boolean found = closing;
		Vector reached = closing ? (Vector) currentNodes.clone () : new Vector ();

		//Loop while more epsilons should be followed (if applicable).
		int sizeBefore;
		do
		{
			sizeBefore = currentNodes.size ();
			for (int index = 0; index < sizeBefore; index++)
			{
				NFANode node = (NFANode) currentNodes.get (index);
				int branchCount = node.branches.size ();
				for (int subindex = 0; subindex < branchCount; subindex++)
				{
					NFATransition transition = (NFATransition) node.branches.get (subindex);
					if (transition.inChar == inputchar && !reached.contains (transition.outNode))
					{
						reached.add (transition.outNode);
						found = true;
					}
				}
			}

			currentNodes = (Vector) reached.clone ();
		} while (closing && sizeBefore != currentNodes.size ());

		return found;
	}

	/**
	 * Returns a boolean value specifying whether the automaton is in an
	 * accepting state or not.
	 *
	 * @return a value specifying whether the automaton is in an accepting
	 *  state or not.
	 */
	public boolean accepting ()
	{
		int currentCount = currentNodes.size ();
		for (int index = 0; index < currentCount; index++)
		{
			if (((NFANode) currentNodes.get (index)).finalState) return true;
		}
		return false;
	}
}

/**
 * A single node for an NFA.
 */
class NFANode
{
	/**
	 * A flag denoting whether the state is final or not.
	 */
	public boolean finalState;

	/**
	 * A vector of transitions (NFATransition objects) leaving this node.
	 */
	public Vector branches;

	/**
	 * The number in the nodes vector (used for optimization).
	 */
	public int nodeNumber;

	/**
	 * Constructs a new NFANode without any transitions.
	 *
	 * @param finalState the flag denoting whether the state is final or not.
	 */
	public NFANode (boolean finalState)
	{
		this.finalState = finalState;
		branches = new Vector ();
	}
}

/**
 * A single NFA transition.
 */
class NFATransition
{
	/**
	 * The character needed to make the transition.
	 */
	public char inChar;

	/**
	 * A reference to the node to be transitioned to.
	 */
	public NFANode outNode;

	/**
	 * Constructs a new NFATransition.
	 *
	 * @param inChar the character needed to make the transition.
	 * @param outNode the reference to the node to be transitioned to.
	 */
	NFATransition (char inChar, NFANode outNode)
	{
		this.inChar = inChar;
		this.outNode = outNode;
	}
}

/**
 * A collection of individual characters and inclusive character ranges
 * that can be queried for membership, optionally with the result inverted.
 */
class CharacterSet
{
	/**
	 * When true, membership tests report the opposite of the actual result.
	 */
	public boolean inverse = false;

	/**
	 * The stored entries: Character objects and CharacterRange objects.
	 */
	private Vector set = new Vector ();

	/**
	 * Adds a single character to the set.
	 *
	 * @param character the character to be added.
	 */
	public void addCharacter (char character)
	{
		Character entry = new Character (character);
		set.add (entry);
	}

	/**
	 * Adds a range of characters to the set.
	 *
	 * @param lowchar the lower bound character in the range.
	 * @param hichar the upper bound character in the range.
	 */
	public void addRange (char lowchar, char hichar)
	{
		CharacterRange entry = new CharacterRange (lowchar, hichar);
		set.add (entry);
	}

	/**
	 * Tests whether a character belongs to the set, taking the inversion
	 * flag into account.
	 *
	 * @param character the character to be tested.
	 *
	 * @return if inverse is false, whether the character equals a stored
	 *  character or lies within a stored range; otherwise the opposite.
	 */
	public boolean inSet (char character)
	{
		boolean member = false;

		//Scanning entries until the first one that contains the character.
		for (Iterator entries = set.iterator (); !member && entries.hasNext (); )
		{
			Object entry = entries.next ();
			if (entry instanceof Character)
			{
				member = ((Character) entry).charValue () == character;
			}
			else if (entry instanceof CharacterRange)
			{
				CharacterRange range = (CharacterRange) entry;
				member = range.lowchar <= character && character <= range.hichar;
			}
		}

		//Flipping the result when the set is inverted.
		return member != inverse;
	}
}

/**
 * A pair of characters describing the two ends of a character range.
 */
class CharacterRange
{
	/**
	 * The character at the low end of the range.
	 */
	public char lowchar;

	/**
	 * The character at the high end of the range.
	 */
	public char hichar;

	/**
	 * Creates a range from its two end characters.  The values are stored
	 * as given; no ordering check is performed.
	 *
	 * @param lowchar the character at the low end of the range.
	 * @param hichar the character at the high end of the range.
	 */
	public CharacterRange (char lowchar, char hichar)
	{
		this.hichar  = hichar;
		this.lowchar = lowchar;
	}
}
