Java Package RegularExpression Copyright (C) 2001, 2002 Brian Westphal This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Internationalization -------------------------------------------------------------------------------- The RegularExpression package is not currently ready for use outside the United States. In particular, not all Unicode characters are currently supported. We are working on a fix for this problem. Source Code -------------------------------------------------------------------------------- All documents, including source files, are formatted with 8-character tabbing RE Special Characters and Patterns -------------------------------------------------------------------------------- RE pattern matching and escape characters list: /d ANY DIGIT /D ANY NON-DIGIT /s ANY TYPE OF WHITESPACE /S ANY TYPE OF NON-WHITESPACE /w ANY TYPE OF ALPHANUMERIC /W ANY TYPE OF NON-ALPHANUMERIC . ANY NON NEWLINE CHARACTER #x[0-9A-F]+ HEX REPRESENTATION OF A CHARACTER /? WHERE ? IS ANY OTHER CHARACTER YIELDS THAT CHARACTER * 0 OR MORE TIMES + 1 OR MORE TIMES ? 
0 OR 1 TIMES {M} M TIMES {M,} AT LEAST M TIMES {M,N} AT LEAST M AND AT MOST N TIMES (N >= M) [al-z] CHARACTER LIST, INDIVIDUAL CHARACTERS OR CHARACTER RANGES Useful Public RE Functions -------------------------------------------------------------------------------- public RE (String pRE, boolean pCASESENSITIVE) throws Exception public Object [] nextTokenAndDelim (String input, int offset) throws Exception Object[0] = int //INDEX OF CHARACTER AFTER END OF DELIM Object[1] = String //TOKEN, SUBSTRING OF input FROM offset TO BEGINNING OF DELIM Object[2] = String //DELIM, SUBSTRING OF input FROM END OF TOKEN TO Object[0] public Object [] beginningMatches (String input) throws Exception Object[0] = boolean //MATCHES Object[1] = int //LENGTH public boolean matches (String input) throws Exception boolean //MATCHES public String replace (String input, String replacement) throws Exception String //STRING AFTER REPLACEMENT public String [] split (String input) throws Exception String [] //ARRAY OF TOKENS public String toString () String //FORMATTED REGULAR EXPRESSION Useful Public NFA Functions -------------------------------------------------------------------------------- public NFA (int pQ, char [] pSIGMA, Transition [] pDELTA, int [] pF) public Object clone () Object //DEEP COPY OF NFA public NFA concat (NFA RHS) throws Exception NFA //RESULT public NFA star () NFA //RESULT public NFA plus () throws Exception NFA //RESULT public NFA maybe () throws Exception NFA //RESULT public NFA repeat (int lower, int upper) throws Exception NFA //RESULT public NFA union (NFA RHS) throws Exception NFA //RESULT public void shrinkfit () public void buildtable () throws Exception //MUST BE CALLED AFTER CREATING NFA, BEFORE USING NFA public void reset () //MUST BE CALLED BEFORE EACH USE OF NFA public boolean input (char inputchar) throws Exception boolean //CHARACTER ACCEPTED public boolean accepting () boolean //NFA IS CURRENTLY IN AN ACCEPTING STATE public String toString () String //STRING 
DESCRIBING NFA public boolean inLanguage (char character) boolean //CHARACTER IS IN ALPHABET (OR IS EPSILON)