PageRenderTime 61ms CodeModel.GetById 14ms app.highlight 33ms RepoModel.GetById 1ms app.codeStats 0ms

/jEdit/tags/jedit-4-2-pre14/gnu/regexp/RE.java

#
Java | 1356 lines | 682 code | 165 blank | 509 comment | 373 complexity | 9fbc614c6c42a6a0b950df0a0d8af87e MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
   1/*
   2 *  gnu/regexp/RE.java
   3 *  Copyright (C) 1998-2001 Wes Biggs
   4 *
   5 *  This library is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU Lesser General Public License as published
   7 *  by the Free Software Foundation; either version 2.1 of the License, or
   8 *  (at your option) any later version.
   9 *
  10 *  This library is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 *  GNU Lesser General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU Lesser General Public License
  16 *  along with this program; if not, write to the Free Software
  17 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18 */
  19
  20package gnu.regexp;
  21import java.io.InputStream;
  22import java.io.Reader;
  23import java.io.Serializable;
  24import java.util.Locale;
  25import java.util.PropertyResourceBundle;
  26import java.util.ResourceBundle;
  27import java.util.Vector;
  28
  29class IntPair implements Serializable {
  30  public int first, second;
  31}
  32
  33class CharUnit implements Serializable {
  34  public char ch;
  35  public boolean bk;
  36}
  37
  38/**
  39 * RE provides the user interface for compiling and matching regular
  40 * expressions.
  41 * <P>
  42 * A regular expression object (class RE) is compiled by constructing it
  43 * from a String, StringBuffer or character array, with optional 
  44 * compilation flags (below)
  45 * and an optional syntax specification (see RESyntax; if not specified,
  46 * <code>RESyntax.RE_SYNTAX_PERL5</code> is used).
  47 * <P>
  48 * Once compiled, a regular expression object is reusable as well as
  49 * threadsafe: multiple threads can use the RE instance simultaneously
  50 * to match against different input text.
  51 * <P>
  52 * Various methods attempt to match input text against a compiled
  53 * regular expression.  These methods are:
  54 * <LI><code>isMatch</code>: returns true if the input text in its
  55 * entirety matches the regular expression pattern.
  56 * <LI><code>getMatch</code>: returns the first match found in the
  57 * input text, or null if no match is found.
  58 * <LI><code>getAllMatches</code>: returns an array of all
  59 * non-overlapping matches found in the input text.  If no matches are
  60 * found, the array is zero-length.
   61 * <LI><code>substitute</code>: substitute the first occurrence of the
  62 * pattern in the input text with a replacement string (which may
  63 * include metacharacters $0-$9, see REMatch.substituteInto).
  64 * <LI><code>substituteAll</code>: same as above, but repeat for each
  65 * match before returning.
  66 * <LI><code>getMatchEnumeration</code>: returns an REMatchEnumeration
  67 * object that allows iteration over the matches (see
  68 * REMatchEnumeration for some reasons why you may want to do this
   69 * instead of using <code>getAllMatches</code>).
  70 * <P>
  71 *
  72 * These methods all have similar argument lists.  The input can be a
  73 * String, a character array, a StringBuffer, a Reader or an
  74 * InputStream of some sort.  Note that when using a Reader or
  75 * InputStream, the stream read position cannot be guaranteed after
  76 * attempting a match (this is not a bug, but a consequence of the way
  77 * regular expressions work).  Using an REMatchEnumeration can
  78 * eliminate most positioning problems.
  79 *
  80 * <P>
  81 *
  82 * The optional index argument specifies the offset from the beginning
  83 * of the text at which the search should start (see the descriptions
  84 * of some of the execution flags for how this can affect positional
  85 * pattern operators).  For a Reader or InputStream, this means an
  86 * offset from the current read position, so subsequent calls with the
  87 * same index argument on a Reader or an InputStream will not
  88 * necessarily access the same position on the stream, whereas
  89 * repeated searches at a given index in a fixed string will return
  90 * consistent results.
  91 *
  92 * <P>
  93 * You can optionally affect the execution environment by using a
  94 * combination of execution flags (constants listed below).
  95 * 
  96 * <P>
  97 * All operations on a regular expression are performed in a
  98 * thread-safe manner.
  99 *
 100 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 101 * @version 1.1.5-dev, to be released
 102 */
 103
 104public class RE extends REToken {
  105  // This String will be returned by version()
 106  private static final String VERSION = "1.1.5-dev";
 107
 108  // The localized strings are kept in a separate file
 109  private static ResourceBundle messages = PropertyResourceBundle.getBundle("gnu/regexp/MessagesBundle", Locale.getDefault());
 110
 111  // These are, respectively, the first and last tokens in our linked list
 112  // If there is only one token, firstToken == lastToken
 113  private REToken firstToken, lastToken;
 114
 115  // This is the number of subexpressions in this regular expression,
 116  // with a minimum value of zero.  Returned by getNumSubs()
 117  private int numSubs;
 118
 119    /** Minimum length, in characters, of any possible match. */
 120    private int minimumLength;
 121
 122  /**
 123   * Compilation flag. Do  not  differentiate  case.   Subsequent
 124   * searches  using  this  RE will be case insensitive.
 125   */
 126  public static final int REG_ICASE = 2;
 127
 128  /**
 129   * Compilation flag. The match-any-character operator (dot)
 130   * will match a newline character.  When set this overrides the syntax
 131   * bit RE_DOT_NEWLINE (see RESyntax for details).  This is equivalent to
 132   * the "/s" operator in Perl.
 133   */
 134  public static final int REG_DOT_NEWLINE = 4;
 135
 136  /**
 137   * Compilation flag. Use multiline mode.  In this mode, the ^ and $
 138   * anchors will match based on newlines within the input. This is
 139   * equivalent to the "/m" operator in Perl.
 140   */
 141  public static final int REG_MULTILINE = 8;
 142
 143  /**
 144   * Execution flag.
 145   * The match-beginning operator (^) will not match at the beginning
 146   * of the input string. Useful for matching on a substring when you
 147   * know the context of the input is such that position zero of the
 148   * input to the match test is not actually position zero of the text.
 149   * <P>
 150   * This example demonstrates the results of various ways of matching on
 151   * a substring.
 152   * <P>
 153   * <CODE>
 154   * String s = "food bar fool";<BR>
 155   * RE exp = new RE("^foo.");<BR>
 156   * REMatch m0 = exp.getMatch(s);<BR>
 157   * REMatch m1 = exp.getMatch(s.substring(8));<BR>
 158   * REMatch m2 = exp.getMatch(s.substring(8),0,RE.REG_NOTBOL); <BR>
 159   * REMatch m3 = exp.getMatch(s,8);                            <BR>
 160   * REMatch m4 = exp.getMatch(s,8,RE.REG_ANCHORINDEX);         <BR>
 161   * <P>
 162   * // Results:<BR>
 163   * //  m0.toString(): "food"<BR>
 164   * //  m1.toString(): "fool"<BR>
 165   * //  m2.toString(): null<BR>
 166   * //  m3.toString(): null<BR>
 167   * //  m4.toString(): "fool"<BR>
 168   * </CODE>
 169   */
 170  public static final int REG_NOTBOL = 16;
 171
 172  /**
 173   * Execution flag.
 174   * The match-end operator ($) does not match at the end
 175   * of the input string. Useful for matching on substrings.
 176   */
 177  public static final int REG_NOTEOL = 32;
 178
 179  /**
 180   * Execution flag.
 181   * When a match method is invoked that starts matching at a non-zero
 182   * index into the input, treat the input as if it begins at the index
 183   * given.  The effect of this flag is that the engine does not "see"
 184   * any text in the input before the given index.  This is useful so
 185   * that the match-beginning operator (^) matches not at position 0
 186   * in the input string, but at the position the search started at
 187   * (based on the index input given to the getMatch function).  See
 188   * the example under REG_NOTBOL.  It also affects the use of the \&lt;
 189   * and \b operators.
 190   */
 191  public static final int REG_ANCHORINDEX = 64;
 192
 193  /**
 194   * Execution flag.
 195   * The substitute and substituteAll methods will not attempt to
 196   * interpolate occurrences of $1-$9 in the replacement text with
 197   * the corresponding subexpressions.  For example, you may want to
 198   * replace all matches of "one dollar" with "$1".
 199   */
 200  public static final int REG_NO_INTERPOLATE = 128;
 201
 202  /** Returns a string representing the version of the gnu.regexp package. */
  203  public static final String version() {
    // VERSION is the private compile-time constant declared at the top of RE.
  204    return VERSION;
  205  }
 206
 207  // Retrieves a message from the ResourceBundle
  208  static final String getLocalizedMessage(String key) {
    // Looks the key up in the class-wide "messages" bundle, which is loaded
    // once for the default locale.  ResourceBundle.getString throws
    // MissingResourceException if the bundle has no entry for the key.
  209    return messages.getString(key);
  210  }
 211
 212  /**
 213   * Constructs a regular expression pattern buffer without any compilation
 214   * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
 215   *
 216   * @param pattern A regular expression pattern, in the form of a String,
 217   *   StringBuffer or char[].  Other input types will be converted to
 218   *   strings using the toString() method.
 219   * @exception REException The input pattern could not be parsed.
 220   * @exception NullPointerException The pattern was null.
 221   */
  222  public RE(Object pattern) throws REException {
    // Delegates to the private five-argument constructor with no compilation
    // flags, Perl 5 syntax, subexpression index 0 and nextSub 0.
  223    this(pattern,0,RESyntax.RE_SYNTAX_PERL5,0,0);
  224  }
 225
 226  /**
 227   * Constructs a regular expression pattern buffer using the specified
 228   * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
 229   *
 230   * @param pattern A regular expression pattern, in the form of a String,
 231   *   StringBuffer, or char[].  Other input types will be converted to
 232   *   strings using the toString() method.
 233   * @param cflags The logical OR of any combination of the compilation flags listed above.
 234   * @exception REException The input pattern could not be parsed.
 235   * @exception NullPointerException The pattern was null.
 236   */
  237  public RE(Object pattern, int cflags) throws REException {
    // Delegates to the private five-argument constructor with the caller's
    // flags, Perl 5 syntax, subexpression index 0 and nextSub 0.
  238    this(pattern,cflags,RESyntax.RE_SYNTAX_PERL5,0,0);
  239  }
 240
 241  /**
 242   * Constructs a regular expression pattern buffer using the specified
 243   * compilation flags and regular expression syntax.
 244   *
 245   * @param pattern A regular expression pattern, in the form of a String,
 246   *   StringBuffer, or char[].  Other input types will be converted to
 247   *   strings using the toString() method.
 248   * @param cflags The logical OR of any combination of the compilation flags listed above.
 249   * @param syntax The type of regular expression syntax to use.
 250   * @exception REException The input pattern could not be parsed.
 251   * @exception NullPointerException The pattern was null.
 252   */
  253  public RE(Object pattern, int cflags, RESyntax syntax) throws REException {
    // Delegates to the private five-argument constructor with the caller's
    // flags and syntax, subexpression index 0 and nextSub 0.
  254    this(pattern,cflags,syntax,0,0);
  255  }
 256
 257  // internal constructor used for alternation
  258  private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
  259    super(subIndex);
    // Adopt a token chain that has already been built (the parser calls this
    // for each alternative of an alternation) instead of parsing a pattern.
  260    firstToken = first;
  261    lastToken = last;
  262    numSubs = subs;
  263    minimumLength = minLength;
    // Close the adopted chain with an end-of-subexpression marker.
    // NOTE(review): addToken is defined outside this excerpt; presumably it
    // appends to the chain via firstToken/lastToken, which is why the fields
    // above are assigned first -- confirm.
  264    addToken(new RETokenEndSub(subIndex));
  265  }
 266
  267  private RE(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
  268    super(myIndex); // Subexpression index of this token.
    // All parsing is done by initialize().
    // NOTE(review): initialize is protected and not final, so a subclass
    // override would run here before the subclass constructor body executes.
  269    initialize(patternObj, cflags, syntax, myIndex, nextSub);
  270  }
 271
 272    // For use by subclasses
  273    protected RE() { super(0); } // parses nothing; only passes index 0 to REToken
 274
 275    // The meat of construction
  /**
   * Parses a regular expression pattern and builds this object's token
   * chain from it.  Resets <code>numSubs</code>, <code>firstToken</code>
   * and <code>lastToken</code>, then scans the pattern one character unit
   * at a time (see getCharUnit), buffering the most recent token in a local
   * so that a following repeat operator can wrap it.  When alternation is
   * present, each alternative is wrapped in its own RE and the alternatives
   * are combined in a single RETokenOneOf.
   *
   * @param patternObj The pattern: a String, char[] or StringBuffer; any
   *   other object is converted with toString().  A char[] is used directly
   *   rather than copied.
   * @param cflags Compilation flags; this method tests REG_ICASE,
   *   REG_MULTILINE and REG_DOT_NEWLINE, and passes the value unchanged to
   *   nested subexpressions.
   * @param syntax The syntax bits that decide which operators are recognized.
   * @param myIndex Not referenced in this method body; tokens are created
   *   with the <code>subIndex</code> field instead (presumably set from this
   *   value by the REToken constructor -- confirm).
   * @param nextSub Added to the running subexpression count to compute the
   *   index given to each nested capturing subexpression.
   * @exception REException The pattern could not be parsed.
   * @exception NullPointerException patternObj was null.
   */
  276  protected void initialize(Object patternObj, int cflags, RESyntax syntax, int myIndex, int nextSub) throws REException {
  277      char[] pattern;
    // Normalize the pattern to a char[].
  278    if (patternObj instanceof String) {
  279      pattern = ((String) patternObj).toCharArray();
  280    } else if (patternObj instanceof char[]) {
  281      pattern = (char[]) patternObj;
  282    } else if (patternObj instanceof StringBuffer) {
  283      pattern = new char [((StringBuffer) patternObj).length()];
  284      ((StringBuffer) patternObj).getChars(0,pattern.length,pattern,0);
  285    } else {
  286	pattern = patternObj.toString().toCharArray();
  287    }
  288
  289    int pLength = pattern.length;
  290
  291    numSubs = 0; // Number of subexpressions in this token.
    // Holds one RE per alternative; stays null until the first alternation
    // operator is seen.
  292    Vector branches = null;
  293
  294    // linked list of tokens (sort of -- some closed loops can exist)
  295    firstToken = lastToken = null;
  296
  297    // Precalculate these so we don't pay for the math every time we
  298    // need to access them.
  299    boolean insens = ((cflags & REG_ICASE) > 0);
  300
  301    // Parse pattern into tokens.  Does anyone know if it's more efficient
  302    // to use char[] than a String.charAt()?  I'm assuming so.
  303
  304    // index tracks the position in the char array
  305    int index = 0;
  306
  307    // this will be the current parse character (pattern[index])
  308    CharUnit unit = new CharUnit();
  309
  310    // This is used for {x,y} calculations
  311    IntPair minMax = new IntPair();
  312
  313    // Buffer a token so we can create a TokenRepeated, etc.
  314    REToken currentToken = null;
  315    char ch;
  316
  317    while (index < pLength) {
  318      // read the next character unit (including backslash escapes)
  319      index = getCharUnit(pattern,index,unit);
  320
  321      // ALTERNATION OPERATOR
  322      //  \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
  323      //  not available if RE_LIMITED_OPS is set
  324
  325      // TODO: the '\n' literal here should be a test against REToken.newline,
  326      // which unfortunately may be more than a single character.
  327      if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
  328	     || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
  329	   && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
  330	// make everything up to here be a branch. create vector if nec.
  331	addToken(currentToken);
	// The branch RE takes over the chain built so far; the fields are then
	// cleared so the next alternative starts from an empty chain.
  332	RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
  333	minimumLength = 0;
  334	if (branches == null) {
  335	    branches = new Vector();
  336	}
  337	branches.addElement(theBranch);
  338	firstToken = lastToken = currentToken = null;
  339      }
  340      
  341      // INTERVAL OPERATOR:
  342      //  {x} | {x,} | {x,y}  (RE_INTERVALS && RE_NO_BK_BRACES)
  343      //  \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES)
  344      //
  345      // OPEN QUESTION: 
  346      //  what is proper interpretation of '{' at start of string?
  347
  348      else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
	// getMinMax is defined outside this excerpt; presumably it returns a
	// position beyond index only when a well-formed interval was read.
	// Otherwise the else branch below keeps the '{' as a literal character.
  349	int newIndex = getMinMax(pattern,index,minMax,syntax);
  350        if (newIndex > index) {
  351          if (minMax.first > minMax.second)
  352            throw new REException(getLocalizedMessage("interval.order"),REException.REG_BADRPT,newIndex);
  353          if (currentToken == null)
  354            throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,newIndex);
  355          if (currentToken instanceof RETokenRepeated) 
  356            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,newIndex);
          // NOTE(review): both operands of the || below test the same type
          // (RETokenWordBoundary), so the second test is redundant as written.
          // Confirm whether a different assertion token type was intended.
  357          if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
  358            throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,newIndex);
  359          if ((currentToken.getMinimumLength() == 0) && (minMax.second == Integer.MAX_VALUE))
  360            throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,newIndex);
  361          index = newIndex;
  362          currentToken = setRepeated(currentToken,minMax.first,minMax.second,index); 
  363        }
  364        else {
  365          addToken(currentToken);
  366          currentToken = new RETokenChar(subIndex,unit.ch,insens);
  367        } 
  368      }
  369      
  370      // LIST OPERATOR:
  371      //  [...] | [^...]
  372
  373      else if ((unit.ch == '[') && !unit.bk) {
  374	Vector options = new Vector();
  375	boolean negative = false;
	// lastChar holds a literal that has been read but not yet added to
	// options, because it may turn out to be the start of a range; 0 means
	// nothing is pending.
  376	char lastChar = 0;
  377	if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
  378	
  379	// Check for initial caret, negation
  380	if ((ch = pattern[index]) == '^') {
  381	  negative = true;
  382	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
  383	  ch = pattern[index];
  384	}
  385
  386	// Check for leading right bracket literal
  387	if (ch == ']') {
  388	  lastChar = ch;
  389	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
  390	}
  391
  392	while ((ch = pattern[index++]) != ']') {
  393	  if ((ch == '-') && (lastChar != 0)) {
  394	    if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
	    // A '-' directly before the closing ']' is a literal dash, not a range.
  395	    if ((ch = pattern[index]) == ']') {
  396	      options.addElement(new RETokenChar(subIndex,lastChar,insens));
  397	      lastChar = '-';
  398	    } else {
  399	      options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
  400	      lastChar = 0;
  401	      index++;
  402	    }
  403          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
  404            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
  405	    int posixID = -1;
  406	    boolean negate = false;
  407            char asciiEsc = 0;
  408	    if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
  409	      switch (pattern[index]) {
  410	      case 'D':
  411		negate = true;
		// fall through: upper-case form is the negated lower-case class
  412	      case 'd':
  413		posixID = RETokenPOSIX.DIGIT;
  414		break;
  415	      case 'S':
  416		negate = true;
		// fall through
  417	      case 's':
  418		posixID = RETokenPOSIX.SPACE;
  419		break;
  420	      case 'W':
  421		negate = true;
		// fall through
  422	      case 'w':
  423		posixID = RETokenPOSIX.ALNUM;
  424		break;
  425	      }
  426	    }
  427            else if ("nrt".indexOf(pattern[index]) != -1) {
  428              switch (pattern[index]) {
  429                case 'n':
  430                  asciiEsc = '\n';
  431                  break;
  432                case 't':
  433                  asciiEsc = '\t';
  434                  break;
  435                case 'r':
  436                  asciiEsc = '\r';
  437                  break;
  438              }
  439            }
	    // Flush any pending literal before recording the escape.
  440	    if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
  441	    
  442	    if (posixID != -1) {
  443	      options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
  444	    } else if (asciiEsc != 0) {
  445	      lastChar = asciiEsc;
  446	    } else {
	      // Any other escaped character stands for itself.
  447	      lastChar = pattern[index];
  448	    }
  449	    ++index;
  450	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
  451	    StringBuffer posixSet = new StringBuffer();
  452	    index = getPosixSet(pattern,index+1,posixSet);
  453	    int posixId = RETokenPOSIX.intValue(posixSet.toString());
	    // A name for which intValue returns -1 adds nothing to the list.
  454	    if (posixId != -1)
  455	      options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
  456	  } else {
  457	    if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
  458	    lastChar = ch;
  459	  }
  460	  if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
  461	} // while in list
  462	// Out of list, index is one past ']'
  463	    
  464	if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
  465	    
  466	// Create a new RETokenOneOf
  467	addToken(currentToken);
  468	options.trimToSize();
  469	currentToken = new RETokenOneOf(subIndex,options,negative);
  470      }
  471
  472      // SUBEXPRESSIONS
  473      //  (...) | \(...\) depending on RE_NO_BK_PARENS
  474
  475      else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
  476	boolean pure = false;
  477	boolean comment = false;
  478        boolean lookAhead = false;
  479        boolean negativelh = false;
	// "(?" followed by one more character selects a special group form.
	// When the relevant syntax bit is off, index is not advanced and the
	// "?x" stays part of the subexpression text.
  480	if ((index+1 < pLength) && (pattern[index] == '?')) {
  481	  switch (pattern[index+1]) {
  482          case '!':
  483            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
  484              pure = true;
  485              negativelh = true;
  486              lookAhead = true;
  487              index += 2;
  488            }
  489            break;
  490          case '=':
  491            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
  492              pure = true;
  493              lookAhead = true;
  494              index += 2;
  495            }
  496            break;
  497	  case ':':
  498	    if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
  499	      pure = true;
  500	      index += 2;
  501	    }
  502	    break;
  503	  case '#':
  504	    if (syntax.get(RESyntax.RE_COMMENTS)) {
  505	      comment = true;
  506	    }
  507	    break;
  508          default:
	    // Any other character after "(?" is rejected with the
	    // "repeat.no.token" message.
  509            throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
  510	  }
  511	}
  512
  513	if (index >= pLength) {
  514	    throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
  515	}
  516
  517	// find end of subexpression
  518	int endIndex = index;
  519	int nextIndex = index;
  520	int nested = 0;
  521
	// Scan forward one character unit at a time, counting nested groups,
	// until the close-paren that matches this group is read.  getCharUnit
	// always advances at least one position, so the "> 0" test holds for
	// any non-negative endIndex.
  522	while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
  523		&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
  524	  if ((endIndex = nextIndex) >= pLength)
  525	    throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
  526	  else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
  527	    nested++;
  528	  else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
  529	    nested--;
  530
  531	// endIndex is now position at a ')','\)' 
  532	// nextIndex is end of string or position after ')' or '\)'
  533
	// A comment group produces no token; parsing resumes after it.
  534	if (comment) index = nextIndex;
  535	else { // not a comment
  536	  // create RE subexpression as token.
  537	  addToken(currentToken);
  538	  if (!pure) {
  539	    numSubs++;
  540	  }
  541
	  // Pure and look-ahead groups get subexpression index 0; capturing
	  // groups get nextSub plus the running count.
  542	  int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
  543	  currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
	  // Count the groups found inside the nested expression as well.
  544	  numSubs += ((RE) currentToken).getNumSubs();
  545
  546          if (lookAhead) {
  547	      currentToken = new RETokenLookAhead(currentToken,negativelh);
  548	  }
  549
  550	  index = nextIndex;
  551	} // not a comment
  552      } // subexpression
  553    
  554      // UNMATCHED RIGHT PAREN
  555      // ) or \) throw exception if
  556      // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
  557      else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
  558	throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
  559      }
  560
  561      // START OF LINE OPERATOR
  562      //  ^
  563
  564      else if ((unit.ch == '^') && !unit.bk) {
  565	addToken(currentToken);
	// The anchor is added to the chain directly and the buffer is cleared,
	// so a repeat operator that follows finds no token to repeat.
  566	currentToken = null;
  567	addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
  568      }
  569
  570      // END OF LINE OPERATOR
  571      //  $
  572
  573      else if ((unit.ch == '$') && !unit.bk) {
  574	addToken(currentToken);
  575	currentToken = null;
  576	addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
  577      }
  578
  579      // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
  580      //  .
  581
  582      else if ((unit.ch == '.') && !unit.bk) {
  583	addToken(currentToken);
  584	currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
  585      }
  586
  587      // ZERO-OR-MORE REPEAT OPERATOR
  588      //  *
  589
  590      else if ((unit.ch == '*') && !unit.bk) {
  591	if (currentToken == null)
  592          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
  593	if (currentToken instanceof RETokenRepeated)
  594          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
	// NOTE(review): duplicated instanceof operand, same as in the interval case.
  595	if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
  596	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
  597	if (currentToken.getMinimumLength() == 0)
  598	  throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
  599	currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
  600      }
  601
  602      // ONE-OR-MORE REPEAT OPERATOR
  603      //  + | \+ depending on RE_BK_PLUS_QM
  604      //  not available if RE_LIMITED_OPS is set
  605
  606      else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
  607	if (currentToken == null)
  608          throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
  609	if (currentToken instanceof RETokenRepeated)
  610          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
	// NOTE(review): duplicated instanceof operand, same as in the interval case.
  611	if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
  612	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
  613	if (currentToken.getMinimumLength() == 0)
  614	  throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
  615	currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
  616      }
  617
  618      // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
  619      //  ? | \? depending on RE_BK_PLUS_QM
  620      //  not available if RE_LIMITED_OPS is set
  621      //  stingy matching if RE_STINGY_OPS is set and it follows a quantifier
  622
  623      else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
  624	if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
  625
  626	// Check for stingy matching on RETokenRepeated
  627	if (currentToken instanceof RETokenRepeated) {
	  // A '?' after a repeat makes it stingy once; a second '?' (or any
	  // '?' when RE_STINGY_OPS is off) is reported as a chained repeat.
  628          if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
  629            ((RETokenRepeated)currentToken).makeStingy();
  630          else
  631            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
  632        }
	// NOTE(review): duplicated instanceof operand, same as in the interval case.
  633        else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
  634          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
  635	else
  636	  currentToken = setRepeated(currentToken,0,1,index);
  637      }
  638	
  639      // BACKREFERENCE OPERATOR
  640      //  \1 \2 ... \9
  641      // not available if RE_NO_BK_REFS is set
  642
  643      else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
  644	addToken(currentToken);
  645	currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
  646      }
  647
  648      // START OF STRING OPERATOR
  649      //  \A if RE_STRING_ANCHORS is set
  650      
  651      else if (unit.bk && (unit.ch == 'A') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
  652	addToken(currentToken);
  653	currentToken = new RETokenStart(subIndex,null);
  654      }
  655
  656      // WORD BREAK OPERATOR
  657      //  \b if RE_STRING_ANCHORS is set
  658
  659      else if (unit.bk && (unit.ch == 'b') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
  660	  addToken(currentToken);
  661	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, false);
  662      } 
  663
  664      // WORD BEGIN OPERATOR 
  665      //  \< (no syntax bit is consulted)
  666      else if (unit.bk && (unit.ch == '<')) {
  667	  addToken(currentToken);
  668	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN, false);
  669      } 
  670
  671      // WORD END OPERATOR 
  672      //  \> (no syntax bit is consulted)
  673      else if (unit.bk && (unit.ch == '>')) {
  674	  addToken(currentToken);
  675	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.END, false);
  676      } 
  677
  678      // NON-WORD BREAK OPERATOR
  679      // \B if RE_STRING_ANCHORS is set
  680
  681      else if (unit.bk && (unit.ch == 'B') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
  682	  addToken(currentToken);
  683	  currentToken = new RETokenWordBoundary(subIndex, RETokenWordBoundary.BEGIN | RETokenWordBoundary.END, true);
  684      } 
  685
  686      
  687      // DIGIT OPERATOR
  688      //  \d if RE_CHAR_CLASS_ESCAPES is set
  689      
  690      else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
  691	addToken(currentToken);
  692	currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
  693      }
  694
  695      // NON-DIGIT OPERATOR
  696      //  \D if RE_CHAR_CLASS_ESCAPES is set
  697
  698	else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
  699	  addToken(currentToken);
  700	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
  701	}
  702
  703	// NEWLINE ESCAPE
  704        //  \n
  705
  706	else if (unit.bk && (unit.ch == 'n')) {
  707	  addToken(currentToken);
  708	  currentToken = new RETokenChar(subIndex,'\n',false);
  709	}
  710
  711	// RETURN ESCAPE
  712        //  \r
  713
  714	else if (unit.bk && (unit.ch == 'r')) {
  715	  addToken(currentToken);
  716	  currentToken = new RETokenChar(subIndex,'\r',false);
  717	}
  718
  719	// WHITESPACE OPERATOR
  720        //  \s if RE_CHAR_CLASS_ESCAPES is set
  721
  722	else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
  723	  addToken(currentToken);
  724	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
  725	}
  726
  727	// NON-WHITESPACE OPERATOR
  728        //  \S if RE_CHAR_CLASS_ESCAPES is set
  729
  730	else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
  731	  addToken(currentToken);
  732	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
  733	}
  734
  735	// TAB ESCAPE
  736        //  \t
  737
  738	else if (unit.bk && (unit.ch == 't')) {
  739	  addToken(currentToken);
  740	  currentToken = new RETokenChar(subIndex,'\t',false);
  741	}
  742
  743	// ALPHANUMERIC OPERATOR
  744        //  \w if RE_CHAR_CLASS_ESCAPES is set
  745
  746	else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
  747	  addToken(currentToken);
  748	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
  749	}
  750
  751	// NON-ALPHANUMERIC OPERATOR
  752        //  \W if RE_CHAR_CLASS_ESCAPES is set
  753
  754	else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
  755	  addToken(currentToken);
  756	  currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
  757	}
  758
  759	// END OF STRING OPERATOR
  760        //  \Z if RE_STRING_ANCHORS is set
  761
  762	else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
  763	  addToken(currentToken);
  764	  currentToken = new RETokenEnd(subIndex,null);
  765	}
  766
  767	// NON-SPECIAL CHARACTER (or escape to make literal)
  768        //  c | \* for example
  769
  770	else {  // not a special character
  771	  addToken(currentToken);
  772	  currentToken = new RETokenChar(subIndex,unit.ch,insens);
  773	} 
  774      } // end while
  775
  776    // Add final buffered token and an EndSub marker
  777    addToken(currentToken);
  778      
    // With alternation, the tokens after the last '|' become the final
    // branch and the whole expression is replaced by one RETokenOneOf over
    // all branches.  Without it, the chain is closed with an EndSub marker.
  779    if (branches != null) {
  780	branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
  781	branches.trimToSize(); // compact the Vector
  782	minimumLength = 0;
  783	firstToken = lastToken = null;
  784	addToken(new RETokenOneOf(subIndex,branches,false));
  785    } 
  786    else addToken(new RETokenEndSub(subIndex));
  787
  788  }
 789
 790  private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
 791    unit.ch = input[index++];
 792    if (unit.bk = (unit.ch == '\\'))
 793      if (index < input.length)
 794	unit.ch = input[index++];
 795      else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
 796    return index;
 797  }
 798
 799  /**
 800   * Checks if the regular expression matches the input in its entirety.
 801   *
 802   * @param input The input text.
 803   */
 804  public boolean isMatch(Object input) {
 805    return isMatch(input,0,0);
 806  }
 807  
 808  /**
 809   * Checks if the input string, starting from index, is an exact match of
 810   * this regular expression.
 811   *
 812   * @param input The input text.
 813   * @param index The offset index at which the search should be begin.
 814   */
 815  public boolean isMatch(Object input,int index) {
 816    return isMatch(input,index,0);
 817  }
 818  
 819
 820  /**
 821   * Checks if the input, starting from index and using the specified
 822   * execution flags, is an exact match of this regular expression.
 823   *
 824   * @param input The input text.
 825   * @param index The offset index at which the search should be begin.
 826   * @param eflags The logical OR of any execution flags above.
 827   */
 828  public boolean isMatch(Object input,int index,int eflags) {
 829    return isMatchImpl(makeCharIndexed(input,index),index,eflags);
 830  }
 831
 832  private boolean isMatchImpl(CharIndexed input, int index, int eflags) {
 833    if (firstToken == null)  // Trivial case
 834      return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
 835    REMatch m = new REMatch(numSubs, index, eflags);
 836    if (firstToken.match(input, m)) {
 837	while (m != null) {
 838	    if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
 839		return true;
 840	    }
 841	    m = m.next;
 842	}
 843    }
 844    return false;
 845  }
 846    
 847  /**
 848   * Returns the maximum number of subexpressions in this regular expression.
 849   * If the expression contains branches, the value returned will be the
 850   * maximum subexpressions in any of the branches.
 851   */
 852  public int getNumSubs() {
 853    return numSubs;
 854  }
 855
 856  // Overrides REToken.setUncle
 857  void setUncle(REToken uncle) {
 858      if (lastToken != null) {
 859	  lastToken.setUncle(uncle);
 860      } else super.setUncle(uncle); // to deal with empty subexpressions
 861  }
 862
 863  // Overrides REToken.chain
 864
 865  boolean chain(REToken next) {
 866    super.chain(next);
 867    setUncle(next);
 868    return true;
 869  }
 870
 871  /**
 872   * Returns the minimum number of characters that could possibly
 873   * constitute a match of this regular expression.
 874   */
 875  public int getMinimumLength() {
 876      return minimumLength;
 877  }
 878
 879  /**
 880   * Returns an array of all matches found in the input.
 881   *
 882   * If the regular expression allows the empty string to match, it will
 883   * substitute matches at all positions except the end of the input.
 884   *
 885   * @param input The input text.
 886   * @return a non-null (but possibly zero-length) array of matches
 887   */
 888  public REMatch[] getAllMatches(Object input) {
 889    return getAllMatches(input,0,0);
 890  }
 891
 892  /**
 893   * Returns an array of all matches found in the input,
 894   * beginning at the specified index position.
 895   *
 896   * If the regular expression allows the empty string to match, it will
 897   * substitute matches at all positions except the end of the input.
 898   *
 899   * @param input The input text.
 900   * @param index The offset index at which the search should be begin.
 901   * @return a non-null (but possibly zero-length) array of matches
 902   */
 903  public REMatch[] getAllMatches(Object input, int index) {
 904    return getAllMatches(input,index,0);
 905  }
 906
 907  /**
 908   * Returns an array of all matches found in the input string,
 909   * beginning at the specified index position and using the specified
 910   * execution flags.
 911   *
 912   * If the regular expression allows the empty string to match, it will
 913   * substitute matches at all positions except the end of the input.
 914   *
 915   * @param input The input text.
 916   * @param index The offset index at which the search should be begin.
 917   * @param eflags The logical OR of any execution flags above.
 918   * @return a non-null (but possibly zero-length) array of matches
 919   */
 920  public REMatch[] getAllMatches(Object input, int index, int eflags) {
 921    return getAllMatchesImpl(makeCharIndexed(input,index),index,eflags);
 922  }
 923
 924  // this has been changed since 1.03 to be non-overlapping matches
 925  private REMatch[] getAllMatchesImpl(CharIndexed input, int index, int eflags) {
 926    Vector all = new Vector();
 927    REMatch m = null;
 928    while ((m = getMatchImpl(input,index,eflags,null)) != null) {
 929      all.addElement(m);
 930      index = m.getEndIndex();
 931      if (m.end[0] == 0) {   // handle pathological case of zero-length match
 932	index++;
 933	input.move(1);
 934      } else {
 935	input.move(m.end[0]);
 936      }
 937      if (!input.isValid()) break;
 938    }
 939    REMatch[] mset = new REMatch[all.size()];
 940    all.copyInto(mset);
 941    return mset;
 942  }
 943  
 944    /* Implements abstract method REToken.match() */
 945    boolean match(CharIndexed input, REMatch mymatch) { 
 946	if (firstToken == null) return next(input, mymatch);
 947
 948	// Note the start of this subexpression
 949	mymatch.start[subIndex] = mymatch.index;
 950
 951	return firstToken.match(input, mymatch);
 952    }
 953  
 954  /**
 955   * Returns the first match found in the input.  If no match is found,
 956   * null is returned.
 957   *
 958   * @param input The input text.
 959   * @return An REMatch instance referencing the match, or null if none.
 960   */
 961  public REMatch getMatch(Object input) {
 962    return getMatch(input,0,0);
 963  }
 964  
 965  /**
 966   * Returns the first match found in the input, beginning
 967   * the search at the specified index.  If no match is found,
 968   * returns null.
 969   *
 970   * @param input The input text.
 971   * @param index The offset within the text to begin looking for a match.
 972   * @return An REMatch instance referencing the match, or null if none.
 973   */
 974  public REMatch getMatch(Object input, int index) {
 975    return getMatch(input,index,0);
 976  }
 977  
 978  /**
 979   * Returns the first match found in the input, beginning
 980   * the search at the specified index, and using the specified
 981   * execution flags.  If no match is found, returns null.
 982   *
 983   * @param input The input text.
 984   * @param index The offset index at which the search should be begin.
 985   * @param eflags The logical OR of any execution flags above.
 986   * @return An REMatch instance referencing the match, or null if none.
 987   */
 988  public REMatch getMatch(Object input, int index, int eflags) {
 989    return getMatch(input,index,eflags,null);
 990  }
 991
 992  /**
 993   * Returns the first match found in the input, beginning the search
 994   * at the specified index, and using the specified execution flags.
 995   * If no match is found, returns null.  If a StringBuffer is
 996   * provided and is non-null, the contents of the input text from the
 997   * index to the beginning of the match (or to the end of the input,
 998   * if there is no match) are appended to the StringBuffer.
 999   *
1000   * @param input The input text.
1001   * @param index The offset index at which the search should be begin.
1002   * @param eflags The logical OR of any execution flags above.
1003   * @param buffer The StringBuffer to save pre-match text in.
1004   * @return An REMatch instance referencing the match, or null if none.  */
1005  public REMatch getMatch(Object input, int index, int eflags, StringBuffer buffer) {
1006    return getMatchImpl(makeCharIndexed(input,index),index,eflags,buffer);
1007  }
1008
1009  REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
1010      // Create a new REMatch to hold results
1011      REMatch mymatch = new REMatch(numSubs, anchor, eflags);
1012      do {
1013	  // Optimization: check if anchor + minimumLength > length
1014	  if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
1015	      if (match(input, mymatch)) {
1016		  // Find longest match of them all to observe leftmost longest
1017		  REMatch longest = mymatch;
1018		  while ((mymatch = mymatch.next) != null) {
1019		      if (mymatch.index > longest.index) {
1020			  longest = mymatch;
1021		      }
1022		  }
1023		  
1024		  longest.end[0] = longest.index;
1025		  longest.finish(input);
1026		  return longest;
1027	      }
1028	  }
1029	  mymatch.clear(++anchor);
1030	  // Append character to buffer if needed
1031	  if (buffer != null && input.charAt(0) != CharIndexed.OUT_OF_BOUNDS) {
1032	      buffer.append(input.charAt(0));
1033	  }
1034      } while (input.move(1));
1035      
1036      // Special handling at end of input for e.g. "$"
1037      if (minimumLength == 0) {
1038	  if (match(input, mymatch)) {
1039	      mymatch.finish(input);
1040	      return mymatch;
1041	  }
1042      }
1043
1044      return null;
1045  }
1046
1047  /**
1048   * Returns an REMatchEnumeration that can be used to iterate over the
1049   * matches found in the input text.
1050   *
1051   * @param input The input text.
1052   * @return A non-null REMatchEnumeration instance.
1053   */
1054  public REMatchEnumeration getMatchEnumeration(Object input) {
1055    return getMatchEnumeration(input,0,0);
1056  }
1057
1058
1059  /**
1060   * Returns an REMatchEnumeration that can be used to iterate over the
1061   * matches found in the input text.
1062   *
1063   * @param input The input text.
1064   * @param index The offset index at which the search should be begin.
1065   * @return A non-null REMatchEnumeration instance, with its input cursor
1066   *  set to the index position specified.
1067   */
1068  public REMatchEnumeration getMatchEnumeration(Object input, int index) {
1069    return getMatchEnumeration(input,index,0);
1070  }
1071
1072  /**
1073   * Returns an REMatchEnumeration that can be used to iterate over the
1074   * matches found in the input text.
1075   *
1076   * @param input The input text.
1077   * @param index The offset index at which the search should be begin.
1078   * @param eflags The logical OR of any execution flags above.
1079   * @return A non-null REMatchEnumeration instance, with its input cursor
1080   *  set to the index position specified.
1081   */
1082  public REMatchEnumeration getMatchEnumeration(Object input, int index, int eflags) {
1083    return new REMatchEnumeration(this,makeCharIndexed(input,index),index,eflags);
1084  }
1085
1086
1087  /**
1088   * Substitutes the replacement text for the first match found in the input.
1089   *
1090   * @param input The input text.
1091   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1092   * @return A String interpolating the substituted text.
1093   * @see REMatch#substituteInto
1094   */
1095  public String substitute(Object input,String replace) {
1096    return substitute(input,replace,0,0);
1097  }
1098
1099  /**
1100   * Substitutes the replacement text for the first match found in the input
1101   * beginning at the specified index position.  Specifying an index
1102   * effectively causes the regular expression engine to throw away the
1103   * specified number of characters. 
1104   *
1105   * @param input The input text.
1106   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1107   * @param index The offset index at which the search should be begin.
1108   * @return A String containing the substring of the input, starting
1109   *   at the index position, and interpolating the substituted text.
1110   * @see REMatch#substituteInto
1111   */
1112  public String substitute(Object input,String replace,int index) {
1113    return substitute(input,replace,index,0);
1114  }
1115
1116  /**
1117   * Substitutes the replacement text for the first match found in the input
1118   * string, beginning at the specified index position and using the
1119   * specified execution flags.
1120   *
1121   * @param input The input text.
1122   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1123   * @param index The offset index at which the search should be begin.
1124   * @param eflags The logical OR of any execution flags above.
1125   * @return A String containing the substring of the input, starting
1126   *   at the index position, and interpolating the substituted text.
1127   * @see REMatch#substituteInto
1128   */
1129  public String substitute(Object input,String replace,int index,int eflags) {
1130    return substituteImpl(makeCharIndexed(input,index),replace,index,eflags);
1131  }
1132
1133  private String substituteImpl(CharIndexed input,String replace,int index,int eflags) {
1134    StringBuffer buffer = new StringBuffer();
1135    REMatch m = getMatchImpl(input,index,eflags,buffer);
1136    if (m==null) return buffer.toString();
1137    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1138		   replace : m.substituteInto(replace) );
1139    if (input.move(m.end[0])) {
1140      do {
1141	buffer.append(input.charAt(0));
1142      } while (input.move(1));
1143    }
1144    return buffer.toString();
1145  }
1146  
1147  /**
1148   * Substitutes the replacement text for each non-overlapping match found 
1149   * in the input text.
1150   *
1151   * @param input The input text.
1152   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1153   * @return A String interpolating the substituted text.
1154   * @see REMatch#substituteInto
1155   */
1156  public String substituteAll(Object input,String replace) {
1157    return substituteAll(input,replace,0,0);
1158  }
1159
1160  /**
1161   * Substitutes the replacement text for each non-overlapping match found 
1162   * in the input text, starting at the specified index.
1163   *
1164   * If the regular expression allows the empty string to match, it will
1165   * substitute matches at all positions except the end of the input.
1166   *
1167   * @param input The input text.
1168   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1169   * @param index The offset index at which the search should be begin.
1170   * @return A String containing the substring of the input, starting
1171   *   at the index position, and interpolating the substituted text.
1172   * @see REMatch#substituteInto
1173   */
1174  public String substituteAll(Object input,String replace,int index) {
1175    return substituteAll(input,replace,index,0);
1176  }
1177 
1178  /**
1179   * Substitutes the replacement text for each non-overlapping match found 
1180   * in the input text, starting at the specified index and using the
1181   * specified execution flags.
1182   *
1183   * @param input The input text.
1184   * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto).
1185   * @param index The offset index at which the search should be begin.
1186   * @param eflags The logical OR of any execution flags above.
1187   * @return A String containing the substring of the input, starting
1188   *   at the index position, and interpolating the substituted text.
1189   * @see REMatch#substituteInto
1190   */
1191  public String substituteAll(Object input,String replace,int index,int eflags) {
1192    return substituteAllImpl(makeCharIndexed(input,index),replace,index,eflags);
1193  }
1194
1195  private String substituteAllImpl(CharIndexed input,String replace,int index,int eflags) {
1196    StringBuffer buffer = new StringBuffer();
1197    REMatch m;
1198    while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
1199	buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
1200		       replace : m.substituteInto(replace) );
1201      index = m.getEndIndex();
1202      if (m.end[0] == 0) {
1203	char ch = input.charAt(0);
1204	if (ch != CharIndexed.OUT_OF_BOUNDS) 
1205	    buffer.append(ch);
1206	input.move(1);
1207      } else {
1208	  input.move(m.end[0]);
1209      }
1210
1211      if (!input.isValid()) break;
1212    }
1213    return buffer.toString();
1214  }
1215  
1216  /* Helper function for constructor */
1217  private void addToken(REToken next) {
1218    if (next == null) return;
1219    minimumLength += next.getMinimumLength();
1220    if (firstToken == null) {
1221	lastToken = firstToken = next;
1222    } else {
1223      // if chain returns false, it "rejected" the token due to
1224      // an optimization, and next was combined with lastToken
1225      if (lastToken.chain(next)) {
1226	  lastToken = next;
1227      }
1228    }
1229  }
1230
1231  private static REToken setRepeated(REToken current, int min, int max, int index) throws REException {
1232    if (current == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
1233    return new RETokenRepeated(current.subIndex,current,min,max);
1234  }
1235
1236  private static int getPosixSet(char[] pattern,int index,StringBuffer buf) {
1237    // Precondition: pattern[index-1] == ':'
1238    // we will return pos of closing ']'.
1239    int i;
1240    for (i=index; i<(pattern.length-1); i++) {
1241      if ((pattern[i] == ':') && (pattern[i+1] == ']'))
1242	return i+2;
1243      buf.append(pattern[i]);
1244    }
1245    return index; // didn't match up
1246  }
1247
1248  private int getMinMax(char[] input,int index,IntPair minMax,RESyntax syntax) throws REException {
1249    // Precondition: input[index-1] == '{', minMax != null
1250
1251    boolean mustMatch = !syntax.get(RESyntax.RE_NO_BK_BRACES);
1252    int startIndex = index;
1253    if (index == input.length) {
1254      if (mustMatch)
1255        throw new REException(getLocalizedMessage("unmatched.brace"),REException.REG_EBRACE,index);
1256      else
1257        return startIndex;
1258    }
1259    
1260    int min,max=0;
1261    CharUnit unit = new CharUnit();
1262    StringBuffer buf = new StringBuffer();
1263    
1264    // Read string of digits
1265    do {
1266      index = getCharUnit(input,index,unit);
1267      if (Character.isDigit(unit.ch))
1268        buf.append(unit.ch);
1269    } while ((index != input.length) && Character.isDigit(unit.ch));
1270
1271    // Check for {} tomfoolery
1272    if (buf.length() == 0) {
1273      if (mustMatch)
1274        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1275      else
1276        return startIndex;
1277    }
1278
1279    min = Integer.parseInt(buf.toString());
1280	
1281    if ((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk))
1282      max = min;
1283    else if (index == input.length)
1284      if (mustMatch)
1285        throw new REException(getLocalizedMessage("interval.no.end"),REException.REG_EBRACE,index);
1286      else
1287        return startIndex;
1288    else if ((unit.ch == ',') && !unit.bk) {
1289      buf = new StringBuffer();
1290      // Read string of digits
1291      while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
1292	buf.append(unit.ch);
1293
1294      if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))
1295        if (mustMatch)
1296          throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1297        else
1298          return startIndex;
1299
1300      // This is the case of {x,}
1301      if (buf.length() == 0) max = Integer.MAX_VALUE;
1302      else max = Integer.parseInt(buf.toString());
1303    } else
1304      if (mustMatch)
1305        throw new REException(getLocalizedMessage("interval.error"),REException.REG_EBRACE,index);
1306      else
1307        return startIndex;
1308
1309    // We know min and max now, and they are valid.
1310
1311    minMax.first = min;
1312    minMax.second = max;
1313
1314    // return the index following the '}'
1315    return index;
1316  }
1317
1318   /**
1319    * Return a human readable form of the compiled regular expression,
1320    * useful for debugging.
1321    */
1322   public String toString() {
1323     StringBuffer sb = new StringBuffer();
1324     dump(sb);
1325     return sb.toString();
1326   }
1327
1328  void dump(StringBuffer os) {
1329    os.append('(');
1330    if (subIndex == 0)
1331      os.append("?:");
1332    if (firstToken != null)
1333      firstToken.dumpAll(os);
1334    os.append(')');
1335  }
1336
1337  // Cast input appropriately or throw exception
1338  private static CharIndexed makeCharIndexed(Object input, int index) {
1339      // We could let a String fall through to final input, but since
1340      // it's the most likely input type, we check it first.
1341    if (input instanceof String)
1342      return new CharIndexedString((String) input,index);
1343    else if (input instanceof char[])
1344      return new CharIndexedCharArray((char[]) input,index);
1345    else if (input instanceof StringBuffer)
1346      return new CharIndexedStringBuffer((StringBuffer) input,index);
1347    else if (input instanceof InputStream)
1348      return new CharIndexedInputStream((InputStream) input,index);
1349    else if (input instanceof Reader)
1350	return new CharIndexedReader((Reader) input, index);
1351    else if (input instanceof CharIndexed)
1352	return (CharIndexed) input; // do we lose index info?
1353    else 
1354	return new CharIndexedString(input.toString(), index);
1355  }
1356}