PageRenderTime 50ms CodeModel.GetById 31ms app.highlight 15ms RepoModel.GetById 1ms app.codeStats 0ms

/interpreter/tags/at2-build060407/src/edu/vub/util/regexp/RETokenOneOf.java

http://ambienttalk.googlecode.com/
Java | 233 lines | 145 code | 18 blank | 70 comment | 46 complexity | fe092442e97995f1c3a3c8f4e8444aeb MD5 | raw file
/* gnu/regexp/RETokenOneOf.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */
 37
 38package edu.vub.util.regexp;
 39import java.util.Vector;
 40import java.util.Stack;
 41
 42final class RETokenOneOf extends REToken {
 43  private Vector options;
 44  private boolean negative;
 45
 46  private Vector addition;
 47  // This Vector addition is used to store nested character classes.
 48  // For example, if the original expression is
 49  //    [2-7a-c[f-k][m-z]&&[^p-v][st]]
 50  // the basic part /2-7a-c/ is stored in the Vector options, and
 51  // the additional part /[f-k][m-z]&&[^p-v][st]/ is stored in the
 52  // Vector addition in the following order (Reverse Polish Notation):
 53  //           -- The matching result of the basic part is assumed here. 
 54  //    [f-k]  -- REToken
 55  //    "|"    -- or
 56  //    [m-z]  -- REToken
 57  //    "|"    -- or
 58  //    false
 59  //    [^p-v] -- REToken
 60  //    "|"    -- or
 61  //    [st]   -- REToken
 62  //    "|"    -- or
 63  //    "&"    -- and
 64  //
 65  // As it is clear from the explanation above, the Vector addition is
 66  // effective only when this REToken originates from a character class
 67  // expression.
 68
 69  // This constructor is used for convenience when we know the set beforehand,
 70  // e.g. \d --> new RETokenOneOf("0123456789",false, ..)
 71  //      \D --> new RETokenOneOf("0123456789",true, ..)
 72
 73  RETokenOneOf(int subIndex, String optionsStr, boolean negative, boolean insens) {
 74    super(subIndex);
 75    options = new Vector();
 76    this.negative = negative;
 77    for (int i = 0; i < optionsStr.length(); i++)
 78      options.addElement(new RETokenChar(subIndex,optionsStr.charAt(i),insens));
 79  }
 80
 81  RETokenOneOf(int subIndex, Vector options, boolean negative) {
 82    super(subIndex);
 83    this.options = options;
 84    this.negative = negative;
 85  }
 86
 87  RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) {
 88    super(subIndex);
 89    this.options = options;
 90    this.addition = addition;
 91    this.negative = negative;
 92  }
 93
 94  int getMinimumLength() {
 95    // (negative || addition != null) occurs when this token originates from
 96    // character class expression.
 97    if (negative || addition != null) return 1;
 98    int min = Integer.MAX_VALUE;
 99    int x;
100    for (int i=0; i < options.size(); i++) {
101      if ((x = ((REToken) options.elementAt(i)).getMinimumLength()) < min)
102	min = x;
103    }
104    return min;
105  }
106
107  int getMaximumLength() {
108    // (negative || addition != null) occurs when this token originates from
109    // character class expression.
110    if (negative || addition != null) return 1;
111    int max = 0;
112    int x;
113    for (int i=0; i < options.size(); i++) {
114      if ((x = ((REToken) options.elementAt(i)).getMaximumLength()) > max)
115	max = x;
116    }
117    return max;
118  }
119
120    boolean match(CharIndexed input, REMatch mymatch) {
121      REMatch tryMatch;
122      boolean tryOnly;
123      if (addition == null) {
124	  tryMatch = mymatch;
125	  tryOnly = false;
126      }
127      else {
128	  tryMatch = (REMatch) mymatch.clone();
129	  tryOnly = true;
130      }
131      boolean b = negative ?
132        matchN(input, tryMatch, tryOnly) :
133        matchP(input, tryMatch, tryOnly);
134      if (addition == null) return b;
135
136      Stack stack = new Stack();
137      stack.push(new Boolean(b));
138      for (int i=0; i < addition.size(); i++) {
139	Object obj = addition.elementAt(i);
140	if (obj instanceof REToken) {
141	  b = ((REToken)obj).match(input, (REMatch)mymatch.clone());
142	  stack.push(new Boolean(b));
143	}
144	else if (obj instanceof Boolean) {
145	  stack.push(obj);
146	}
147	else if (obj.equals("|")) {
148	  b = ((Boolean)stack.pop()).booleanValue();
149	  b = ((Boolean)stack.pop()).booleanValue() || b;
150	  stack.push(new Boolean(b));
151	}
152	else if (obj.equals("&")) {
153	  b = ((Boolean)stack.pop()).booleanValue();
154	  b = ((Boolean)stack.pop()).booleanValue() && b;
155	  stack.push(new Boolean(b));
156	}
157	else {
158	  throw new RuntimeException("Invalid object found");
159	}
160      }
161      b = ((Boolean)stack.pop()).booleanValue();
162      if (b) {
163        ++mymatch.index;
164        return next(input, mymatch);
165      }
166      return false;
167    }
168
169    private boolean matchN(CharIndexed input, REMatch mymatch, boolean tryOnly) {
170      if (input.charAt(mymatch.index) == CharIndexed.OUT_OF_BOUNDS) 
171        return false;
172
173      REMatch newMatch = null;
174      REMatch last = null;
175      REToken tk;
176      for (int i=0; i < options.size(); i++) {
177	tk = (REToken) options.elementAt(i);
178	REMatch tryMatch = (REMatch) mymatch.clone();
179	if (tk.match(input, tryMatch)) { // match was successful
180	    return false;
181	} // is a match
182      } // try next option
183
184      if (tryOnly) return true;
185      ++mymatch.index;
186      return next(input, mymatch);
187    }
188
189    private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) {
190      boolean stopMatchingIfSatisfied =
191	  (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
192      REMatch.REMatchList newMatch = new REMatch.REMatchList();
193      REToken tk;
194      for (int i=0; i < options.size(); i++) {
195	// In order that the backtracking can work,
196	// each option must be chained to the next token.
197	// But the chain method has some side effect, so
198	// we use clones.
199	tk = (REToken)((REToken) options.elementAt(i)).clone();
200	if (! tryOnly) {
201	  tk.chain(this.next);
202	  tk.setUncle(this.uncle);
203	  tk.subIndex = this.subIndex;
204        }
205	REMatch tryMatch = (REMatch) mymatch.clone();
206	if (tk.match(input, tryMatch)) { // match was successful
207	  if (tryOnly) return true;
208	  newMatch.addTail(tryMatch);
209	  if (stopMatchingIfSatisfied) break;
210	} // is a match
211      } // try next option
212      if (tryOnly) return false;
213
214      if (newMatch.head != null) {
215	  // set contents of mymatch equal to newMatch
216
217	  // try each one that matched
218	  mymatch.assignFrom(newMatch.head);
219	  return true;
220      } else {
221	  return false;
222      }
223    }
224
225  void dump(StringBuffer os) {
226    os.append(negative ? "[^" : "(?:");
227    for (int i = 0; i < options.size(); i++) {
228      if (!negative && (i > 0)) os.append('|');
229      ((REToken) options.elementAt(i)).dumpAll(os);
230    }
231    os.append(negative ? ']' : ')');
232  }  
233}