PageRenderTime 67ms CodeModel.GetById 17ms app.highlight 44ms RepoModel.GetById 1ms app.codeStats 0ms

/interpreter/tags/at2dist170907/src/edu/vub/util/regexp/RETokenRepeated.java

http://ambienttalk.googlecode.com/
Java | 330 lines | 212 code | 48 blank | 70 comment | 79 complexity | ed07b97f569621ca384b513a2b54737c MD5 | raw file
/* gnu/regexp/RETokenRepeated.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version. */


 39package edu.vub.util.regexp;
 40
 41final class RETokenRepeated extends REToken {
 42    private REToken token;
 43    private int min,max;
 44    private boolean stingy;
 45    private boolean possessive;
 46    
 47    RETokenRepeated(int subIndex, REToken token, int min, int max) {
 48	super(subIndex);
 49	this.token = token;
 50	this.min = min;
 51	this.max = max;
 52    }
 53
 54    /** Sets the minimal matching mode to true. */
 55    void makeStingy() {
 56	stingy = true;
 57    }
 58    
 59    /** Queries if this token has minimal matching enabled. */
 60    boolean isStingy() {
 61	return stingy;
 62    }
 63
 64    /** Sets possessive matching mode to true. */
 65    void makePossessive() {
 66        possessive = true;
 67    }
 68
 69    /** Queries if this token has possessive matching enabled. */
 70    boolean isPossessive() {
 71        return possessive;
 72    }
 73    
 74    /**
 75     * The minimum length of a repeated token is the minimum length
 76     * of the token multiplied by the minimum number of times it must
 77     * match.
 78     */
 79    int getMinimumLength() {
 80	return (min * token.getMinimumLength());
 81    }
 82
 83    int getMaximumLength() {
 84        if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE;
 85	int tmax = token.getMaximumLength();
 86	if (tmax == Integer.MAX_VALUE) return tmax;
 87	return (max * tmax);
 88    }
 89
 90    private static REMatch findDoables(REToken tk,
 91			CharIndexed input, REMatch mymatch) {
 92
 93	    REMatch.REMatchList doables = new REMatch.REMatchList();
 94
 95	    // try next repeat at all possible positions
 96	    for (REMatch current = mymatch;
 97		 current != null; current = current.next) {
 98		REMatch recurrent = (REMatch) current.clone();
 99		int origin = recurrent.index;
100		tk = (REToken) tk.clone();
101		tk.next = tk.uncle = null;
102		recurrent.matchFlags |= REMatch.MF_FIND_ALL;
103		if (tk.match(input, recurrent)) {
104		    for (REMatch m = recurrent; m != null; m = m.next) {
105			m.matchFlags &= ~REMatch.MF_FIND_ALL;
106		    }
107		    if (recurrent.index == origin) recurrent.empty = true;
108		    // add all items in current to doables array
109		    doables.addTail(recurrent);
110		}
111	    }
112	    return doables.head;
113    }
114
115    // We do need to save every possible point, but the number of clone()
116    // invocations here is really a killer for performance on non-stingy
117    // repeat operators.  I'm open to suggestions...
118
119    // Hypothetical question: can you have a RE that matches 1 times,
120    // 3 times, 5 times, but not 2 times or 4 times?  Does having
121    // the subexpression back-reference operator allow that?
122
123    boolean match(CharIndexed input, REMatch mymatch) {
124
125        boolean stopMatchingIfSatisfied =
126		(mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
127
128	REMatch newMatch = matchMinimum(input, mymatch);
129	if (newMatch == null) return false;
130
131	// Array of positions we have already visited
132	int[] visited = initVisited();
133	for (REMatch m = newMatch; m != null; m = m.next) {
134	    visited = addVisited(m.index, visited);
135	}
136
137	int max1 = decreaseMax(max, min);
138
139	newMatch = _match(input, newMatch, max1,
140	    stopMatchingIfSatisfied, visited);
141	if (newMatch != null) {
142	    mymatch.assignFrom(newMatch);
143	    return true;
144	}
145	return false;
146    }
147
148    private static int decreaseMax(int m, int n) {
149        if (m == Integer.MAX_VALUE) return m;
150	return m - n;
151    }
152
153    // Array visited is an array of character positions we have already
154    // visited. visited[0] is used to store the effective length of the
155    // array.
156    private static int[] initVisited() {
157	int[] visited = new int[32];
158	visited[0] = 0;
159	return visited;
160    }
161
162    private static boolean visitedContains(int n, int[] visited) {
163	// Experience tells that for a small array like this,
164	// simple linear search is faster than binary search.
165	for (int i = 1; i < visited[0]; i++) {
166	    if (n == visited[i]) return true;
167	}
168	return false;
169    }
170
171    private static int[] addVisited(int n, int[] visited) {
172	if (visitedContains(n, visited)) return visited;
173	if (visited[0] >= visited.length - 1) {
174	    int[] newvisited = new int[visited.length + 32];
175	    System.arraycopy(visited, 0, newvisited, 0, visited.length);
176	    visited = newvisited;
177	}
178	visited[0]++;
179	visited[visited[0]] = n;
180	return visited;
181    }
182
183    private REMatch _match(CharIndexed input, REMatch mymatch,
184    	    int max1, boolean stopMatchingIfSatisfied,
185	    int[] visited) {
186
187        if (max1 == 0) {
188	    return matchRest(input, mymatch);
189	}
190	max1 = decreaseMax(max1, 1);
191
192	REMatch.REMatchList allResults = new REMatch.REMatchList();
193
194	// Depth-first search
195
196	for (REMatch cur = mymatch; cur != null; cur = cur.next) {
197
198	    REMatch cur1 = (REMatch) cur.clone();
199
200	    if (stingy) {
201	        REMatch results = matchRest(input, cur1);
202	        if (results != null) {
203	            if (stopMatchingIfSatisfied) {
204		        return results;
205		    }
206		    allResults.addTail(results);
207	        }
208	    }
209
210	    DO_THIS:
211	    do {
212
213	    boolean emptyMatchFound = false;
214	    REMatch doables = findDoables(token, input, cur1);
215	    if (doables == null) break DO_THIS;
216	    if (doables.empty) emptyMatchFound = true;
217
218	    if (!emptyMatchFound) {
219	        REMatch.REMatchList list = new REMatch.REMatchList();
220	        for (REMatch m = doables; m != null; m = m.next) {
221	            REMatch m1 = (REMatch) m.clone();
222		    int n = m1.index;
223		    if (! visitedContains(n, visited)) {
224		        visited = addVisited(n, visited);
225		        list.addTail(m1);
226		    }
227	        }
228	        if (list.head == null) break DO_THIS;
229	        doables = list.head;
230	    }
231
232	    for (REMatch m = doables; m != null; m = m.next) {
233	        if (! emptyMatchFound) {
234	            REMatch m1 = _match(input, m, max1,
235		        stopMatchingIfSatisfied, visited);
236		    if (possessive) return m1;
237		    if (m1 != null) {
238	                if (stopMatchingIfSatisfied) {
239		            return m1;
240		        }
241		        allResults.addTail(m1);
242		    }
243	        }
244		else {
245		    REMatch m1 = matchRest(input, m);
246		    if (m1 != null) {
247		        if (stopMatchingIfSatisfied) {
248		            return m1;
249		        }
250		        allResults.addTail(m1);
251		    }
252		}
253	    }
254
255	    } while (false); // DO_THIS only once;
256
257	    // This point itself is a candidate.
258	    if (!stingy) {
259		REMatch m2 = matchRest(input, cur1);
260		if (m2 != null) {
261		    if (stopMatchingIfSatisfied) {
262		        return m2;
263		    }
264		    allResults.addTail(m2);
265	        }
266	    }
267	}
268
269        return allResults.head;
270    }
271
272    private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) {
273	// Possible positions for the next repeat to match at
274	REMatch newMatch = mymatch;
275
276	// number of times we've matched so far
277	int numRepeats = 0; 
278	
279	while (numRepeats < min) {
280	    REMatch doables = findDoables(token, input, newMatch);
281
282	    // if none of the possibilities worked out, 
283	    // it means that minimum number of repeats could not be found.
284	    if (doables == null) return null;
285	    
286	    // reassign where the next repeat can match
287	    newMatch = doables;
288	    
289	    // increment how many repeats we've successfully found
290	    ++numRepeats;
291	    
292	    if (newMatch.empty) break;
293	}
294	return newMatch;
295    }
296
297    private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
298	REMatch current, single;
299	REMatch.REMatchList doneIndex = new REMatch.REMatchList();
300	// Test all possible matches for this number of repeats
301	for (current = newMatch; current != null; current = current.next) {
302	    // clone() separates a single match from the chain
303	    single = (REMatch) current.clone();
304	    if (next(input, single)) {
305		// chain results to doneIndex
306		doneIndex.addTail(single);
307	    }
308	}
309	return doneIndex.head;
310    }
311
312    void dump(StringBuffer os) {
313	os.append("(?:");
314	token.dumpAll(os);
315	os.append(')');
316	if ((max == Integer.MAX_VALUE) && (min <= 1))
317	    os.append( (min == 0) ? '*' : '+' );
318	else if ((min == 0) && (max == 1))
319	    os.append('?');
320	else {
321	    os.append('{').append(min);
322	    if (max > min) {
323		os.append(',');
324		if (max != Integer.MAX_VALUE) os.append(max);
325	    }
326	    os.append('}');
327	}
328	if (stingy) os.append('?');
329    }
330}