PageRenderTime 49ms CodeModel.GetById 31ms app.highlight 13ms RepoModel.GetById 2ms app.codeStats 0ms

/interpreter/tags/at2-build270707/src/edu/vub/util/regexp/REMatch.java

http://ambienttalk.googlecode.com/
Java | 319 lines | 139 code | 26 blank | 154 comment | 32 complexity | 9a9767241205c6df74f4daef0c63ea92 MD5 | raw file
  1/* gnu/regexp/REMatch.java
  2   Copyright (C) 2006 Free Software Foundation, Inc.
  3
  4This file is part of GNU Classpath.
  5
  6GNU Classpath is free software; you can redistribute it and/or modify
  7it under the terms of the GNU General Public License as published by
  8the Free Software Foundation; either version 2, or (at your option)
  9any later version.
 10
 11GNU Classpath is distributed in the hope that it will be useful, but
 12WITHOUT ANY WARRANTY; without even the implied warranty of
 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 14General Public License for more details.
 15
 16You should have received a copy of the GNU General Public License
 17along with GNU Classpath; see the file COPYING.  If not, write to the
 18Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 1902110-1301 USA.
 20
 21Linking this library statically or dynamically with other modules is
 22making a combined work based on this library.  Thus, the terms and
 23conditions of the GNU General Public License cover the whole
 24combination.
 25
 26As a special exception, the copyright holders of this library give you
 27permission to link this library with independent modules to produce an
 28executable, regardless of the license terms of these independent
 29modules, and to copy and distribute the resulting executable under
 30terms of your choice, provided that you also meet, for each linked
 31independent module, the terms and conditions of the license of that
 32module.  An independent module is a module which is not derived from
 33or based on this library.  If you modify this library, you may extend
 34this exception to your version of the library, but you are not
 35obligated to do so.  If you do not wish to do so, delete this
 36exception statement from your version. */
 37
 38
 39package edu.vub.util.regexp;
 40import java.io.Serializable;
 41
 42/**
 43 * An instance of this class represents a match
 44 * completed by a edu.vub.util.regexp matching function. It can be used
 45 * to obtain relevant information about the location of a match
 46 * or submatch.
 47 *
 48 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 49 */
 50public final class REMatch implements Serializable, Cloneable {
 51    private String matchedText;
 52
 53    // These variables are package scope for fast access within the engine
 54    int eflags; // execution flags this match was made using
 55
 56    // Offset in source text where match was tried.  This is zero-based;
 57    // the actual position in the source text is given by (offset + anchor).
 58    int offset;
 59
 60    // Anchor position refers to the index into the source input
 61    // at which the matching operation began.
 62    // This is also useful for the ANCHORINDEX option.
 63    int anchor;
 64
 65    // Package scope; used by RE.
 66    int index; // used while matching to mark current match position in input
 67    int[] start; // start positions (relative to offset) for each (sub)exp.
 68    int[] end;   // end positions for the same
 69    REMatch next; // other possibility (to avoid having to use arrays)
 70    boolean empty; // empty string matched. This flag is used only within
 71		   // RETokenRepeated.
 72    int matchFlags; // flags passed to match methods
 73    static final int MF_FIND_ALL = 0x01;
 74
 75    public Object clone() {
 76	try {
 77	    REMatch copy = (REMatch) super.clone();
 78	    copy.next = null;
 79
 80	    copy.start = (int[]) start.clone();
 81	    copy.end = (int[]) end.clone();
 82
 83	    return copy;
 84	} catch (CloneNotSupportedException e) {
 85	    throw new Error(); // doesn't happen
 86	}
 87    }
 88
 89    void assignFrom(REMatch other) {
 90	start = other.start;
 91	end = other.end;
 92	index = other.index;
 93	// need to deep clone?
 94	next = other.next;
 95    }
 96
 97    REMatch(int subs, int anchor, int eflags) {
 98	start = new int[subs+1];
 99	end = new int[subs+1];
100	this.anchor = anchor;
101	this.eflags = eflags;
102	clear(anchor);
103    }
104
105    void finish(CharIndexed text) {
106	start[0] = 0;
107	StringBuffer sb = new StringBuffer();
108	int i;
109	for (i = 0; i < end[0]; i++)
110	    sb.append(text.charAt(i));
111	matchedText = sb.toString();
112	for (i = 0; i < start.length; i++) {
113	    // If any subexpressions didn't terminate, they don't count
114	    // TODO check if this code ever gets hit
115	    if ((start[i] == -1) ^ (end[i] == -1)) {
116		start[i] = -1;
117		end[i] = -1;
118	    }
119	}
120	next = null; // cut off alternates
121    }
122    
123    /** Clears the current match and moves the offset to the new index. */
124    void clear(int index) {
125	offset = index;
126	this.index = 0;
127	for (int i = 0; i < start.length; i++) {
128	    start[i] = end[i] = -1;
129	}
130	next = null; // cut off alternates
131    }
132    
133    /**
134     * Returns the string matching the pattern.  This makes it convenient
135     * to write code like the following:
136     * <P>
137     * <code> 
138     * REMatch myMatch = myExpression.getMatch(myString);<br>
139     * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
140     * </code>
141     */
142    public String toString() {
143	return matchedText;
144    }
145    
146    /**
147     * Returns the index within the input text where the match in its entirety
148     * began.
149     */
150    public int getStartIndex() {
151	return offset + start[0];
152    }
153    
154    /**
155     * Returns the index within the input string where the match in
156     * its entirety ends.  The return value is the next position after
157     * the end of the string; therefore, a match created by the
158     * following call:
159     *
160     * <P>
161     * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
162     * <P>
163     * can be viewed (given that myMatch is not null) by creating
164     * <P>
165     * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
166     * myMatch.getEndIndex());</code>
167     * <P>
168     * But you can save yourself that work, since the <code>toString()</code>
169     * method (above) does exactly that for you.  
170     */
171    public int getEndIndex() {
172	return offset + end[0];
173    }
174  
175    /**
176     * Returns the string matching the given subexpression.  The subexpressions
177     * are indexed starting with one, not zero.  That is, the subexpression
178     * identified by the first set of parentheses in a regular expression
179     * could be retrieved from an REMatch by calling match.toString(1).
180     *
181     * @param sub Index of the subexpression.
182     */
183    public String toString(int sub) {
184	if ((sub >= start.length) || sub < 0)
185	    throw new IndexOutOfBoundsException("No group " + sub);
186	if (start[sub] == -1) return null;
187	return (matchedText.substring(start[sub],end[sub]));
188    }
189    
190    /** 
191     * Returns the index within the input string used to generate this match
192     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
193     * the subexpression does not exist.  The initial position is zero.
194     *
195     * @param sub Subexpression index
196     * @deprecated Use getStartIndex(int) instead.
197     */
198    public int getSubStartIndex(int sub) {
199	if (sub >= start.length) return -1;
200	int x = start[sub];
201	return (x == -1) ? x : offset + x;
202    }
203    
204    /** 
205     * Returns the index within the input string used to generate this match
206     * where subexpression number <i>sub</i> begins, or <code>-1</code> if
207     * the subexpression does not exist.  The initial position is zero.
208     *
209     * @param sub Subexpression index
210     * @since edu.vub.util.regexp 1.1.0
211     */
212    public int getStartIndex(int sub) {
213	if (sub >= start.length) return -1;
214	int x = start[sub];
215	return (x == -1) ? x : offset + x;
216    }
217  
218    /** 
219     * Returns the index within the input string used to generate this match
220     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
221     * the subexpression does not exist.  The initial position is zero.
222     *
223     * @param sub Subexpression index
224     * @deprecated Use getEndIndex(int) instead
225     */
226    public int getSubEndIndex(int sub) {
227	if (sub >= start.length) return -1;
228	int x = end[sub];
229	return (x == -1) ? x : offset + x;
230    }
231    
232    /** 
233     * Returns the index within the input string used to generate this match
234     * where subexpression number <i>sub</i> ends, or <code>-1</code> if
235     * the subexpression does not exist.  The initial position is zero.
236     *
237     * @param sub Subexpression index
238     */
239    public int getEndIndex(int sub) {
240	if (sub >= start.length) return -1;
241	int x = end[sub];
242	return (x == -1) ? x : offset + x;
243    }
244    
245    /**
246     * Substitute the results of this match to create a new string.
247     * This is patterned after PERL, so the tokens to watch out for are
248     * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
249     * the full substring matched; <code>$<i>n</i></code> matches
250     * subexpression number <i>n</i>.
251     * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
252     * if such subexpressions exist.
253     *
254     * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
255     */
256    public String substituteInto(String input) {
257	// a la Perl, $0 is whole thing, $1 - $9 are subexpressions
258	StringBuffer output = new StringBuffer();
259	int pos;
260	for (pos = 0; pos < input.length()-1; pos++) {
261	    if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) {
262		int val = Character.digit(input.charAt(++pos),10);
263		int pos1 = pos + 1;
264		while (pos1 < input.length() &&
265		       Character.isDigit(input.charAt(pos1))) {
266		    int val1 = val*10 + Character.digit(input.charAt(pos1),10);
267		    if (val1 >= start.length) break;
268		    pos1++;
269		    val = val1;
270		}
271		pos = pos1 - 1;
272
273		if (val < start.length) {
274		    output.append(toString(val));
275		} 
276	    } else output.append(input.charAt(pos));
277	}
278	if (pos < input.length()) output.append(input.charAt(pos));
279	return output.toString();
280    }
281
282    static class REMatchList {
283        REMatch head;
284	REMatch tail;
285        REMatchList() {
286	    head = tail = null;
287	}
288	/* Not used now. But we may need this some day?
289	void addHead(REMatch newone) {
290            if (head == null) {
291                head = newone;
292                tail = newone;
293                while (tail.next != null) {
294                    tail = tail.next;
295                }
296            }
297	    else {
298                REMatch tmp = newone;
299                while (tmp.next != null) tmp = tmp.next;
300                tmp.next = head;
301	        head = newone;
302	    }
303	}
304	*/
305	void addTail(REMatch newone) {
306            if (head == null) {
307                head = newone;
308                tail = newone;
309            }
310            else {
311                tail.next = newone;
312            }
313            while (tail.next != null) {
314                tail = tail.next;
315            }
316	}
317    }
318
319}