PageRenderTime 116ms CodeModel.GetById 99ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 0ms

/jEdit/tags/jedit-4-2-pre14/gnu/regexp/RESyntax.java

#
Java | 502 lines | 180 code | 81 blank | 241 comment | 3 complexity | 53fdb50ff98add51b9ff71eb42db3733 MD5 | raw file
  1/*
  2 *  gnu/regexp/RESyntax.java
  3 *  Copyright (C) 1998-2002 Wes Biggs
  4 *
  5 *  This library is free software; you can redistribute it and/or modify
  6 *  it under the terms of the GNU Lesser General Public License as published
  7 *  by the Free Software Foundation; either version 2.1 of the License, or
  8 *  (at your option) any later version.
  9 *
 10 *  This library is distributed in the hope that it will be useful,
 11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 13 *  GNU Lesser General Public License for more details.
 14 *
 15 *  You should have received a copy of the GNU Lesser General Public License
 16 *  along with this program; if not, write to the Free Software
 17 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 18 */
 19
 20package gnu.regexp;
 21import java.io.Serializable;
 22import java.util.BitSet;
 23
 24/**
 25 * An RESyntax specifies the way a regular expression will be compiled.
 26 * This class provides a number of predefined useful constants for
 27 * emulating popular regular expression syntaxes.  Additionally the
 28 * user may construct his or her own syntax, using any combination of the
 29 * syntax bit constants.  The syntax is an optional argument to any of the
 30 * matching methods on class RE.
 31 *
 32 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 33 */
 34
 35public final class RESyntax implements Serializable {
 36    static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator");
 37
 38    private static final String SYNTAX_IS_FINAL = RE.getLocalizedMessage("syntax.final");
 39
 40    private BitSet bits;
 41
 42    // true for the constant defined syntaxes
 43    private boolean isFinal = false;
 44
 45    private String lineSeparator = DEFAULT_LINE_SEPARATOR;
 46
 47  // Values for constants are bit indexes
 48
 49  /**
 50   * Syntax bit. Backslash is an escape character in lists.
 51   */
 52  public static final int RE_BACKSLASH_ESCAPE_IN_LISTS =  0;
 53
 54  /**
 55   * Syntax bit. Use \? instead of ? and \+ instead of +.
 56   */
 57  public static final int RE_BK_PLUS_QM                =  1;
 58
 59  /**
 60   * Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
 61   */
 62  public static final int RE_CHAR_CLASSES              =  2;
 63
 64  /**
 65   * Syntax bit. ^ and $ are special everywhere.
 66   * <B>Not implemented.</B>
 67   */
 68  public static final int RE_CONTEXT_INDEP_ANCHORS     =  3; 
 69
 70  /**
 71   * Syntax bit. Repetition operators are only special in valid positions.
 72   * <B>Not implemented.</B>
 73   */
 74  public static final int RE_CONTEXT_INDEP_OPS         =  4; 
 75
 76  /**
 77   * Syntax bit. Repetition and alternation operators are invalid
 78   * at start and end of pattern and other places. 
 79   * <B>Not implemented</B>.
 80   */
 81  public static final int RE_CONTEXT_INVALID_OPS       =  5; 
 82
 83  /**
 84   * Syntax bit. Match-any-character operator (.) matches a newline.
 85   */
 86  public static final int RE_DOT_NEWLINE               =  6;
 87
 88  /**
 89   * Syntax bit. Match-any-character operator (.) does not match a null.
 90   */
 91  public static final int RE_DOT_NOT_NULL              =  7;
 92
 93  /**
 94   * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
 95   */
 96  public static final int RE_INTERVALS                 =  8;
 97
 98  /**
 99   * Syntax bit. No alternation (|), match one-or-more (+), or 
100   * match zero-or-one (?) operators.
101   */
102  public static final int RE_LIMITED_OPS               =  9;
103
104  /**
105   * Syntax bit. Newline is an alternation operator.
106   */
107  public static final int RE_NEWLINE_ALT               = 10; // impl.
108
109  /**
110   * Syntax bit. Intervals use { } instead of \{ \}
111   */
112  public static final int RE_NO_BK_BRACES              = 11; 
113
114  /**
115   * Syntax bit. Grouping uses ( ) instead of \( \).
116   */
117  public static final int RE_NO_BK_PARENS              = 12;
118
119  /**
120   * Syntax bit. Backreferences not allowed.
121   */
122  public static final int RE_NO_BK_REFS                = 13;
123
124  /**
125   * Syntax bit. Alternation uses | instead of \|
126   */
127  public static final int RE_NO_BK_VBAR                = 14;
128
129  /**
130   * Syntax bit. <B>Not implemented</B>.
131   */
132  public static final int RE_NO_EMPTY_RANGES           = 15;
133
134  /**
135   * Syntax bit. An unmatched right parenthesis (')' or '\)', depending
136   * on RE_NO_BK_PARENS) will throw an exception when compiling.
137   */
138  public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;
139
140  /**
141   * Syntax bit. <B>Not implemented.</B>
142   */
143  public static final int RE_HAT_LISTS_NOT_NEWLINE     = 17;
144
145  /**
146   * Syntax bit.  Stingy matching is allowed (+?, *?, ??, {x,y}?).
147   */
148  public static final int RE_STINGY_OPS                = 18;
149
150  /**
151   * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
152   */
153  public static final int RE_CHAR_CLASS_ESCAPES        = 19;
154
155  /**
156   * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
157   */
158  public static final int RE_PURE_GROUPING             = 20;
159
160  /**
161   * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
162   * to the text following the current position without consuming that text.
163   */
164  public static final int RE_LOOKAHEAD                 = 21;
165
166  /**
167   * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
168   */
169  public static final int RE_STRING_ANCHORS            = 22;
170
171  /**
172   * Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
173   */
174  public static final int RE_COMMENTS                  = 23;
175
176  /**
177   * Syntax bit. Allow character class escapes within lists, as in Perl5.
178   */
179  public static final int RE_CHAR_CLASS_ESC_IN_LISTS   = 24;
180
181  private static final int BIT_TOTAL                   = 25;
182
183  /**
184   * Predefined syntax.
185   * Emulates regular expression support in the awk utility.
186   */
187  public static final RESyntax RE_SYNTAX_AWK;
188
189  /**
190   * Predefined syntax.
191   * Emulates regular expression support in the ed utility.
192   */
193  public static final RESyntax RE_SYNTAX_ED;
194
195  /**
196   * Predefined syntax.
197   * Emulates regular expression support in the egrep utility.
198   */
199  public static final RESyntax RE_SYNTAX_EGREP;
200
201  /**
202   * Predefined syntax.
203   * Emulates regular expression support in the GNU Emacs editor.
204   */
205  public static final RESyntax RE_SYNTAX_EMACS;
206
207  /**
208   * Predefined syntax.
209   * Emulates regular expression support in the grep utility.
210   */
211  public static final RESyntax RE_SYNTAX_GREP;
212
213  /**
214   * Predefined syntax.
215   * Emulates regular expression support in the POSIX awk specification.
216   */
217  public static final RESyntax RE_SYNTAX_POSIX_AWK;
218
219  /**
220   * Predefined syntax.
221   * Emulates POSIX basic regular expression support.
222   */
223  public static final RESyntax RE_SYNTAX_POSIX_BASIC;
224
225  /**
226   * Predefined syntax.
227   * Emulates regular expression support in the POSIX egrep specification.
228   */
229  public static final RESyntax RE_SYNTAX_POSIX_EGREP;
230
231  /**
232   * Predefined syntax.
233   * Emulates POSIX extended regular expression support.
234   */
235  public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;
236
237  /**
238   * Predefined syntax.
239   * Emulates POSIX basic minimal regular expressions.
240   */
241  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;
242
243  /**
244   * Predefined syntax.
245   * Emulates POSIX extended minimal regular expressions.
246   */
247  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;
248
249  /**
250   * Predefined syntax.
251   * Emulates regular expression support in the sed utility.
252   */
253  public static final RESyntax RE_SYNTAX_SED;
254
255  /**
256   * Predefined syntax.
257   * Emulates regular expression support in Larry Wall's perl, version 4,
258   */
259  public static final RESyntax RE_SYNTAX_PERL4;
260
261  /**
262   * Predefined syntax.
263   * Emulates regular expression support in Larry Wall's perl, version 4,
264   * using single line mode (/s modifier).
265   */
266  public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
267
268  /**
269   * Predefined syntax.
270   * Emulates regular expression support in Larry Wall's perl, version 5.
271   */
272  public static final RESyntax RE_SYNTAX_PERL5;  
273
274  /**
275   * Predefined syntax.
276   * Emulates regular expression support in Larry Wall's perl, version 5,
277   * using single line mode (/s modifier).
278   */
279  public static final RESyntax RE_SYNTAX_PERL5_S;
280
281    /**
282     * Predefined syntax.
283     * Emulates regular expression support in Java 1.4's java.util.regex
284     * package.
285     */
286    public static final RESyntax RE_SYNTAX_JAVA_1_4;
287
288  static {
289      // Define syntaxes
290      
291      RE_SYNTAX_EMACS = new RESyntax().makeFinal();
292      
293      RESyntax RE_SYNTAX_POSIX_COMMON = new RESyntax()
294	  .set(RE_CHAR_CLASSES)
295	  .set(RE_DOT_NEWLINE)
296	  .set(RE_DOT_NOT_NULL)
297	  .set(RE_INTERVALS)
298	  .set(RE_NO_EMPTY_RANGES)
299	  .makeFinal();
300      
301      RE_SYNTAX_POSIX_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
302	  .set(RE_BK_PLUS_QM)
303	  .makeFinal();
304      
305      RE_SYNTAX_POSIX_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
306	  .set(RE_CONTEXT_INDEP_ANCHORS)
307	  .set(RE_CONTEXT_INDEP_OPS)
308	  .set(RE_NO_BK_BRACES)
309	  .set(RE_NO_BK_PARENS)
310	  .set(RE_NO_BK_VBAR)
311	  .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
312	  .makeFinal();
313
314      RE_SYNTAX_AWK = new RESyntax()
315	  .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
316	  .set(RE_DOT_NOT_NULL)
317	  .set(RE_NO_BK_PARENS)
318	  .set(RE_NO_BK_REFS)
319	  .set(RE_NO_BK_VBAR)
320	  .set(RE_NO_EMPTY_RANGES)
321	  .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
322	  .makeFinal();
323      
324      RE_SYNTAX_POSIX_AWK = new RESyntax(RE_SYNTAX_POSIX_EXTENDED)
325	  .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
326	  .makeFinal();
327      
328      RE_SYNTAX_GREP = new RESyntax()
329	  .set(RE_BK_PLUS_QM)
330	  .set(RE_CHAR_CLASSES)
331	  .set(RE_HAT_LISTS_NOT_NEWLINE)
332	  .set(RE_INTERVALS)
333	  .set(RE_NEWLINE_ALT)
334	  .makeFinal();
335      
336      RE_SYNTAX_EGREP = new RESyntax()
337	  .set(RE_CHAR_CLASSES)
338	  .set(RE_CONTEXT_INDEP_ANCHORS)
339	  .set(RE_CONTEXT_INDEP_OPS)
340	  .set(RE_HAT_LISTS_NOT_NEWLINE)
341	  .set(RE_NEWLINE_ALT)
342	  .set(RE_NO_BK_PARENS)
343	  .set(RE_NO_BK_VBAR)
344	  .makeFinal();
345    
346      RE_SYNTAX_POSIX_EGREP = new RESyntax(RE_SYNTAX_EGREP)
347	  .set(RE_INTERVALS)
348	  .set(RE_NO_BK_BRACES)
349	  .makeFinal();
350    
351      /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
352    
353      RE_SYNTAX_ED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
354	  .makeFinal();
355    
356      RE_SYNTAX_SED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
357	  .makeFinal();
358      
359      RE_SYNTAX_POSIX_MINIMAL_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
360	  .set(RE_LIMITED_OPS)
361	  .makeFinal();
362      
363      /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
364	 replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
365      
366      RE_SYNTAX_POSIX_MINIMAL_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
367	  .set(RE_CONTEXT_INDEP_ANCHORS)
368	  .set(RE_CONTEXT_INVALID_OPS)
369	  .set(RE_NO_BK_BRACES)
370	  .set(RE_NO_BK_PARENS)
371	  .set(RE_NO_BK_REFS)
372	  .set(RE_NO_BK_VBAR)
373	  .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
374	  .makeFinal();
375      
376      /* There is no official Perl spec, but here's a "best guess" */
377      
378      RE_SYNTAX_PERL4 = new RESyntax()
379	  .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
380	  .set(RE_CONTEXT_INDEP_ANCHORS)
381	  .set(RE_CONTEXT_INDEP_OPS)          // except for '{', apparently
382	  .set(RE_INTERVALS)
383	  .set(RE_NO_BK_BRACES)
384	  .set(RE_NO_BK_PARENS)
385	  .set(RE_NO_BK_VBAR)
386	  .set(RE_NO_EMPTY_RANGES)
387	  .set(RE_CHAR_CLASS_ESCAPES)    // \d,\D,\w,\W,\s,\S
388	  .makeFinal();
389      
390      RE_SYNTAX_PERL4_S = new RESyntax(RE_SYNTAX_PERL4)
391	  .set(RE_DOT_NEWLINE)
392	  .makeFinal();
393      
394      RE_SYNTAX_PERL5 = new RESyntax(RE_SYNTAX_PERL4)
395	  .set(RE_PURE_GROUPING)          // (?:)
396	  .set(RE_STINGY_OPS)             // *?,??,+?,{}?
397	  .set(RE_LOOKAHEAD)              // (?=)(?!)
398	  .set(RE_STRING_ANCHORS)         // \A,\Z
399	  .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
400	  .set(RE_COMMENTS)              // (?#)
401	  .makeFinal();
402      
403      RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)
404	  .set(RE_DOT_NEWLINE)
405	  .makeFinal();
406
407      RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
408	  // XXX
409	  .makeFinal();
410  }
411
412  /**
413   * Construct a new syntax object with all bits turned off.
414   * This is equivalent to RE_SYNTAX_EMACS.
415   */
416  public RESyntax() {
417    bits = new BitSet(BIT_TOTAL);
418  }
419
420    /**
421     * Called internally when constructing predefined syntaxes
422     * so their interpretation cannot vary.  Conceivably useful
423     * for your syntaxes as well.  Causes IllegalAccessError to
424     * be thrown if any attempt to modify the syntax is made.
425     *
426     * @return this object for convenient chaining
427     */
428    public RESyntax makeFinal() {
429	isFinal = true;
430	return this;
431    }
432
433  /**
434   * Construct a new syntax object with all bits set the same 
435   * as the other syntax.
436   */
437  public RESyntax(RESyntax other) {
438    bits = (BitSet) other.bits.clone();
439  }
440
441  /**
442   * Check if a given bit is set in this syntax.
443   */
444  public boolean get(int index) {
445    return bits.get(index);
446  }
447
448  /**
449   * Set a given bit in this syntax. 
450   *
451   * @param index the constant (RESyntax.RE_xxx) bit to set.
452   * @return a reference to this object for easy chaining.
453   */
454  public RESyntax set(int index) {
455      if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
456    bits.set(index);
457    return this;
458  }
459
460  /**
461   * Clear a given bit in this syntax. 
462   *
463   * @param index the constant (RESyntax.RE_xxx) bit to clear.
464   * @return a reference to this object for easy chaining.
465   */
466  public RESyntax clear(int index) {
467      if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
468      bits.clear(index);
469      return this;
470  }
471
472    /**
473     * Changes the line separator string for regular expressions
474     * created using this RESyntax.  The default separator is the
475     * value returned by the system property "line.separator", which
476     * should be correct when reading platform-specific files from a
477     * filesystem.  However, many programs may collect input from
478     * sources where the line separator is differently specified (for
479     * example, in the applet environment, the text box widget
480     * interprets line breaks as single-character newlines,
481     * regardless of the host platform.
482     *
483     * Note that setting the line separator to a character or
484     * characters that have specific meaning within the current syntax
485     * can cause unexpected chronosynclastic infundibula.
486     *
487     * @return this object for convenient chaining 
488     */
489    public RESyntax setLineSeparator(String aSeparator) {
490	if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
491	lineSeparator = aSeparator;
492	return this;
493    }
494
495    /**
496     * Returns the currently active line separator string.  The default
497     * is the platform-dependent system property "line.separator".
498     */
499    public String getLineSeparator() {
500	return lineSeparator;
501    }
502}