PageRenderTime 25ms CodeModel.GetById 2ms app.highlight 19ms RepoModel.GetById 1ms app.codeStats 0ms

/interpreter/tags/at2dist130208/src/edu/vub/util/regexp/RETokenNamedProperty.java

http://ambienttalk.googlecode.com/
Java | 311 lines | 255 code | 17 blank | 39 comment | 6 complexity | af77e9b84d789bd8c606cdcb5bce3f96 MD5 | raw file
  1/* gnu/regexp/RETokenNamedProperty.java
  2   Copyright (C) 2006 Free Software Foundation, Inc.
  3
  4This file is part of GNU Classpath.
  5
  6GNU Classpath is free software; you can redistribute it and/or modify
  7it under the terms of the GNU General Public License as published by
  8the Free Software Foundation; either version 2, or (at your option)
  9any later version.
 10
 11GNU Classpath is distributed in the hope that it will be useful, but
 12WITHOUT ANY WARRANTY; without even the implied warranty of
 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 14General Public License for more details.
 15
 16You should have received a copy of the GNU General Public License
 17along with GNU Classpath; see the file COPYING.  If not, write to the
 18Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 1902110-1301 USA.
 20
 21Linking this library statically or dynamically with other modules is
 22making a combined work based on this library.  Thus, the terms and
 23conditions of the GNU General Public License cover the whole
 24combination.
 25
 26As a special exception, the copyright holders of this library give you
 27permission to link this library with independent modules to produce an
 28executable, regardless of the license terms of these independent
 29modules, and to copy and distribute the resulting executable under
 30terms of your choice, provided that you also meet, for each linked
 31independent module, the terms and conditions of the license of that
 32module.  An independent module is a module which is not derived from
 33or based on this library.  If you modify this library, you may extend
 34this exception to your version of the library, but you are not
 35obligated to do so.  If you do not wish to do so, delete this
 36exception statement from your version. */
 37
 38
 39package edu.vub.util.regexp;
 40
 41final class RETokenNamedProperty extends REToken {
 42  String name;
 43  boolean insens;
 44  boolean negate;
 45  Handler handler;
 46
 47  // Grouped properties
 48  static final byte[] LETTER = new byte[]
 49  { Character.LOWERCASE_LETTER,
 50    Character.UPPERCASE_LETTER,
 51    Character.TITLECASE_LETTER,
 52    Character.MODIFIER_LETTER,
 53    Character.OTHER_LETTER };
 54  
 55  static final byte[] MARK = new byte[]
 56  { Character.NON_SPACING_MARK,
 57    Character.COMBINING_SPACING_MARK,
 58    Character.ENCLOSING_MARK };
 59  
 60  static final byte[] SEPARATOR = new byte[]
 61  { Character.SPACE_SEPARATOR,
 62    Character.LINE_SEPARATOR,
 63    Character.PARAGRAPH_SEPARATOR };
 64  
 65  static final byte[] SYMBOL = new byte[]
 66  { Character.MATH_SYMBOL,
 67    Character.CURRENCY_SYMBOL,
 68    Character.MODIFIER_SYMBOL,
 69    Character.OTHER_SYMBOL };
 70  
 71  static final byte[] NUMBER = new byte[]
 72  { Character.DECIMAL_DIGIT_NUMBER,
 73    Character.LETTER_NUMBER,
 74    Character.OTHER_NUMBER };
 75  
 76  static final byte[] PUNCTUATION = new byte[]
 77  { Character.DASH_PUNCTUATION,
 78    Character.START_PUNCTUATION,
 79    Character.END_PUNCTUATION,
 80    Character.CONNECTOR_PUNCTUATION,
 81    Character.OTHER_PUNCTUATION }; // ,
 82//    Character.INITIAL_QUOTE_PUNCTUATION,
 83//    Character.FINAL_QUOTE_PUNCTUATION};
 84  
 85  static final byte[] OTHER = new byte[]
 86  { Character.CONTROL,
 87    Character.FORMAT,
 88    Character.PRIVATE_USE,
 89    Character.SURROGATE,
 90    Character.UNASSIGNED };
 91
 92  RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
 93    super(subIndex);
 94    this.name = name;
 95    this.insens = insens;
 96    this.negate = negate;
 97    handler = getHandler(name); 
 98  }
 99
100    int getMinimumLength() {
101	return 1;
102    }
103
104    int getMaximumLength() {
105	return 1;
106    }
107
108    boolean match(CharIndexed input, REMatch mymatch) {
109    char ch = input.charAt(mymatch.index);
110    if (ch == CharIndexed.OUT_OF_BOUNDS)
111      return false;
112    
113    boolean retval = handler.includes(ch);
114    if (insens) {
115        retval = retval ||
116                 handler.includes(Character.toUpperCase(ch)) ||
117                 handler.includes(Character.toLowerCase(ch));
118    }
119
120    if (negate) retval = !retval;
121    if (retval) {
122	++mymatch.index;
123	return next(input, mymatch);
124    }
125    else return false;
126  }
127
128  void dump(StringBuffer os) {
129    os.append("\\")
130      .append(negate ? "P" : "p")
131      .append("{" + name + "}");
132  }
133
134  private abstract static class Handler {
135      abstract boolean includes(char c);
136  }
137
138  private Handler getHandler(String name) throws REException {
139      if (name.equals("Lower") ||
140          name.equals("Upper") ||
141          // name.equals("ASCII") ||
142          name.equals("Alpha") ||
143          name.equals("Digit") ||
144          name.equals("Alnum") ||
145          name.equals("Punct") ||
146          name.equals("Graph") ||
147          name.equals("Print") ||
148          name.equals("Blank") ||
149          name.equals("Cntrl") ||
150          name.equals("XDigit") ||
151          name.equals("Space") ) {
152         return new POSIXHandler(name);
153      }
154//      if (name.startsWith("In")) {
155//	  try {
156//	      name = name.substring(2);
157//	      Character.UnicodeBlock block = Character.UnicodeBlock.forName(name);
158//	      return new UnicodeBlockHandler(block);
159//	  }
160//	  catch (IllegalArgumentException e) {
161//              throw new REException("Invalid Unicode block name: " + name, REException.REG_ESCAPE, 0);
162//	  }
163//      }
164      if (name.startsWith("Is")) {
165          name = name.substring(2);
166      }
167
168      // "grouped properties"
169      if (name.equals("L"))
170	  return new UnicodeCategoriesHandler(LETTER);
171      if (name.equals("M"))
172	  return new UnicodeCategoriesHandler(MARK);
173      if (name.equals("Z"))
174	  return new UnicodeCategoriesHandler(SEPARATOR);
175      if (name.equals("S"))
176	  return new UnicodeCategoriesHandler(SYMBOL);
177      if (name.equals("N"))
178	  return new UnicodeCategoriesHandler(NUMBER);
179      if (name.equals("P"))
180	  return new UnicodeCategoriesHandler(PUNCTUATION);
181      if (name.equals("C"))
182	  return new UnicodeCategoriesHandler(OTHER);
183
184      if (name.equals("Mc"))
185          return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
186      if (name.equals("Pc"))
187          return new UnicodeCategoryHandler(Character.CONNECTOR_PUNCTUATION);
188      if (name.equals("Cc"))
189          return new UnicodeCategoryHandler(Character.CONTROL);
190      if (name.equals("Sc"))
191          return new UnicodeCategoryHandler(Character.CURRENCY_SYMBOL);
192      if (name.equals("Pd"))
193          return new UnicodeCategoryHandler(Character.DASH_PUNCTUATION);
194      if (name.equals("Nd"))
195          return new UnicodeCategoryHandler(Character.DECIMAL_DIGIT_NUMBER);
196      if (name.equals("Me"))
197          return new UnicodeCategoryHandler(Character.ENCLOSING_MARK);
198      if (name.equals("Pe"))
199          return new UnicodeCategoryHandler(Character.END_PUNCTUATION);
200//      if (name.equals("Pf"))
201//          return new UnicodeCategoryHandler(Character.FINAL_QUOTE_PUNCTUATION);
202      if (name.equals("Cf"))
203          return new UnicodeCategoryHandler(Character.FORMAT);
204//      if (name.equals("Pi"))
205//          return new UnicodeCategoryHandler(Character.INITIAL_QUOTE_PUNCTUATION);
206      if (name.equals("Nl"))
207          return new UnicodeCategoryHandler(Character.LETTER_NUMBER);
208      if (name.equals("Zl"))
209          return new UnicodeCategoryHandler(Character.LINE_SEPARATOR);
210      if (name.equals("Ll"))
211          return new UnicodeCategoryHandler(Character.LOWERCASE_LETTER);
212      if (name.equals("Sm"))
213          return new UnicodeCategoryHandler(Character.MATH_SYMBOL);
214      if (name.equals("Lm"))
215          return new UnicodeCategoryHandler(Character.MODIFIER_LETTER);
216      if (name.equals("Sk"))
217          return new UnicodeCategoryHandler(Character.MODIFIER_SYMBOL);
218      if (name.equals("Mn"))
219          return new UnicodeCategoryHandler(Character.NON_SPACING_MARK);
220      if (name.equals("Lo"))
221          return new UnicodeCategoryHandler(Character.OTHER_LETTER);
222      if (name.equals("No"))
223          return new UnicodeCategoryHandler(Character.OTHER_NUMBER);
224      if (name.equals("Po"))
225          return new UnicodeCategoryHandler(Character.OTHER_PUNCTUATION);
226      if (name.equals("So"))
227          return new UnicodeCategoryHandler(Character.OTHER_SYMBOL);
228      if (name.equals("Zp"))
229          return new UnicodeCategoryHandler(Character.PARAGRAPH_SEPARATOR);
230      if (name.equals("Co"))
231          return new UnicodeCategoryHandler(Character.PRIVATE_USE);
232      if (name.equals("Zs"))
233          return new UnicodeCategoryHandler(Character.SPACE_SEPARATOR);
234      if (name.equals("Ps"))
235          return new UnicodeCategoryHandler(Character.START_PUNCTUATION);
236      if (name.equals("Cs"))
237          return new UnicodeCategoryHandler(Character.SURROGATE);
238      if (name.equals("Lt"))
239          return new UnicodeCategoryHandler(Character.TITLECASE_LETTER);
240      if (name.equals("Cn"))
241          return new UnicodeCategoryHandler(Character.UNASSIGNED);
242      if (name.equals("Lu"))
243          return new UnicodeCategoryHandler(Character.UPPERCASE_LETTER);
244      throw new REException("unsupported name " + name, REException.REG_ESCAPE, 0);
245  }
246
247  private static class POSIXHandler extends Handler {
248		private RETokenPOSIX retoken;
249
250		private REMatch mymatch = new REMatch(0, 0, 0);
251
252		private char[] chars = new char[1];
253
254		private CharIndexedCharArray ca = new CharIndexedCharArray(chars, 0);
255
256		POSIXHandler(String name) {
257			int posixId = RETokenPOSIX.intValue(name.toLowerCase());
258			if (posixId != -1)
259				retoken = new RETokenPOSIX(0, posixId, false, false);
260			else
261				throw new RuntimeException("Unknown posix ID: " + name);
262		}
263
264		boolean includes(char c) {
265			chars[0] = c;
266			mymatch.index = 0;
267			return retoken.match(ca, mymatch);
268		}
269	}
270
271  private static class UnicodeCategoryHandler extends Handler {
272		UnicodeCategoryHandler(byte category) {
273			this.category = (int) category;
274		}
275
276		private int category;
277
278		boolean includes(char c) {
279			return Character.getType(c) == category;
280		}
281	}
282
283	private static class UnicodeCategoriesHandler extends Handler {
284		UnicodeCategoriesHandler(byte[] categories) {
285			this.categories = categories;
286		}
287
288		private byte[] categories;
289
290		boolean includes(char c) {
291			int category = Character.getType(c);
292			for (int i = 0; i < categories.length; i++)
293				if (category == categories[i])
294					return true;
295			return false;
296		}
297	}
298
299// Backport from JDK 1.4 to 1.3
300// private static class UnicodeBlockHandler extends Handler {
301// public UnicodeBlockHandler(Character.UnicodeBlock block) {
302//	  this.block = block;
303//      }
304//      private Character.UnicodeBlock block;
305//      public boolean includes(char c) {
306//	  Character.UnicodeBlock cblock = Character.UnicodeBlock.of(c);
307//	  return (cblock != null && cblock.equals(block));
308//      }
309//  }
310
311}