/interpreter/tags/at2dist170907/src/edu/vub/util/regexp/RETokenNamedProperty.java
Java | 311 lines | 255 code | 17 blank | 39 comment | 6 complexity | af77e9b84d789bd8c606cdcb5bce3f96 MD5 | raw file
1/* gnu/regexp/RETokenNamedProperty.java 2 Copyright (C) 2006 Free Software Foundation, Inc. 3 4This file is part of GNU Classpath. 5 6GNU Classpath is free software; you can redistribute it and/or modify 7it under the terms of the GNU General Public License as published by 8the Free Software Foundation; either version 2, or (at your option) 9any later version. 10 11GNU Classpath is distributed in the hope that it will be useful, but 12WITHOUT ANY WARRANTY; without even the implied warranty of 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14General Public License for more details. 15 16You should have received a copy of the GNU General Public License 17along with GNU Classpath; see the file COPYING. If not, write to the 18Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 1902110-1301 USA. 20 21Linking this library statically or dynamically with other modules is 22making a combined work based on this library. Thus, the terms and 23conditions of the GNU General Public License cover the whole 24combination. 25 26As a special exception, the copyright holders of this library give you 27permission to link this library with independent modules to produce an 28executable, regardless of the license terms of these independent 29modules, and to copy and distribute the resulting executable under 30terms of your choice, provided that you also meet, for each linked 31independent module, the terms and conditions of the license of that 32module. An independent module is a module which is not derived from 33or based on this library. If you modify this library, you may extend 34this exception to your version of the library, but you are not 35obligated to do so. If you do not wish to do so, delete this 36exception statement from your version. */ 37 38 39package edu.vub.util.regexp; 40 41final class RETokenNamedProperty extends REToken { 42 String name; 43 boolean insens; 44 boolean negate; 45 Handler handler; 46 47 // Grouped properties 48 static final byte[] LETTER = new byte[] 49 { Character.LOWERCASE_LETTER, 50 Character.UPPERCASE_LETTER, 51 Character.TITLECASE_LETTER, 52 Character.MODIFIER_LETTER, 53 Character.OTHER_LETTER }; 54 55 static final byte[] MARK = new byte[] 56 { Character.NON_SPACING_MARK, 57 Character.COMBINING_SPACING_MARK, 58 Character.ENCLOSING_MARK }; 59 60 static final byte[] SEPARATOR = new byte[] 61 { Character.SPACE_SEPARATOR, 62 Character.LINE_SEPARATOR, 63 Character.PARAGRAPH_SEPARATOR }; 64 65 static final byte[] SYMBOL = new byte[] 66 { Character.MATH_SYMBOL, 67 Character.CURRENCY_SYMBOL, 68 Character.MODIFIER_SYMBOL, 69 Character.OTHER_SYMBOL }; 70 71 static final byte[] NUMBER = new byte[] 72 { Character.DECIMAL_DIGIT_NUMBER, 73 Character.LETTER_NUMBER, 74 Character.OTHER_NUMBER }; 75 76 static final byte[] PUNCTUATION = new byte[] 77 { Character.DASH_PUNCTUATION, 78 Character.START_PUNCTUATION, 79 Character.END_PUNCTUATION, 80 Character.CONNECTOR_PUNCTUATION, 81 Character.OTHER_PUNCTUATION }; // , 82// Character.INITIAL_QUOTE_PUNCTUATION, 83// Character.FINAL_QUOTE_PUNCTUATION}; 84 85 static final byte[] OTHER = new byte[] 86 { Character.CONTROL, 87 Character.FORMAT, 88 Character.PRIVATE_USE, 89 Character.SURROGATE, 90 Character.UNASSIGNED }; 91 92 RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException { 93 super(subIndex); 94 this.name = name; 95 this.insens = insens; 96 this.negate = negate; 97 handler = getHandler(name); 98 } 99 100 int getMinimumLength() { 101 return 1; 102 } 103 104 int getMaximumLength() { 105 return 1; 106 } 107 108 boolean match(CharIndexed input, REMatch mymatch) { 109 char ch = input.charAt(mymatch.index); 110 if (ch == CharIndexed.OUT_OF_BOUNDS) 111 return false; 112 113 boolean retval = handler.includes(ch); 114 if (insens) { 115 retval = retval || 116 handler.includes(Character.toUpperCase(ch)) || 117 handler.includes(Character.toLowerCase(ch)); 118 } 119 120 if (negate) retval = !retval; 121 if (retval) { 122 ++mymatch.index; 123 return next(input, mymatch); 124 } 125 else return false; 126 } 127 128 void dump(StringBuffer os) { 129 os.append("\\") 130 .append(negate ? "P" : "p") 131 .append("{" + name + "}"); 132 } 133 134 private abstract static class Handler { 135 abstract boolean includes(char c); 136 } 137 138 private Handler getHandler(String name) throws REException { 139 if (name.equals("Lower") || 140 name.equals("Upper") || 141 // name.equals("ASCII") || 142 name.equals("Alpha") || 143 name.equals("Digit") || 144 name.equals("Alnum") || 145 name.equals("Punct") || 146 name.equals("Graph") || 147 name.equals("Print") || 148 name.equals("Blank") || 149 name.equals("Cntrl") || 150 name.equals("XDigit") || 151 name.equals("Space") ) { 152 return new POSIXHandler(name); 153 } 154// if (name.startsWith("In")) { 155// try { 156// name = name.substring(2); 157// Character.UnicodeBlock block = Character.UnicodeBlock.forName(name); 158// return new UnicodeBlockHandler(block); 159// } 160// catch (IllegalArgumentException e) { 161// throw new REException("Invalid Unicode block name: " + name, REException.REG_ESCAPE, 0); 162// } 163// } 164 if (name.startsWith("Is")) { 165 name = name.substring(2); 166 } 167 168 // "grouped properties" 169 if (name.equals("L")) 170 return new UnicodeCategoriesHandler(LETTER); 171 if (name.equals("M")) 172 return new UnicodeCategoriesHandler(MARK); 173 if (name.equals("Z")) 174 return new UnicodeCategoriesHandler(SEPARATOR); 175 if (name.equals("S")) 176 return new UnicodeCategoriesHandler(SYMBOL); 177 if (name.equals("N")) 178 return new UnicodeCategoriesHandler(NUMBER); 179 if (name.equals("P")) 180 return new UnicodeCategoriesHandler(PUNCTUATION); 181 if (name.equals("C")) 182 return new UnicodeCategoriesHandler(OTHER); 183 184 if (name.equals("Mc")) 185 return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK); 186 if (name.equals("Pc")) 187 return new UnicodeCategoryHandler(Character.CONNECTOR_PUNCTUATION); 188 if (name.equals("Cc")) 189 return new UnicodeCategoryHandler(Character.CONTROL); 190 if (name.equals("Sc")) 191 return new UnicodeCategoryHandler(Character.CURRENCY_SYMBOL); 192 if (name.equals("Pd")) 193 return new UnicodeCategoryHandler(Character.DASH_PUNCTUATION); 194 if (name.equals("Nd")) 195 return new UnicodeCategoryHandler(Character.DECIMAL_DIGIT_NUMBER); 196 if (name.equals("Me")) 197 return new UnicodeCategoryHandler(Character.ENCLOSING_MARK); 198 if (name.equals("Pe")) 199 return new UnicodeCategoryHandler(Character.END_PUNCTUATION); 200// if (name.equals("Pf")) 201// return new UnicodeCategoryHandler(Character.FINAL_QUOTE_PUNCTUATION); 202 if (name.equals("Cf")) 203 return new UnicodeCategoryHandler(Character.FORMAT); 204// if (name.equals("Pi")) 205// return new UnicodeCategoryHandler(Character.INITIAL_QUOTE_PUNCTUATION); 206 if (name.equals("Nl")) 207 return new UnicodeCategoryHandler(Character.LETTER_NUMBER); 208 if (name.equals("Zl")) 209 return new UnicodeCategoryHandler(Character.LINE_SEPARATOR); 210 if (name.equals("Ll")) 211 return new UnicodeCategoryHandler(Character.LOWERCASE_LETTER); 212 if (name.equals("Sm")) 213 return new UnicodeCategoryHandler(Character.MATH_SYMBOL); 214 if (name.equals("Lm")) 215 return new UnicodeCategoryHandler(Character.MODIFIER_LETTER); 216 if (name.equals("Sk")) 217 return new UnicodeCategoryHandler(Character.MODIFIER_SYMBOL); 218 if (name.equals("Mn")) 219 return new UnicodeCategoryHandler(Character.NON_SPACING_MARK); 220 if (name.equals("Lo")) 221 return new UnicodeCategoryHandler(Character.OTHER_LETTER); 222 if (name.equals("No")) 223 return new UnicodeCategoryHandler(Character.OTHER_NUMBER); 224 if (name.equals("Po")) 225 return new UnicodeCategoryHandler(Character.OTHER_PUNCTUATION); 226 if (name.equals("So")) 227 return new UnicodeCategoryHandler(Character.OTHER_SYMBOL); 228 if (name.equals("Zp")) 229 return new UnicodeCategoryHandler(Character.PARAGRAPH_SEPARATOR); 230 if (name.equals("Co")) 231 return new UnicodeCategoryHandler(Character.PRIVATE_USE); 232 if (name.equals("Zs")) 233 return new UnicodeCategoryHandler(Character.SPACE_SEPARATOR); 234 if (name.equals("Ps")) 235 return new UnicodeCategoryHandler(Character.START_PUNCTUATION); 236 if (name.equals("Cs")) 237 return new UnicodeCategoryHandler(Character.SURROGATE); 238 if (name.equals("Lt")) 239 return new UnicodeCategoryHandler(Character.TITLECASE_LETTER); 240 if (name.equals("Cn")) 241 return new UnicodeCategoryHandler(Character.UNASSIGNED); 242 if (name.equals("Lu")) 243 return new UnicodeCategoryHandler(Character.UPPERCASE_LETTER); 244 throw new REException("unsupported name " + name, REException.REG_ESCAPE, 0); 245 } 246 247 private static class POSIXHandler extends Handler { 248 private RETokenPOSIX retoken; 249 250 private REMatch mymatch = new REMatch(0, 0, 0); 251 252 private char[] chars = new char[1]; 253 254 private CharIndexedCharArray ca = new CharIndexedCharArray(chars, 0); 255 256 POSIXHandler(String name) { 257 int posixId = RETokenPOSIX.intValue(name.toLowerCase()); 258 if (posixId != -1) 259 retoken = new RETokenPOSIX(0, posixId, false, false); 260 else 261 throw new RuntimeException("Unknown posix ID: " + name); 262 } 263 264 boolean includes(char c) { 265 chars[0] = c; 266 mymatch.index = 0; 267 return retoken.match(ca, mymatch); 268 } 269 } 270 271 private static class UnicodeCategoryHandler extends Handler { 272 UnicodeCategoryHandler(byte category) { 273 this.category = (int) category; 274 } 275 276 private int category; 277 278 boolean includes(char c) { 279 return Character.getType(c) == category; 280 } 281 } 282 283 private static class UnicodeCategoriesHandler extends Handler { 284 UnicodeCategoriesHandler(byte[] categories) { 285 this.categories = categories; 286 } 287 288 private byte[] categories; 289 290 boolean includes(char c) { 291 int category = Character.getType(c); 292 for (int i = 0; i < categories.length; i++) 293 if (category == categories[i]) 294 return true; 295 return false; 296 } 297 } 298 299// Backport from JDK 1.4 to 1.3 300// private static class UnicodeBlockHandler extends Handler { 301// public UnicodeBlockHandler(Character.UnicodeBlock block) { 302// this.block = block; 303// } 304// private Character.UnicodeBlock block; 305// public boolean includes(char c) { 306// Character.UnicodeBlock cblock = Character.UnicodeBlock.of(c); 307// return (cblock != null && cblock.equals(block)); 308// } 309// } 310 311}