/library/java/util/regex/ASCII.java

https://bitbucket.org/chancey/z · Java · 274 lines · 205 code · 40 blank · 29 comment · 3 complexity · 12717b4deeb41a61f8bef59420d7620e MD5 · raw file

  1. /*
  2. * Copyright (c) 1999, 2000, Oracle and/or its affiliates. All rights reserved.
  3. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4. *
  5. * This code is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 only, as
  7. * published by the Free Software Foundation. Oracle designates this
  8. * particular file as subject to the "Classpath" exception as provided
  9. * by Oracle in the LICENSE file that accompanied this code.
  10. *
  11. * This code is distributed in the hope that it will be useful, but WITHOUT
  12. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  14. * version 2 for more details (a copy is included in the LICENSE file that
  15. * accompanied this code).
  16. *
  17. * You should have received a copy of the GNU General Public License version
  18. * 2 along with this work; if not, write to the Free Software Foundation,
  19. * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20. *
  21. * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22. * or visit www.oracle.com if you need additional information or have any
  23. * questions.
  24. */
  25. package java.util.regex;
  /**
   * Utility class that implements the standard C ctype functionality
   * for the 7-bit ASCII range (code points 0x00 through 0x7F).
   *
   * <p>Classification is driven by a 128-entry lookup table. Each table
   * entry combines character-class flag bits (all at or above
   * {@code 0x100}) with, for alphanumeric characters, a numeric value
   * stored in the low bits (0-9 for decimal digits, 10-35 for letters of
   * either case). Characters outside the ASCII range have type {@code 0}
   * and therefore belong to no class.
   *
   * <p>All members are static; the class is not meant to be instantiated.
   *
   * @author Hong Zhang
   */
  final class ASCII {

      /** Flag: upper-case letter {@code A-Z}. */
      static final int UPPER = 0x00000100;

      /** Flag: lower-case letter {@code a-z}. */
      static final int LOWER = 0x00000200;

      /** Flag: decimal digit {@code 0-9}. */
      static final int DIGIT = 0x00000400;

      /** Flag: white space (HT, LF, VT, FF, CR and the space character). */
      static final int SPACE = 0x00000800;

      /** Flag: punctuation character. */
      static final int PUNCT = 0x00001000;

      /** Flag: control character (0x00-0x1F and DEL). */
      static final int CNTRL = 0x00002000;

      /** Flag: blank, i.e. horizontal tab or the space character. */
      static final int BLANK = 0x00004000;

      /** Flag: hexadecimal digit {@code 0-9}, {@code A-F}, {@code a-f}. */
      static final int HEX = 0x00008000;

      /** Flag: the underscore character; only {@code '_'} carries it. */
      static final int UNDER = 0x00010000;

      /**
       * Mask covering the eight flag bits {@link #UPPER} through
       * {@link #HEX}. Note that {@link #UNDER} lies outside this mask.
       */
      static final int ASCII = 0x0000FF00;

      /** Composite class: any letter. */
      static final int ALPHA = (UPPER|LOWER);

      /** Composite class: any letter or decimal digit. */
      static final int ALNUM = (UPPER|LOWER|DIGIT);

      /** Composite class: any visible character (punctuation, letter or digit). */
      static final int GRAPH = (PUNCT|UPPER|LOWER|DIGIT);

      /** Composite class: a word character (letter, digit or underscore). */
      static final int WORD = (UPPER|LOWER|UNDER|DIGIT);

      /** Composite class: hexadecimal digit; alias of {@link #HEX}. */
      static final int XDIGIT = (HEX);

      /**
       * Type table indexed by ASCII code point (0x00-0x7F). The flag bits
       * describe the classes the character belongs to; for alphanumerics
       * the low bits additionally hold the character's digit value, which
       * {@link #toDigit(int)} extracts with the mask {@code 0x3F}.
       */
      private static final int[] ctype = new int[] {
          CNTRL,                  /* 00 (NUL) */
          CNTRL,                  /* 01 (SOH) */
          CNTRL,                  /* 02 (STX) */
          CNTRL,                  /* 03 (ETX) */
          CNTRL,                  /* 04 (EOT) */
          CNTRL,                  /* 05 (ENQ) */
          CNTRL,                  /* 06 (ACK) */
          CNTRL,                  /* 07 (BEL) */
          CNTRL,                  /* 08 (BS)  */
          SPACE+CNTRL+BLANK,      /* 09 (HT)  */
          SPACE+CNTRL,            /* 0A (LF)  */
          SPACE+CNTRL,            /* 0B (VT)  */
          SPACE+CNTRL,            /* 0C (FF)  */
          SPACE+CNTRL,            /* 0D (CR)  */
          CNTRL,                  /* 0E (SI)  */
          CNTRL,                  /* 0F (SO)  */
          CNTRL,                  /* 10 (DLE) */
          CNTRL,                  /* 11 (DC1) */
          CNTRL,                  /* 12 (DC2) */
          CNTRL,                  /* 13 (DC3) */
          CNTRL,                  /* 14 (DC4) */
          CNTRL,                  /* 15 (NAK) */
          CNTRL,                  /* 16 (SYN) */
          CNTRL,                  /* 17 (ETB) */
          CNTRL,                  /* 18 (CAN) */
          CNTRL,                  /* 19 (EM)  */
          CNTRL,                  /* 1A (SUB) */
          CNTRL,                  /* 1B (ESC) */
          CNTRL,                  /* 1C (FS)  */
          CNTRL,                  /* 1D (GS)  */
          CNTRL,                  /* 1E (RS)  */
          CNTRL,                  /* 1F (US)  */
          SPACE+BLANK,            /* 20 SPACE */
          PUNCT,                  /* 21 !     */
          PUNCT,                  /* 22 "     */
          PUNCT,                  /* 23 #     */
          PUNCT,                  /* 24 $     */
          PUNCT,                  /* 25 %     */
          PUNCT,                  /* 26 &     */
          PUNCT,                  /* 27 '     */
          PUNCT,                  /* 28 (     */
          PUNCT,                  /* 29 )     */
          PUNCT,                  /* 2A *     */
          PUNCT,                  /* 2B +     */
          PUNCT,                  /* 2C ,     */
          PUNCT,                  /* 2D -     */
          PUNCT,                  /* 2E .     */
          PUNCT,                  /* 2F /     */
          DIGIT+HEX+0,            /* 30 0     */
          DIGIT+HEX+1,            /* 31 1     */
          DIGIT+HEX+2,            /* 32 2     */
          DIGIT+HEX+3,            /* 33 3     */
          DIGIT+HEX+4,            /* 34 4     */
          DIGIT+HEX+5,            /* 35 5     */
          DIGIT+HEX+6,            /* 36 6     */
          DIGIT+HEX+7,            /* 37 7     */
          DIGIT+HEX+8,            /* 38 8     */
          DIGIT+HEX+9,            /* 39 9     */
          PUNCT,                  /* 3A :     */
          PUNCT,                  /* 3B ;     */
          PUNCT,                  /* 3C <     */
          PUNCT,                  /* 3D =     */
          PUNCT,                  /* 3E >     */
          PUNCT,                  /* 3F ?     */
          PUNCT,                  /* 40 @     */
          UPPER+HEX+10,           /* 41 A     */
          UPPER+HEX+11,           /* 42 B     */
          UPPER+HEX+12,           /* 43 C     */
          UPPER+HEX+13,           /* 44 D     */
          UPPER+HEX+14,           /* 45 E     */
          UPPER+HEX+15,           /* 46 F     */
          UPPER+16,               /* 47 G     */
          UPPER+17,               /* 48 H     */
          UPPER+18,               /* 49 I     */
          UPPER+19,               /* 4A J     */
          UPPER+20,               /* 4B K     */
          UPPER+21,               /* 4C L     */
          UPPER+22,               /* 4D M     */
          UPPER+23,               /* 4E N     */
          UPPER+24,               /* 4F O     */
          UPPER+25,               /* 50 P     */
          UPPER+26,               /* 51 Q     */
          UPPER+27,               /* 52 R     */
          UPPER+28,               /* 53 S     */
          UPPER+29,               /* 54 T     */
          UPPER+30,               /* 55 U     */
          UPPER+31,               /* 56 V     */
          UPPER+32,               /* 57 W     */
          UPPER+33,               /* 58 X     */
          UPPER+34,               /* 59 Y     */
          UPPER+35,               /* 5A Z     */
          PUNCT,                  /* 5B [     */
          PUNCT,                  /* 5C \     */
          PUNCT,                  /* 5D ]     */
          PUNCT,                  /* 5E ^     */
          PUNCT|UNDER,            /* 5F _     */
          PUNCT,                  /* 60 `     */
          LOWER+HEX+10,           /* 61 a     */
          LOWER+HEX+11,           /* 62 b     */
          LOWER+HEX+12,           /* 63 c     */
          LOWER+HEX+13,           /* 64 d     */
          LOWER+HEX+14,           /* 65 e     */
          LOWER+HEX+15,           /* 66 f     */
          LOWER+16,               /* 67 g     */
          LOWER+17,               /* 68 h     */
          LOWER+18,               /* 69 i     */
          LOWER+19,               /* 6A j     */
          LOWER+20,               /* 6B k     */
          LOWER+21,               /* 6C l     */
          LOWER+22,               /* 6D m     */
          LOWER+23,               /* 6E n     */
          LOWER+24,               /* 6F o     */
          LOWER+25,               /* 70 p     */
          LOWER+26,               /* 71 q     */
          LOWER+27,               /* 72 r     */
          LOWER+28,               /* 73 s     */
          LOWER+29,               /* 74 t     */
          LOWER+30,               /* 75 u     */
          LOWER+31,               /* 76 v     */
          LOWER+32,               /* 77 w     */
          LOWER+33,               /* 78 x     */
          LOWER+34,               /* 79 y     */
          LOWER+35,               /* 7A z     */
          PUNCT,                  /* 7B {     */
          PUNCT,                  /* 7C |     */
          PUNCT,                  /* 7D }     */
          PUNCT,                  /* 7E ~     */
          CNTRL,                  /* 7F (DEL) */
      };

      /** Not instantiable: this class only hosts static constants and helpers. */
      private ASCII() {
      }

      /**
       * Returns the raw table entry for a character.
       *
       * @param ch the code point to look up
       * @return the flag bits (plus digit value, if any) for {@code ch},
       *         or {@code 0} when {@code ch} is outside 0x00-0x7F
       *         (negative values included)
       */
      static int getType(int ch) {
          // Any bit set above bit 6 means the value is not 7-bit ASCII.
          return ((ch & 0xFFFFFF80) == 0 ? ctype[ch] : 0);
      }

      /**
       * Tests whether a character belongs to at least one of the given classes.
       *
       * @param ch   the code point to test
       * @param type one flag or an OR-combination of flags
       * @return {@code true} if any bit of {@code type} is set for {@code ch}
       */
      static boolean isType(int ch, int type) {
          return (getType(ch) & type) != 0;
      }

      /** Returns {@code true} if {@code ch} lies in the range 0x00-0x7F. */
      static boolean isAscii(int ch) {
          return ((ch & 0xFFFFFF80) == 0);
      }

      /** Returns {@code true} if {@code ch} is an ASCII letter. */
      static boolean isAlpha(int ch) {
          return isType(ch, ALPHA);
      }

      /** Returns {@code true} if {@code ch} is one of {@code '0'-'9'}. */
      static boolean isDigit(int ch) {
          // Table-free range check: the OR is non-negative only when both
          // differences are non-negative, i.e. '0' <= ch <= '9'.
          return ((ch-'0')|('9'-ch)) >= 0;
      }

      /** Returns {@code true} if {@code ch} is an ASCII letter or decimal digit. */
      static boolean isAlnum(int ch) {
          return isType(ch, ALNUM);
      }

      /** Returns {@code true} if {@code ch} is punctuation, a letter or a digit. */
      static boolean isGraph(int ch) {
          return isType(ch, GRAPH);
      }

      /** Returns {@code true} if {@code ch} lies in 0x20 (space) through 0x7E ('~'). */
      static boolean isPrint(int ch) {
          return ((ch-0x20)|(0x7E-ch)) >= 0;
      }

      /** Returns {@code true} if {@code ch} is an ASCII punctuation character. */
      static boolean isPunct(int ch) {
          return isType(ch, PUNCT);
      }

      /** Returns {@code true} if {@code ch} is HT, LF, VT, FF, CR or space. */
      static boolean isSpace(int ch) {
          return isType(ch, SPACE);
      }

      /** Returns {@code true} if {@code ch} is one of {@code 0-9}, {@code A-F}, {@code a-f}. */
      static boolean isHexDigit(int ch) {
          return isType(ch, HEX);
      }

      /** Returns {@code true} if {@code ch} is one of {@code '0'-'7'}. */
      static boolean isOctDigit(int ch) {
          return ((ch-'0')|('7'-ch)) >= 0;
      }

      /** Returns {@code true} if {@code ch} is a control character (0x00-0x1F or DEL). */
      static boolean isCntrl(int ch) {
          return isType(ch, CNTRL);
      }

      /** Returns {@code true} if {@code ch} is one of {@code 'a'-'z'}. */
      static boolean isLower(int ch) {
          return ((ch-'a')|('z'-ch)) >= 0;
      }

      /** Returns {@code true} if {@code ch} is one of {@code 'A'-'Z'}. */
      static boolean isUpper(int ch) {
          return ((ch-'A')|('Z'-ch)) >= 0;
      }

      /** Returns {@code true} if {@code ch} is an ASCII letter, digit or underscore. */
      static boolean isWord(int ch) {
          return isType(ch, WORD);
      }

      /**
       * Returns the numeric value stored in the table for a character:
       * 0-9 for {@code '0'-'9'} and 10-35 for {@code 'A'-'Z'} / {@code 'a'-'z'}.
       *
       * <p>No validation is performed. Only the low seven bits of
       * {@code ch} are used as the table index, so non-ASCII input is
       * folded onto an ASCII entry, and every non-alphanumeric character
       * yields {@code 0} - the same result as {@code '0'}. Callers that
       * need to distinguish these cases must classify the character first.
       *
       * @param ch the code point to convert
       * @return the low six bits of the table entry for {@code ch & 0x7F}
       */
      static int toDigit(int ch) {
          return (ctype[ch & 0x7F] & 0x3F);
      }

      /**
       * Converts {@code 'A'-'Z'} to the matching lower-case letter and
       * returns every other value unchanged.
       */
      static int toLower(int ch) {
          return isUpper(ch) ? (ch + 0x20) : ch;
      }

      /**
       * Converts {@code 'a'-'z'} to the matching upper-case letter and
       * returns every other value unchanged.
       */
      static int toUpper(int ch) {
          return isLower(ch) ? (ch - 0x20) : ch;
      }
  }