PageRenderTime 40ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/jEdit/tags/jedit-4-1-pre5/gnu/regexp/RESyntax.java

#
Java | 502 lines | 180 code | 81 blank | 241 comment | 3 complexity | 53fdb50ff98add51b9ff71eb42db3733 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1. /*
  2. * gnu/regexp/RESyntax.java
  3. * Copyright (C) 1998-2002 Wes Biggs
  4. *
  5. * This library is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published
  7. * by the Free Software Foundation; either version 2.1 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. package gnu.regexp;
  20. import java.io.Serializable;
  21. import java.util.BitSet;
  22. /**
  23. * An RESyntax specifies the way a regular expression will be compiled.
  24. * This class provides a number of predefined useful constants for
  25. * emulating popular regular expression syntaxes. Additionally the
  26. * user may construct his or her own syntax, using any combination of the
  27. * syntax bit constants. The syntax is an optional argument to any of the
  28. * matching methods on class RE.
  29. *
  30. * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
  31. */
  32. public final class RESyntax implements Serializable {
  33. static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator");
  34. private static final String SYNTAX_IS_FINAL = RE.getLocalizedMessage("syntax.final");
  35. private BitSet bits;
  36. // true for the constant defined syntaxes
  37. private boolean isFinal = false;
  38. private String lineSeparator = DEFAULT_LINE_SEPARATOR;
  39. // Values for constants are bit indexes
  40. /**
  41. * Syntax bit. Backslash is an escape character in lists.
  42. */
  43. public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;
  44. /**
  45. * Syntax bit. Use \? instead of ? and \+ instead of +.
  46. */
  47. public static final int RE_BK_PLUS_QM = 1;
  48. /**
  49. * Syntax bit. POSIX character classes ([:...:]) in lists are allowed.
  50. */
  51. public static final int RE_CHAR_CLASSES = 2;
  52. /**
  53. * Syntax bit. ^ and $ are special everywhere.
  54. * <B>Not implemented.</B>
  55. */
  56. public static final int RE_CONTEXT_INDEP_ANCHORS = 3;
  57. /**
  58. * Syntax bit. Repetition operators are only special in valid positions.
  59. * <B>Not implemented.</B>
  60. */
  61. public static final int RE_CONTEXT_INDEP_OPS = 4;
  62. /**
  63. * Syntax bit. Repetition and alternation operators are invalid
  64. * at start and end of pattern and other places.
  65. * <B>Not implemented</B>.
  66. */
  67. public static final int RE_CONTEXT_INVALID_OPS = 5;
  68. /**
  69. * Syntax bit. Match-any-character operator (.) matches a newline.
  70. */
  71. public static final int RE_DOT_NEWLINE = 6;
  72. /**
  73. * Syntax bit. Match-any-character operator (.) does not match a null.
  74. */
  75. public static final int RE_DOT_NOT_NULL = 7;
  76. /**
  77. * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed.
  78. */
  79. public static final int RE_INTERVALS = 8;
  80. /**
  81. * Syntax bit. No alternation (|), match one-or-more (+), or
  82. * match zero-or-one (?) operators.
  83. */
  84. public static final int RE_LIMITED_OPS = 9;
  85. /**
  86. * Syntax bit. Newline is an alternation operator.
  87. */
  88. public static final int RE_NEWLINE_ALT = 10; // impl.
  89. /**
  90. * Syntax bit. Intervals use { } instead of \{ \}
  91. */
  92. public static final int RE_NO_BK_BRACES = 11;
  93. /**
  94. * Syntax bit. Grouping uses ( ) instead of \( \).
  95. */
  96. public static final int RE_NO_BK_PARENS = 12;
  97. /**
  98. * Syntax bit. Backreferences not allowed.
  99. */
  100. public static final int RE_NO_BK_REFS = 13;
  101. /**
  102. * Syntax bit. Alternation uses | instead of \|
  103. */
  104. public static final int RE_NO_BK_VBAR = 14;
  105. /**
  106. * Syntax bit. <B>Not implemented</B>.
  107. */
  108. public static final int RE_NO_EMPTY_RANGES = 15;
  109. /**
  110. * Syntax bit. An unmatched right parenthesis (')' or '\)', depending
  111. * on RE_NO_BK_PARENS) will throw an exception when compiling.
  112. */
  113. public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;
  114. /**
  115. * Syntax bit. <B>Not implemented.</B>
  116. */
  117. public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;
  118. /**
  119. * Syntax bit. Stingy matching is allowed (+?, *?, ??, {x,y}?).
  120. */
  121. public static final int RE_STINGY_OPS = 18;
  122. /**
  123. * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W).
  124. */
  125. public static final int RE_CHAR_CLASS_ESCAPES = 19;
  126. /**
  127. * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved).
  128. */
  129. public static final int RE_PURE_GROUPING = 20;
  130. /**
  131. * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression
  132. * to the text following the current position without consuming that text.
  133. */
  134. public static final int RE_LOOKAHEAD = 21;
  135. /**
  136. * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z).
  137. */
  138. public static final int RE_STRING_ANCHORS = 22;
  139. /**
  140. * Syntax bit. Allow embedded comments, (?#comment), as in Perl5.
  141. */
  142. public static final int RE_COMMENTS = 23;
  143. /**
  144. * Syntax bit. Allow character class escapes within lists, as in Perl5.
  145. */
  146. public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;
  147. private static final int BIT_TOTAL = 25;
  148. /**
  149. * Predefined syntax.
  150. * Emulates regular expression support in the awk utility.
  151. */
  152. public static final RESyntax RE_SYNTAX_AWK;
  153. /**
  154. * Predefined syntax.
  155. * Emulates regular expression support in the ed utility.
  156. */
  157. public static final RESyntax RE_SYNTAX_ED;
  158. /**
  159. * Predefined syntax.
  160. * Emulates regular expression support in the egrep utility.
  161. */
  162. public static final RESyntax RE_SYNTAX_EGREP;
  163. /**
  164. * Predefined syntax.
  165. * Emulates regular expression support in the GNU Emacs editor.
  166. */
  167. public static final RESyntax RE_SYNTAX_EMACS;
  168. /**
  169. * Predefined syntax.
  170. * Emulates regular expression support in the grep utility.
  171. */
  172. public static final RESyntax RE_SYNTAX_GREP;
  173. /**
  174. * Predefined syntax.
  175. * Emulates regular expression support in the POSIX awk specification.
  176. */
  177. public static final RESyntax RE_SYNTAX_POSIX_AWK;
  178. /**
  179. * Predefined syntax.
  180. * Emulates POSIX basic regular expression support.
  181. */
  182. public static final RESyntax RE_SYNTAX_POSIX_BASIC;
  183. /**
  184. * Predefined syntax.
  185. * Emulates regular expression support in the POSIX egrep specification.
  186. */
  187. public static final RESyntax RE_SYNTAX_POSIX_EGREP;
  188. /**
  189. * Predefined syntax.
  190. * Emulates POSIX extended regular expression support.
  191. */
  192. public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;
  193. /**
  194. * Predefined syntax.
  195. * Emulates POSIX basic minimal regular expressions.
  196. */
  197. public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;
  198. /**
  199. * Predefined syntax.
  200. * Emulates POSIX extended minimal regular expressions.
  201. */
  202. public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;
  203. /**
  204. * Predefined syntax.
  205. * Emulates regular expression support in the sed utility.
  206. */
  207. public static final RESyntax RE_SYNTAX_SED;
  208. /**
  209. * Predefined syntax.
  210. * Emulates regular expression support in Larry Wall's perl, version 4,
  211. */
  212. public static final RESyntax RE_SYNTAX_PERL4;
  213. /**
  214. * Predefined syntax.
  215. * Emulates regular expression support in Larry Wall's perl, version 4,
  216. * using single line mode (/s modifier).
  217. */
  218. public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)
  219. /**
  220. * Predefined syntax.
  221. * Emulates regular expression support in Larry Wall's perl, version 5.
  222. */
  223. public static final RESyntax RE_SYNTAX_PERL5;
  224. /**
  225. * Predefined syntax.
  226. * Emulates regular expression support in Larry Wall's perl, version 5,
  227. * using single line mode (/s modifier).
  228. */
  229. public static final RESyntax RE_SYNTAX_PERL5_S;
  230. /**
  231. * Predefined syntax.
  232. * Emulates regular expression support in Java 1.4's java.util.regex
  233. * package.
  234. */
  235. public static final RESyntax RE_SYNTAX_JAVA_1_4;
  236. static {
  237. // Define syntaxes
  238. RE_SYNTAX_EMACS = new RESyntax().makeFinal();
  239. RESyntax RE_SYNTAX_POSIX_COMMON = new RESyntax()
  240. .set(RE_CHAR_CLASSES)
  241. .set(RE_DOT_NEWLINE)
  242. .set(RE_DOT_NOT_NULL)
  243. .set(RE_INTERVALS)
  244. .set(RE_NO_EMPTY_RANGES)
  245. .makeFinal();
  246. RE_SYNTAX_POSIX_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
  247. .set(RE_BK_PLUS_QM)
  248. .makeFinal();
  249. RE_SYNTAX_POSIX_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
  250. .set(RE_CONTEXT_INDEP_ANCHORS)
  251. .set(RE_CONTEXT_INDEP_OPS)
  252. .set(RE_NO_BK_BRACES)
  253. .set(RE_NO_BK_PARENS)
  254. .set(RE_NO_BK_VBAR)
  255. .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
  256. .makeFinal();
  257. RE_SYNTAX_AWK = new RESyntax()
  258. .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
  259. .set(RE_DOT_NOT_NULL)
  260. .set(RE_NO_BK_PARENS)
  261. .set(RE_NO_BK_REFS)
  262. .set(RE_NO_BK_VBAR)
  263. .set(RE_NO_EMPTY_RANGES)
  264. .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
  265. .makeFinal();
  266. RE_SYNTAX_POSIX_AWK = new RESyntax(RE_SYNTAX_POSIX_EXTENDED)
  267. .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
  268. .makeFinal();
  269. RE_SYNTAX_GREP = new RESyntax()
  270. .set(RE_BK_PLUS_QM)
  271. .set(RE_CHAR_CLASSES)
  272. .set(RE_HAT_LISTS_NOT_NEWLINE)
  273. .set(RE_INTERVALS)
  274. .set(RE_NEWLINE_ALT)
  275. .makeFinal();
  276. RE_SYNTAX_EGREP = new RESyntax()
  277. .set(RE_CHAR_CLASSES)
  278. .set(RE_CONTEXT_INDEP_ANCHORS)
  279. .set(RE_CONTEXT_INDEP_OPS)
  280. .set(RE_HAT_LISTS_NOT_NEWLINE)
  281. .set(RE_NEWLINE_ALT)
  282. .set(RE_NO_BK_PARENS)
  283. .set(RE_NO_BK_VBAR)
  284. .makeFinal();
  285. RE_SYNTAX_POSIX_EGREP = new RESyntax(RE_SYNTAX_EGREP)
  286. .set(RE_INTERVALS)
  287. .set(RE_NO_BK_BRACES)
  288. .makeFinal();
  289. /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
  290. RE_SYNTAX_ED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
  291. .makeFinal();
  292. RE_SYNTAX_SED = new RESyntax(RE_SYNTAX_POSIX_BASIC)
  293. .makeFinal();
  294. RE_SYNTAX_POSIX_MINIMAL_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON)
  295. .set(RE_LIMITED_OPS)
  296. .makeFinal();
  297. /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
  298. replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
  299. RE_SYNTAX_POSIX_MINIMAL_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON)
  300. .set(RE_CONTEXT_INDEP_ANCHORS)
  301. .set(RE_CONTEXT_INVALID_OPS)
  302. .set(RE_NO_BK_BRACES)
  303. .set(RE_NO_BK_PARENS)
  304. .set(RE_NO_BK_REFS)
  305. .set(RE_NO_BK_VBAR)
  306. .set(RE_UNMATCHED_RIGHT_PAREN_ORD)
  307. .makeFinal();
  308. /* There is no official Perl spec, but here's a "best guess" */
  309. RE_SYNTAX_PERL4 = new RESyntax()
  310. .set(RE_BACKSLASH_ESCAPE_IN_LISTS)
  311. .set(RE_CONTEXT_INDEP_ANCHORS)
  312. .set(RE_CONTEXT_INDEP_OPS) // except for '{', apparently
  313. .set(RE_INTERVALS)
  314. .set(RE_NO_BK_BRACES)
  315. .set(RE_NO_BK_PARENS)
  316. .set(RE_NO_BK_VBAR)
  317. .set(RE_NO_EMPTY_RANGES)
  318. .set(RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S
  319. .makeFinal();
  320. RE_SYNTAX_PERL4_S = new RESyntax(RE_SYNTAX_PERL4)
  321. .set(RE_DOT_NEWLINE)
  322. .makeFinal();
  323. RE_SYNTAX_PERL5 = new RESyntax(RE_SYNTAX_PERL4)
  324. .set(RE_PURE_GROUPING) // (?:)
  325. .set(RE_STINGY_OPS) // *?,??,+?,{}?
  326. .set(RE_LOOKAHEAD) // (?=)(?!)
  327. .set(RE_STRING_ANCHORS) // \A,\Z
  328. .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
  329. .set(RE_COMMENTS) // (?#)
  330. .makeFinal();
  331. RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)
  332. .set(RE_DOT_NEWLINE)
  333. .makeFinal();
  334. RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
  335. // XXX
  336. .makeFinal();
  337. }
  338. /**
  339. * Construct a new syntax object with all bits turned off.
  340. * This is equivalent to RE_SYNTAX_EMACS.
  341. */
  342. public RESyntax() {
  343. bits = new BitSet(BIT_TOTAL);
  344. }
  345. /**
  346. * Called internally when constructing predefined syntaxes
  347. * so their interpretation cannot vary. Conceivably useful
  348. * for your syntaxes as well. Causes IllegalAccessError to
  349. * be thrown if any attempt to modify the syntax is made.
  350. *
  351. * @return this object for convenient chaining
  352. */
  353. public RESyntax makeFinal() {
  354. isFinal = true;
  355. return this;
  356. }
  357. /**
  358. * Construct a new syntax object with all bits set the same
  359. * as the other syntax.
  360. */
  361. public RESyntax(RESyntax other) {
  362. bits = (BitSet) other.bits.clone();
  363. }
  364. /**
  365. * Check if a given bit is set in this syntax.
  366. */
  367. public boolean get(int index) {
  368. return bits.get(index);
  369. }
  370. /**
  371. * Set a given bit in this syntax.
  372. *
  373. * @param index the constant (RESyntax.RE_xxx) bit to set.
  374. * @return a reference to this object for easy chaining.
  375. */
  376. public RESyntax set(int index) {
  377. if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
  378. bits.set(index);
  379. return this;
  380. }
  381. /**
  382. * Clear a given bit in this syntax.
  383. *
  384. * @param index the constant (RESyntax.RE_xxx) bit to clear.
  385. * @return a reference to this object for easy chaining.
  386. */
  387. public RESyntax clear(int index) {
  388. if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
  389. bits.clear(index);
  390. return this;
  391. }
  392. /**
  393. * Changes the line separator string for regular expressions
  394. * created using this RESyntax. The default separator is the
  395. * value returned by the system property "line.separator", which
  396. * should be correct when reading platform-specific files from a
  397. * filesystem. However, many programs may collect input from
  398. * sources where the line separator is differently specified (for
  399. * example, in the applet environment, the text box widget
  400. * interprets line breaks as single-character newlines,
  401. * regardless of the host platform.
  402. *
  403. * Note that setting the line separator to a character or
  404. * characters that have specific meaning within the current syntax
  405. * can cause unexpected chronosynclastic infundibula.
  406. *
  407. * @return this object for convenient chaining
  408. */
  409. public RESyntax setLineSeparator(String aSeparator) {
  410. if (isFinal) throw new IllegalAccessError(SYNTAX_IS_FINAL);
  411. lineSeparator = aSeparator;
  412. return this;
  413. }
  414. /**
  415. * Returns the currently active line separator string. The default
  416. * is the platform-dependent system property "line.separator".
  417. */
  418. public String getLineSeparator() {
  419. return lineSeparator;
  420. }
  421. }