PageRenderTime 43ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/jEdit/tags/jedit-4-3-pre5/gnu/regexp/REMatch.java

#
Java | 244 lines | 106 code | 22 blank | 116 comment | 24 complexity | 2c3aef086b530c31e1219d7830194060 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1. /*
  2. * gnu/regexp/REMatch.java
  3. * Copyright (C) 1998-2001 Wes Biggs
  4. *
  5. * This library is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU Lesser General Public License as published
  7. * by the Free Software Foundation; either version 2.1 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. package gnu.regexp;
  20. import java.io.Serializable;
  21. /**
  22. * An instance of this class represents a match
  23. * completed by a gnu.regexp matching function. It can be used
  24. * to obtain relevant information about the location of a match
  25. * or submatch.
  26. *
  27. * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
  28. */
  29. public final class REMatch implements Serializable, Cloneable {
  30. private String matchedText;
  31. // These variables are package scope for fast access within the engine
  32. int eflags; // execution flags this match was made using
  33. // Offset in source text where match was tried. This is zero-based;
  34. // the actual position in the source text is given by (offset + anchor).
  35. int offset;
  36. // Anchor position refers to the index into the source input
  37. // at which the matching operation began.
  38. // This is also useful for the ANCHORINDEX option.
  39. int anchor;
  40. // Package scope; used by RE.
  41. int index; // used while matching to mark current match position in input
  42. int[] start; // start positions (relative to offset) for each (sub)exp.
  43. int[] end; // end positions for the same
  44. REMatch next; // other possibility (to avoid having to use arrays)
  45. public Object clone() {
  46. try {
  47. REMatch copy = (REMatch) super.clone();
  48. copy.next = null;
  49. copy.start = (int[]) start.clone();
  50. copy.end = (int[]) end.clone();
  51. return copy;
  52. } catch (CloneNotSupportedException e) {
  53. throw new Error(); // doesn't happen
  54. }
  55. }
  56. void assignFrom(REMatch other) {
  57. start = other.start;
  58. end = other.end;
  59. index = other.index;
  60. // need to deep clone?
  61. next = other.next;
  62. }
  63. REMatch(int subs, int anchor, int eflags) {
  64. start = new int[subs+1];
  65. end = new int[subs+1];
  66. this.anchor = anchor;
  67. this.eflags = eflags;
  68. clear(anchor);
  69. }
  70. void finish(CharIndexed text) {
  71. start[0] = 0;
  72. StringBuffer sb = new StringBuffer();
  73. int i;
  74. for (i = 0; i < end[0]; i++)
  75. sb.append(text.charAt(i));
  76. matchedText = sb.toString();
  77. for (i = 0; i < start.length; i++) {
  78. // If any subexpressions didn't terminate, they don't count
  79. // TODO check if this code ever gets hit
  80. if ((start[i] == -1) ^ (end[i] == -1)) {
  81. start[i] = -1;
  82. end[i] = -1;
  83. }
  84. }
  85. next = null; // cut off alternates
  86. }
  87. /** Clears the current match and moves the offset to the new index. */
  88. void clear(int index) {
  89. offset = index;
  90. this.index = 0;
  91. for (int i = 0; i < start.length; i++) {
  92. start[i] = end[i] = -1;
  93. }
  94. next = null; // cut off alternates
  95. }
  96. /**
  97. * Returns the string matching the pattern. This makes it convenient
  98. * to write code like the following:
  99. * <P>
  100. * <code>
  101. * REMatch myMatch = myExpression.getMatch(myString);<br>
  102. * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
  103. * </code>
  104. */
  105. public String toString() {
  106. return matchedText;
  107. }
  108. /**
  109. * Returns the index within the input text where the match in its entirety
  110. * began.
  111. */
  112. public int getStartIndex() {
  113. return offset + start[0];
  114. }
  115. /**
  116. * Returns the index within the input string where the match in
  117. * its entirety ends. The return value is the next position after
  118. * the end of the string; therefore, a match created by the
  119. * following call:
  120. *
  121. * <P>
  122. * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
  123. * <P>
  124. * can be viewed (given that myMatch is not null) by creating
  125. * <P>
  126. * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
  127. * myMatch.getEndIndex());</code>
  128. * <P>
  129. * But you can save yourself that work, since the <code>toString()</code>
  130. * method (above) does exactly that for you.
  131. */
  132. public int getEndIndex() {
  133. return offset + end[0];
  134. }
  135. /**
  136. * Returns the string matching the given subexpression. The subexpressions
  137. * are indexed starting with one, not zero. That is, the subexpression
  138. * identified by the first set of parentheses in a regular expression
  139. * could be retrieved from an REMatch by calling match.toString(1).
  140. *
  141. * @param sub Index of the subexpression.
  142. */
  143. public String toString(int sub) {
  144. if ((sub >= start.length) || (start[sub] == -1)) return "";
  145. return (matchedText.substring(start[sub],end[sub]));
  146. }
  147. /**
  148. * Returns the index within the input string used to generate this match
  149. * where subexpression number <i>sub</i> begins, or <code>-1</code> if
  150. * the subexpression does not exist. The initial position is zero.
  151. *
  152. * @param sub Subexpression index
  153. * @deprecated Use getStartIndex(int) instead.
  154. */
  155. public int getSubStartIndex(int sub) {
  156. if (sub >= start.length) return -1;
  157. int x = start[sub];
  158. return (x == -1) ? x : offset + x;
  159. }
  160. /**
  161. * Returns the index within the input string used to generate this match
  162. * where subexpression number <i>sub</i> begins, or <code>-1</code> if
  163. * the subexpression does not exist. The initial position is zero.
  164. *
  165. * @param sub Subexpression index
  166. * @since gnu.regexp 1.1.0
  167. */
  168. public int getStartIndex(int sub) {
  169. if (sub >= start.length) return -1;
  170. int x = start[sub];
  171. return (x == -1) ? x : offset + x;
  172. }
  173. /**
  174. * Returns the index within the input string used to generate this match
  175. * where subexpression number <i>sub</i> ends, or <code>-1</code> if
  176. * the subexpression does not exist. The initial position is zero.
  177. *
  178. * @param sub Subexpression index
  179. * @deprecated Use getEndIndex(int) instead
  180. */
  181. public int getSubEndIndex(int sub) {
  182. if (sub >= start.length) return -1;
  183. int x = end[sub];
  184. return (x == -1) ? x : offset + x;
  185. }
  186. /**
  187. * Returns the index within the input string used to generate this match
  188. * where subexpression number <i>sub</i> ends, or <code>-1</code> if
  189. * the subexpression does not exist. The initial position is zero.
  190. *
  191. * @param sub Subexpression index
  192. */
  193. public int getEndIndex(int sub) {
  194. if (sub >= start.length) return -1;
  195. int x = end[sub];
  196. return (x == -1) ? x : offset + x;
  197. }
  198. /**
  199. * Substitute the results of this match to create a new string.
  200. * This is patterned after PERL, so the tokens to watch out for are
  201. * <code>$0</code> through <code>$9</code>. <code>$0</code> matches
  202. * the full substring matched; <code>$<i>n</i></code> matches
  203. * subexpression number <i>n</i>.
  204. *
  205. * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
  206. */
  207. public String substituteInto(String input) {
  208. // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
  209. StringBuffer output = new StringBuffer();
  210. int pos;
  211. for (pos = 0; pos < input.length()-1; pos++) {
  212. if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) {
  213. int val = Character.digit(input.charAt(++pos),10);
  214. if (val < start.length) {
  215. output.append(toString(val));
  216. }
  217. } else output.append(input.charAt(pos));
  218. }
  219. if (pos < input.length()) output.append(input.charAt(pos));
  220. return output.toString();
  221. }
  222. }