/interpreter/tags/at2dist130208/src/edu/vub/util/regexp/RETokenRepeated.java

http://ambienttalk.googlecode.com/ · Java · 330 lines · 212 code · 48 blank · 70 comment · 79 complexity · ed07b97f569621ca384b513a2b54737c MD5 · raw file

  1. /* gnu/regexp/RETokenRepeated.java
  2. Copyright (C) 2006 Free Software Foundation, Inc.
  3. This file is part of GNU Classpath.
  4. GNU Classpath is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2, or (at your option)
  7. any later version.
  8. GNU Classpath is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNU Classpath; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  15. 02110-1301 USA.
  16. Linking this library statically or dynamically with other modules is
  17. making a combined work based on this library. Thus, the terms and
  18. conditions of the GNU General Public License cover the whole
  19. combination.
  20. As a special exception, the copyright holders of this library give you
  21. permission to link this library with independent modules to produce an
  22. executable, regardless of the license terms of these independent
  23. modules, and to copy and distribute the resulting executable under
  24. terms of your choice, provided that you also meet, for each linked
  25. independent module, the terms and conditions of the license of that
  26. module. An independent module is a module which is not derived from
  27. or based on this library. If you modify this library, you may extend
  28. this exception to your version of the library, but you are not
  29. obligated to do so. If you do not wish to do so, delete this
  30. exception statement from your version. */
  31. package edu.vub.util.regexp;
  32. final class RETokenRepeated extends REToken {
  33. private REToken token;
  34. private int min,max;
  35. private boolean stingy;
  36. private boolean possessive;
  37. RETokenRepeated(int subIndex, REToken token, int min, int max) {
  38. super(subIndex);
  39. this.token = token;
  40. this.min = min;
  41. this.max = max;
  42. }
  43. /** Sets the minimal matching mode to true. */
  44. void makeStingy() {
  45. stingy = true;
  46. }
  47. /** Queries if this token has minimal matching enabled. */
  48. boolean isStingy() {
  49. return stingy;
  50. }
  51. /** Sets possessive matching mode to true. */
  52. void makePossessive() {
  53. possessive = true;
  54. }
  55. /** Queries if this token has possessive matching enabled. */
  56. boolean isPossessive() {
  57. return possessive;
  58. }
  59. /**
  60. * The minimum length of a repeated token is the minimum length
  61. * of the token multiplied by the minimum number of times it must
  62. * match.
  63. */
  64. int getMinimumLength() {
  65. return (min * token.getMinimumLength());
  66. }
  67. int getMaximumLength() {
  68. if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE;
  69. int tmax = token.getMaximumLength();
  70. if (tmax == Integer.MAX_VALUE) return tmax;
  71. return (max * tmax);
  72. }
  73. private static REMatch findDoables(REToken tk,
  74. CharIndexed input, REMatch mymatch) {
  75. REMatch.REMatchList doables = new REMatch.REMatchList();
  76. // try next repeat at all possible positions
  77. for (REMatch current = mymatch;
  78. current != null; current = current.next) {
  79. REMatch recurrent = (REMatch) current.clone();
  80. int origin = recurrent.index;
  81. tk = (REToken) tk.clone();
  82. tk.next = tk.uncle = null;
  83. recurrent.matchFlags |= REMatch.MF_FIND_ALL;
  84. if (tk.match(input, recurrent)) {
  85. for (REMatch m = recurrent; m != null; m = m.next) {
  86. m.matchFlags &= ~REMatch.MF_FIND_ALL;
  87. }
  88. if (recurrent.index == origin) recurrent.empty = true;
  89. // add all items in current to doables array
  90. doables.addTail(recurrent);
  91. }
  92. }
  93. return doables.head;
  94. }
  95. // We do need to save every possible point, but the number of clone()
  96. // invocations here is really a killer for performance on non-stingy
  97. // repeat operators. I'm open to suggestions...
  98. // Hypothetical question: can you have a RE that matches 1 times,
  99. // 3 times, 5 times, but not 2 times or 4 times? Does having
  100. // the subexpression back-reference operator allow that?
  101. boolean match(CharIndexed input, REMatch mymatch) {
  102. boolean stopMatchingIfSatisfied =
  103. (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
  104. REMatch newMatch = matchMinimum(input, mymatch);
  105. if (newMatch == null) return false;
  106. // Array of positions we have already visited
  107. int[] visited = initVisited();
  108. for (REMatch m = newMatch; m != null; m = m.next) {
  109. visited = addVisited(m.index, visited);
  110. }
  111. int max1 = decreaseMax(max, min);
  112. newMatch = _match(input, newMatch, max1,
  113. stopMatchingIfSatisfied, visited);
  114. if (newMatch != null) {
  115. mymatch.assignFrom(newMatch);
  116. return true;
  117. }
  118. return false;
  119. }
  120. private static int decreaseMax(int m, int n) {
  121. if (m == Integer.MAX_VALUE) return m;
  122. return m - n;
  123. }
  124. // Array visited is an array of character positions we have already
  125. // visited. visited[0] is used to store the effective length of the
  126. // array.
  127. private static int[] initVisited() {
  128. int[] visited = new int[32];
  129. visited[0] = 0;
  130. return visited;
  131. }
  132. private static boolean visitedContains(int n, int[] visited) {
  133. // Experience tells that for a small array like this,
  134. // simple linear search is faster than binary search.
  135. for (int i = 1; i < visited[0]; i++) {
  136. if (n == visited[i]) return true;
  137. }
  138. return false;
  139. }
  140. private static int[] addVisited(int n, int[] visited) {
  141. if (visitedContains(n, visited)) return visited;
  142. if (visited[0] >= visited.length - 1) {
  143. int[] newvisited = new int[visited.length + 32];
  144. System.arraycopy(visited, 0, newvisited, 0, visited.length);
  145. visited = newvisited;
  146. }
  147. visited[0]++;
  148. visited[visited[0]] = n;
  149. return visited;
  150. }
  151. private REMatch _match(CharIndexed input, REMatch mymatch,
  152. int max1, boolean stopMatchingIfSatisfied,
  153. int[] visited) {
  154. if (max1 == 0) {
  155. return matchRest(input, mymatch);
  156. }
  157. max1 = decreaseMax(max1, 1);
  158. REMatch.REMatchList allResults = new REMatch.REMatchList();
  159. // Depth-first search
  160. for (REMatch cur = mymatch; cur != null; cur = cur.next) {
  161. REMatch cur1 = (REMatch) cur.clone();
  162. if (stingy) {
  163. REMatch results = matchRest(input, cur1);
  164. if (results != null) {
  165. if (stopMatchingIfSatisfied) {
  166. return results;
  167. }
  168. allResults.addTail(results);
  169. }
  170. }
  171. DO_THIS:
  172. do {
  173. boolean emptyMatchFound = false;
  174. REMatch doables = findDoables(token, input, cur1);
  175. if (doables == null) break DO_THIS;
  176. if (doables.empty) emptyMatchFound = true;
  177. if (!emptyMatchFound) {
  178. REMatch.REMatchList list = new REMatch.REMatchList();
  179. for (REMatch m = doables; m != null; m = m.next) {
  180. REMatch m1 = (REMatch) m.clone();
  181. int n = m1.index;
  182. if (! visitedContains(n, visited)) {
  183. visited = addVisited(n, visited);
  184. list.addTail(m1);
  185. }
  186. }
  187. if (list.head == null) break DO_THIS;
  188. doables = list.head;
  189. }
  190. for (REMatch m = doables; m != null; m = m.next) {
  191. if (! emptyMatchFound) {
  192. REMatch m1 = _match(input, m, max1,
  193. stopMatchingIfSatisfied, visited);
  194. if (possessive) return m1;
  195. if (m1 != null) {
  196. if (stopMatchingIfSatisfied) {
  197. return m1;
  198. }
  199. allResults.addTail(m1);
  200. }
  201. }
  202. else {
  203. REMatch m1 = matchRest(input, m);
  204. if (m1 != null) {
  205. if (stopMatchingIfSatisfied) {
  206. return m1;
  207. }
  208. allResults.addTail(m1);
  209. }
  210. }
  211. }
  212. } while (false); // DO_THIS only once;
  213. // This point itself is a candidate.
  214. if (!stingy) {
  215. REMatch m2 = matchRest(input, cur1);
  216. if (m2 != null) {
  217. if (stopMatchingIfSatisfied) {
  218. return m2;
  219. }
  220. allResults.addTail(m2);
  221. }
  222. }
  223. }
  224. return allResults.head;
  225. }
  226. private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) {
  227. // Possible positions for the next repeat to match at
  228. REMatch newMatch = mymatch;
  229. // number of times we've matched so far
  230. int numRepeats = 0;
  231. while (numRepeats < min) {
  232. REMatch doables = findDoables(token, input, newMatch);
  233. // if none of the possibilities worked out,
  234. // it means that minimum number of repeats could not be found.
  235. if (doables == null) return null;
  236. // reassign where the next repeat can match
  237. newMatch = doables;
  238. // increment how many repeats we've successfully found
  239. ++numRepeats;
  240. if (newMatch.empty) break;
  241. }
  242. return newMatch;
  243. }
  244. private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
  245. REMatch current, single;
  246. REMatch.REMatchList doneIndex = new REMatch.REMatchList();
  247. // Test all possible matches for this number of repeats
  248. for (current = newMatch; current != null; current = current.next) {
  249. // clone() separates a single match from the chain
  250. single = (REMatch) current.clone();
  251. if (next(input, single)) {
  252. // chain results to doneIndex
  253. doneIndex.addTail(single);
  254. }
  255. }
  256. return doneIndex.head;
  257. }
  258. void dump(StringBuffer os) {
  259. os.append("(?:");
  260. token.dumpAll(os);
  261. os.append(')');
  262. if ((max == Integer.MAX_VALUE) && (min <= 1))
  263. os.append( (min == 0) ? '*' : '+' );
  264. else if ((min == 0) && (max == 1))
  265. os.append('?');
  266. else {
  267. os.append('{').append(min);
  268. if (max > min) {
  269. os.append(',');
  270. if (max != Integer.MAX_VALUE) os.append(max);
  271. }
  272. os.append('}');
  273. }
  274. if (stingy) os.append('?');
  275. }
  276. }