/interpreter/tags/at2dist130208/src/edu/vub/util/regexp/RETokenRepeated.java
Java | 330 lines | 212 code | 48 blank | 70 comment | 79 complexity | ed07b97f569621ca384b513a2b54737c MD5 | raw file
/* gnu/regexp/RETokenRepeated.java
   Copyright (C) 2006 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.  If you do not wish to do so, delete this
exception statement from your version.
*/ 37 38 39package edu.vub.util.regexp; 40 41final class RETokenRepeated extends REToken { 42 private REToken token; 43 private int min,max; 44 private boolean stingy; 45 private boolean possessive; 46 47 RETokenRepeated(int subIndex, REToken token, int min, int max) { 48 super(subIndex); 49 this.token = token; 50 this.min = min; 51 this.max = max; 52 } 53 54 /** Sets the minimal matching mode to true. */ 55 void makeStingy() { 56 stingy = true; 57 } 58 59 /** Queries if this token has minimal matching enabled. */ 60 boolean isStingy() { 61 return stingy; 62 } 63 64 /** Sets possessive matching mode to true. */ 65 void makePossessive() { 66 possessive = true; 67 } 68 69 /** Queries if this token has possessive matching enabled. */ 70 boolean isPossessive() { 71 return possessive; 72 } 73 74 /** 75 * The minimum length of a repeated token is the minimum length 76 * of the token multiplied by the minimum number of times it must 77 * match. 78 */ 79 int getMinimumLength() { 80 return (min * token.getMinimumLength()); 81 } 82 83 int getMaximumLength() { 84 if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE; 85 int tmax = token.getMaximumLength(); 86 if (tmax == Integer.MAX_VALUE) return tmax; 87 return (max * tmax); 88 } 89 90 private static REMatch findDoables(REToken tk, 91 CharIndexed input, REMatch mymatch) { 92 93 REMatch.REMatchList doables = new REMatch.REMatchList(); 94 95 // try next repeat at all possible positions 96 for (REMatch current = mymatch; 97 current != null; current = current.next) { 98 REMatch recurrent = (REMatch) current.clone(); 99 int origin = recurrent.index; 100 tk = (REToken) tk.clone(); 101 tk.next = tk.uncle = null; 102 recurrent.matchFlags |= REMatch.MF_FIND_ALL; 103 if (tk.match(input, recurrent)) { 104 for (REMatch m = recurrent; m != null; m = m.next) { 105 m.matchFlags &= ~REMatch.MF_FIND_ALL; 106 } 107 if (recurrent.index == origin) recurrent.empty = true; 108 // add all items in current to doables array 109 
doables.addTail(recurrent); 110 } 111 } 112 return doables.head; 113 } 114 115 // We do need to save every possible point, but the number of clone() 116 // invocations here is really a killer for performance on non-stingy 117 // repeat operators. I'm open to suggestions... 118 119 // Hypothetical question: can you have a RE that matches 1 times, 120 // 3 times, 5 times, but not 2 times or 4 times? Does having 121 // the subexpression back-reference operator allow that? 122 123 boolean match(CharIndexed input, REMatch mymatch) { 124 125 boolean stopMatchingIfSatisfied = 126 (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0; 127 128 REMatch newMatch = matchMinimum(input, mymatch); 129 if (newMatch == null) return false; 130 131 // Array of positions we have already visited 132 int[] visited = initVisited(); 133 for (REMatch m = newMatch; m != null; m = m.next) { 134 visited = addVisited(m.index, visited); 135 } 136 137 int max1 = decreaseMax(max, min); 138 139 newMatch = _match(input, newMatch, max1, 140 stopMatchingIfSatisfied, visited); 141 if (newMatch != null) { 142 mymatch.assignFrom(newMatch); 143 return true; 144 } 145 return false; 146 } 147 148 private static int decreaseMax(int m, int n) { 149 if (m == Integer.MAX_VALUE) return m; 150 return m - n; 151 } 152 153 // Array visited is an array of character positions we have already 154 // visited. visited[0] is used to store the effective length of the 155 // array. 156 private static int[] initVisited() { 157 int[] visited = new int[32]; 158 visited[0] = 0; 159 return visited; 160 } 161 162 private static boolean visitedContains(int n, int[] visited) { 163 // Experience tells that for a small array like this, 164 // simple linear search is faster than binary search. 
165 for (int i = 1; i < visited[0]; i++) { 166 if (n == visited[i]) return true; 167 } 168 return false; 169 } 170 171 private static int[] addVisited(int n, int[] visited) { 172 if (visitedContains(n, visited)) return visited; 173 if (visited[0] >= visited.length - 1) { 174 int[] newvisited = new int[visited.length + 32]; 175 System.arraycopy(visited, 0, newvisited, 0, visited.length); 176 visited = newvisited; 177 } 178 visited[0]++; 179 visited[visited[0]] = n; 180 return visited; 181 } 182 183 private REMatch _match(CharIndexed input, REMatch mymatch, 184 int max1, boolean stopMatchingIfSatisfied, 185 int[] visited) { 186 187 if (max1 == 0) { 188 return matchRest(input, mymatch); 189 } 190 max1 = decreaseMax(max1, 1); 191 192 REMatch.REMatchList allResults = new REMatch.REMatchList(); 193 194 // Depth-first search 195 196 for (REMatch cur = mymatch; cur != null; cur = cur.next) { 197 198 REMatch cur1 = (REMatch) cur.clone(); 199 200 if (stingy) { 201 REMatch results = matchRest(input, cur1); 202 if (results != null) { 203 if (stopMatchingIfSatisfied) { 204 return results; 205 } 206 allResults.addTail(results); 207 } 208 } 209 210 DO_THIS: 211 do { 212 213 boolean emptyMatchFound = false; 214 REMatch doables = findDoables(token, input, cur1); 215 if (doables == null) break DO_THIS; 216 if (doables.empty) emptyMatchFound = true; 217 218 if (!emptyMatchFound) { 219 REMatch.REMatchList list = new REMatch.REMatchList(); 220 for (REMatch m = doables; m != null; m = m.next) { 221 REMatch m1 = (REMatch) m.clone(); 222 int n = m1.index; 223 if (! visitedContains(n, visited)) { 224 visited = addVisited(n, visited); 225 list.addTail(m1); 226 } 227 } 228 if (list.head == null) break DO_THIS; 229 doables = list.head; 230 } 231 232 for (REMatch m = doables; m != null; m = m.next) { 233 if (! 
emptyMatchFound) { 234 REMatch m1 = _match(input, m, max1, 235 stopMatchingIfSatisfied, visited); 236 if (possessive) return m1; 237 if (m1 != null) { 238 if (stopMatchingIfSatisfied) { 239 return m1; 240 } 241 allResults.addTail(m1); 242 } 243 } 244 else { 245 REMatch m1 = matchRest(input, m); 246 if (m1 != null) { 247 if (stopMatchingIfSatisfied) { 248 return m1; 249 } 250 allResults.addTail(m1); 251 } 252 } 253 } 254 255 } while (false); // DO_THIS only once; 256 257 // This point itself is a candidate. 258 if (!stingy) { 259 REMatch m2 = matchRest(input, cur1); 260 if (m2 != null) { 261 if (stopMatchingIfSatisfied) { 262 return m2; 263 } 264 allResults.addTail(m2); 265 } 266 } 267 } 268 269 return allResults.head; 270 } 271 272 private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) { 273 // Possible positions for the next repeat to match at 274 REMatch newMatch = mymatch; 275 276 // number of times we've matched so far 277 int numRepeats = 0; 278 279 while (numRepeats < min) { 280 REMatch doables = findDoables(token, input, newMatch); 281 282 // if none of the possibilities worked out, 283 // it means that minimum number of repeats could not be found. 
284 if (doables == null) return null; 285 286 // reassign where the next repeat can match 287 newMatch = doables; 288 289 // increment how many repeats we've successfully found 290 ++numRepeats; 291 292 if (newMatch.empty) break; 293 } 294 return newMatch; 295 } 296 297 private REMatch matchRest(CharIndexed input, final REMatch newMatch) { 298 REMatch current, single; 299 REMatch.REMatchList doneIndex = new REMatch.REMatchList(); 300 // Test all possible matches for this number of repeats 301 for (current = newMatch; current != null; current = current.next) { 302 // clone() separates a single match from the chain 303 single = (REMatch) current.clone(); 304 if (next(input, single)) { 305 // chain results to doneIndex 306 doneIndex.addTail(single); 307 } 308 } 309 return doneIndex.head; 310 } 311 312 void dump(StringBuffer os) { 313 os.append("(?:"); 314 token.dumpAll(os); 315 os.append(')'); 316 if ((max == Integer.MAX_VALUE) && (min <= 1)) 317 os.append( (min == 0) ? '*' : '+' ); 318 else if ((min == 0) && (max == 1)) 319 os.append('?'); 320 else { 321 os.append('{').append(min); 322 if (max > min) { 323 os.append(','); 324 if (max != Integer.MAX_VALUE) os.append(max); 325 } 326 os.append('}'); 327 } 328 if (stingy) os.append('?'); 329 } 330}