PageRenderTime 59ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/src/ikj/main/org/jregex/Matcher.java

https://github.com/olabini/ioke
Java | 2298 lines | 1603 code | 195 blank | 500 comment | 321 complexity | 0d0e7c3b6f93f4dbb8346221df8dd5ab MD5 | raw file
Possible License(s): BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. /**
  2. * Copyright (c) 2001, Sergey A. Samokhodkin
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without modification,
  6. * are permitted provided that the following conditions are met:
  7. *
  8. * - Redistributions of source code must retain the above copyright notice,
  9. * this list of conditions and the following disclaimer.
  10. * - Redistributions in binary form
  11. * must reproduce the above copyright notice, this list of conditions and the following
  12. * disclaimer in the documentation and/or other materials provided with the distribution.
  13. * - Neither the name of jregex nor the names of its contributors may be used
  14. * to endorse or promote products derived from this software without specific prior
  15. * written permission.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
  18. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20. * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  21. * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  23. * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  25. * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. *
  27. * @version 1.2_01
  28. */
  29. package org.jregex;
  30. import java.util.*;
  31. import java.io.*;
  32. import org.jregex.Term.TermType;
  33. /**
  34. * Matcher instance is an automaton that actually performs matching. It provides the following methods:
  35. * <li> searching for a matching substrings : matcher.find() or matcher.findAll();
  36. * <li> testing whether a text matches a whole pattern : matcher.matches();
  37. * <li> testing whether the text matches the beginning of a pattern : matcher.matchesPrefix();
  38. * <li> searching with custom options : matcher.find(int options)
  39. * <p>
  40. * <b>Obtaining results</b><br>
  41. * After the search succeeded, i.e. if one of the above methods returned <code>true</code>,
  42. * one may obtain information about the match:
  43. * <li> may check whether some group is captured : matcher.isCaptured(int);
  44. * <li> may obtain start and end positions of the match and its length : matcher.start(int),matcher.end(int),matcher.length(int);
  45. * <li> may obtain match contents as String : matcher.group(int).<br>
  46. * The match prefix and suffix information can be obtained the same way.
  47. * The appropriate methods are grouped in MatchResult interface, which the Matcher class implements.<br>
  48. * Matcher objects are not thread-safe, so only one thread may use a matcher instance at a time.
  49. * Note that Pattern objects are thread-safe (the same instance may be shared between
  50. * multiple threads), and the typical tactic in multithreaded applications is to have one Pattern instance per expression (a singleton),
  51. * and one Matcher object per thread.
  52. */
  53. public class Matcher implements MatchResult{
  54. /* Matching options: bit flags that may be combined with bitwise OR and passed to find(int) */
  55. /**
  56. * The same effect as "^" without REFlags.MULTILINE.
  57. * @see Matcher#find(int)
  58. */
  59. public static final int ANCHOR_START=1;
  60. /**
  61. * The same effect as "\\G".
  62. * @see Matcher#find(int)
  63. */
  64. public static final int ANCHOR_LASTMATCH=2;
  65. /**
  66. * The same effect as "$" without REFlags.MULTILINE.
  67. * @see Matcher#find(int)
  68. */
  69. public static final int ANCHOR_END=4;
  70. /**
  71. * Experimental option: if the text ends before the end of the pattern, report a match.
  72. * @see Matcher#find(int)
  73. */
  74. public static final int ACCEPT_INCOMPLETE=8;
  75. //root term that search() substitutes when ANCHOR_START is requested
  76. private static Term startAnchor=new Term(TermType.START);
  77. //root term that search() substitutes when ANCHOR_LASTMATCH is requested
  78. private static Term lastMatchAnchor=new Term(TermType.LAST_MATCH_END);
  79. private Pattern re; //the pattern this matcher executes; returned by pattern()
  80. private int[] counters; //one slot per pattern counter; stays null if the pattern declares none
  81. private MemReg[] memregs; //group bounds, indexed by group id (see bounds(int))
  82. private LAEntry[] lookaheads; //one entry per pattern lookahead; stays null if the pattern declares none
  83. private int counterCount;
  84. private int memregCount;
  85. private int lookaheadCount;
  86. private char[] data; //array holding the target characters
  87. private int offset,end,wOffset,wEnd; //offset/end: target bounds within data; wOffset/wEnd: bounds of the current match (wEnd is reset to -1 by init() and setPosition())
  88. private boolean shared; //when true, data must not be reused as a scratch buffer by later setTarget() calls
  89. private SearchEntry top; //stack entry
  90. private SearchEntry first; //object pool entry
  91. private SearchEntry defaultEntry; //called when moving the window
  92. private boolean called; //set by search(), cleared by flush(); find() calls skip() first when it is set
  93. private int minQueueLength; //passed to first.reset(..) by flush(); estimated from the pattern text length in the constructor
  94. private String cache; //String view of the target used by getString(); null when no view is available
  95. //cache may be longer than the actual data
  96. //and contrariwise; so cacheOffset may have both signs.
  97. //cacheOffset is actually -(data offset).
  98. private int cacheOffset,cacheLength;
  99. private MemReg prefixBounds,suffixBounds,targetBounds; //created lazily by bounds(int) for PREFIX, SUFFIX and TARGET
  100. Matcher(Pattern regex){
  101. this.re=regex;
  102. //int memregCount=(memregs=new MemReg[regex.memregs]).length;
  103. //for(int i=0;i<memregCount;i++){
  104. // this.memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition
  105. //}
  106. //counters=new int[regex.counters];
  107. //int lookaheadCount=(lookaheads=new LAEntry[regex.lookaheads]).length;
  108. //for(int i=0;i<lookaheadCount;i++){
  109. // this.lookaheads[i]=new LAEntry();
  110. //}
  111. int memregCount,counterCount,lookaheadCount;
  112. if((memregCount=regex.memregs)>0){
  113. MemReg[] memregs=new MemReg[memregCount];
  114. for(int i=0;i<memregCount;i++){
  115. memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition
  116. }
  117. this.memregs=memregs;
  118. }
  119. if((counterCount=regex.counters)>0) counters=new int[counterCount];
  120. if((lookaheadCount=regex.lookaheads)>0){
  121. LAEntry[] lookaheads=new LAEntry[lookaheadCount];
  122. for(int i=0;i<lookaheadCount;i++){
  123. lookaheads[i]=new LAEntry();
  124. }
  125. this.lookaheads=lookaheads;
  126. }
  127. this.memregCount=memregCount;
  128. this.counterCount=counterCount;
  129. this.lookaheadCount=lookaheadCount;
  130. first=new SearchEntry();
  131. defaultEntry=new SearchEntry();
  132. minQueueLength=regex.stringRepr.length()/2; // just evaluation!!!
  133. }
  134. /**
  135. * This method allows to efficiently pass data between matchers.
  136. * Note that a matcher may pass data to itself:<pre>
  137. * Matcher m=new Pattern("\\w+").matcher(myString);
  138. * if(m.find())m.setTarget(m,m.SUFFIX); //forget all that is not a suffix
  139. * </pre>
  140. * Resets current search position to zero.
  141. * @param m - a matcher that is a source of data
  142. * @param groupId - which group to take data from
  143. * @see Matcher#setTarget(java.lang.String)
  144. * @see Matcher#setTarget(java.lang.String,int,int)
  145. * @see Matcher#setTarget(char[],int,int)
  146. * @see Matcher#setTarget(java.io.Reader,int)
  147. */
  148. public final void setTarget(Matcher m, int groupId){
  149. MemReg mr=m.bounds(groupId);
  150. //System.out.println("setTarget("+m+","+groupId+")");
  151. //System.out.println(" in="+mr.in);
  152. //System.out.println(" out="+mr.out);
  153. if(mr==null) throw new IllegalArgumentException("group #"+groupId+" is not assigned");
  154. data=m.data;
  155. offset=mr.in;
  156. end=mr.out;
  157. cache=m.cache;
  158. cacheLength=m.cacheLength;
  159. cacheOffset=m.cacheOffset;
  160. if(m!=this){
  161. shared=true;
  162. m.shared=true;
  163. }
  164. init();
  165. }
  166. /**
  167. * Supplies a text to search in/match with.
  168. * Resets current search position to zero.
  169. * @param text - a data
  170. * @see Matcher#setTarget(jregex.Matcher,int)
  171. * @see Matcher#setTarget(java.lang.String,int,int)
  172. * @see Matcher#setTarget(char[],int,int)
  173. * @see Matcher#setTarget(java.io.Reader,int)
  174. */
  175. public void setTarget(String text){
  176. setTarget(text,0,text.length());
  177. }
  178. /**
  179. * Supplies a text to search in/match with, as a part of String.
  180. * Resets current search position to zero.
  181. * @param text - a data source
  182. * @param start - where the target starts
  183. * @param len - how long is the target
  184. * @see Matcher#setTarget(jregex.Matcher,int)
  185. * @see Matcher#setTarget(java.lang.String)
  186. * @see Matcher#setTarget(char[],int,int)
  187. * @see Matcher#setTarget(java.io.Reader,int)
  188. */
  189. public void setTarget(String text,int start,int len){
  190. char[] mychars=data;
  191. if(mychars==null || shared || mychars.length<len){
  192. data=mychars=new char[(int)(1.7f*len)];
  193. shared=false;
  194. }
  195. text.getChars(start,len,mychars,0); //(srcBegin,srcEnd,dst[],dstBegin)
  196. offset=0;
  197. end=len;
  198. cache=text;
  199. cacheOffset=-start;
  200. cacheLength=text.length();
  201. init();
  202. }
  203. /**
  204. * Supplies a text to search in/match with, as a part of char array.
  205. * Resets current search position to zero.
  206. * @param text - a data source
  207. * @param start - where the target starts
  208. * @param len - how long is the target
  209. * @see Matcher#setTarget(jregex.Matcher,int)
  210. * @see Matcher#setTarget(java.lang.String)
  211. * @see Matcher#setTarget(java.lang.String,int,int)
  212. * @see Matcher#setTarget(java.io.Reader,int)
  213. */
  214. public void setTarget(char[] text,int start,int len){
  215. setTarget(text,start,len,true);
  216. }
  217. /**
  218. * To be used with much care.
  219. * Supplies a text to search in/match with, as a part of a char array, as above, but also allows to permit
  220. * to use the array as internal buffer for subsequent inputs. That is, if we call it with <code>shared=false</code>:<pre>
  221. * myMatcher.setTarget(myCharArray,x,y,<b>false</b>); //we declare that array contents is NEITHER shared NOR will be used later, so may modifications on it are permitted
  222. * </pre>
  223. * then we should expect the array contents to be changed on subsequent setTarget(..) operations.
  224. * Such method may yield some increase in perfomanse in the case of multiple setTarget() calls.
  225. * Resets current search position to zero.
  226. * @param text - a data source
  227. * @param start - where the target starts
  228. * @param len - how long is the target
  229. * @param shared - if <code>true<code>: data are shared or used later, <b>don't</b> modify it; if <code>false<code>: possible modifications of the text on subsequent <code>setTarget()</code> calls are perceived and allowed.
  230. * @see Matcher#setTarget(jregex.Matcher,int)
  231. * @see Matcher#setTarget(java.lang.String)
  232. * @see Matcher#setTarget(java.lang.String,int,int)
  233. * @see Matcher#setTarget(char[],int,int)
  234. * @see Matcher#setTarget(java.io.Reader,int)
  235. */
  236. public final void setTarget(char[] text,int start,int len,boolean shared){
  237. cache=null;
  238. data=text;
  239. offset=start;
  240. end=start+len;
  241. this.shared=shared;
  242. init();
  243. }
  244. /**
  245. * Supplies a text to search in/match with through a stream.
  246. * Resets current search position to zero.
  247. * @param in - a data stream;
  248. * @param len - how much characters should be read; if len is -1, read the entire stream.
  249. * @see Matcher#setTarget(jregex.Matcher,int)
  250. * @see Matcher#setTarget(java.lang.String)
  251. * @see Matcher#setTarget(java.lang.String,int,int)
  252. * @see Matcher#setTarget(char[],int,int)
  253. */
  254. public void setTarget(Reader in,int len)throws IOException{
  255. if(len<0){
  256. setAll(in);
  257. return;
  258. }
  259. char[] mychars=data;
  260. boolean shared=this.shared;
  261. if(mychars==null || shared || mychars.length<len){
  262. mychars=new char[len];
  263. shared=false;
  264. }
  265. int count=0;
  266. int c;
  267. while((c=in.read(mychars,count,len))>=0){
  268. len-=c;
  269. count+=c;
  270. if(len==0) break;
  271. }
  272. setTarget(mychars,0,count,shared);
  273. }
  274. private void setAll(Reader in)throws IOException{
  275. char[] mychars=data;
  276. int free;
  277. boolean shared=this.shared;
  278. if(mychars==null || shared){
  279. mychars=new char[free=1024];
  280. shared=false;
  281. }
  282. else free=mychars.length;
  283. int count=0;
  284. int c;
  285. while((c=in.read(mychars,count,free))>=0){
  286. free-=c;
  287. count+=c;
  288. if(free==0){
  289. int newsize=count*3;
  290. char[] newchars=new char[newsize];
  291. System.arraycopy(mychars,0,newchars,0,count);
  292. mychars=newchars;
  293. free=newsize-count;
  294. shared=false;
  295. }
  296. }
  297. setTarget(mychars,0,count,shared);
  298. }
  299. private final String getString(int start,int end){
  300. String src=cache;
  301. if(src!=null){
  302. int co=cacheOffset;
  303. return src.substring(start-co,end-co);
  304. }
  305. int tOffset,tEnd,tLen=(tEnd=this.end)-(tOffset=this.offset);
  306. char[] data=this.data;
  307. if((end-start)>=(tLen/3)){
  308. //it makes sence to make a cache
  309. cache=src=new String(data,tOffset,tLen);
  310. cacheOffset=tOffset;
  311. cacheLength=tLen;
  312. return src.substring(start-tOffset,end-tOffset);
  313. }
  314. return new String(data,start,end-start);
  315. }
  316. /* Matching */
  317. /**
  318. * Tells whether the entire target matches the beginning of the pattern.
  319. * The whole pattern is also regarded as its beginning.<br>
  320. * This feature allows to find a mismatch by examining only a beginning part of
  321. * the target (as if the beginning of the target doesn't match the beginning of the pattern, then the entire target
  322. * also couldn't match).<br>
  323. * For example the following assertions yield <code>true<code>:<pre>
  324. * Pattern p=new Pattern("abcd");
  325. * p.matcher("").matchesPrefix();
  326. * p.matcher("a").matchesPrefix();
  327. * p.matcher("ab").matchesPrefix();
  328. * p.matcher("abc").matchesPrefix();
  329. * p.matcher("abcd").matchesPrefix();
  330. * </pre>
  331. * and the following yield <code>false<code>:<pre>
  332. * p.matcher("b").isPrefix();
  333. * p.matcher("abcdef").isPrefix();
  334. * p.matcher("x").isPrefix();
  335. * </pre>
  336. * @return true if the entire target matches the beginning of the pattern
  337. */
  338. public final boolean matchesPrefix(){
  339. setPosition(0);
  340. return search(ANCHOR_START|ACCEPT_INCOMPLETE|ANCHOR_END);
  341. }
  342. /**
  343. * Just an old name for isPrefix().<br>
  344. * Retained for backwards compatibility.
  345. * @deprecated Replaced by isPrefix()
  346. */
  347. public final boolean isStart(){
  348. return matchesPrefix();
  349. }
  350. /**
  351. * Tells whether a current target matches the whole pattern.
  352. * For example the following yields the <code>true<code>:<pre>
  353. * Pattern p=new Pattern("\\w+");
  354. * p.matcher("a").matches();
  355. * p.matcher("ab").matches();
  356. * p.matcher("abc").matches();
  357. * </pre>
  358. * and the following yields the <code>false<code>:<pre>
  359. * p.matcher("abc def").matches();
  360. * p.matcher("bcd ").matches();
  361. * p.matcher(" bcd").matches();
  362. * p.matcher("#xyz#").matches();
  363. * </pre>
  364. * @return whether a current target matches the whole pattern.
  365. */
  366. public final boolean matches(){
  367. if(called) setPosition(0);
  368. return search(ANCHOR_START|ANCHOR_END);
  369. }
  370. /**
  371. * Just a combination of setTarget(String) and matches().
  372. * @param s the target string;
  373. * @return whether the specified string matches the whole pattern.
  374. */
  375. public final boolean matches(String s){
  376. setTarget(s);
  377. return search(ANCHOR_START|ANCHOR_END);
  378. }
  379. /**
  380. * Allows to set a position the subsequent find()/find(int) will start from.
  381. * @param pos the position to start from;
  382. * @see Matcher#find()
  383. * @see Matcher#find(int)
  384. */
  385. public void setPosition(int pos){
  386. wOffset=offset+pos;
  387. wEnd=-1;
  388. called=false;
  389. flush();
  390. }
  391. public void setOffset(int offset){
  392. this.offset = offset;
  393. wOffset=offset;
  394. wEnd=-1;
  395. called=false;
  396. flush();
  397. }
  398. /**
  399. * Searches through a target for a matching substring, starting from just after the end of last match.
  400. * If there wasn't any search performed, starts from zero.
  401. * @return <code>true</code> if a match found.
  402. */
  403. public final boolean find(){
  404. if(called) skip();
  405. return search(0);
  406. }
  407. /**
  408. * Searches through a target for a matching substring, starting from just after the end of last match.
  409. * If there wasn't any search performed, starts from zero.
  410. * @param anchors a zero or a combination(bitwise OR) of ANCHOR_START,ANCHOR_END,ANCHOR_LASTMATCH,ACCEPT_INCOMPLETE
  411. * @return <code>true</code> if a match found.
  412. */
  413. public final boolean find(int anchors){
  414. if(called) skip();
  415. return search(anchors);
  416. }
  417. /**
  418. * The same as findAll(int), but with default behaviour;
  419. */
  420. public MatchIterator findAll(){
  421. return findAll(0);
  422. }
  423. /**
  424. * Returns an iterator over the matches found by subsequently calling find(options), the search starts from the zero position.
  425. */
  426. public MatchIterator findAll(final int options){
  427. //setPosition(0);
  428. return new MatchIterator(){
  429. private boolean checked=false;
  430. private boolean hasMore=false;
  431. public boolean hasMore(){
  432. if(!checked) check();
  433. return hasMore;
  434. }
  435. public MatchResult nextMatch(){
  436. if(!checked) check();
  437. if(!hasMore) throw new NoSuchElementException();
  438. checked=false;
  439. return Matcher.this;
  440. }
  441. private final void check(){
  442. hasMore=find(options);
  443. checked=true;
  444. }
  445. public int count(){
  446. if(!checked) check();
  447. if(!hasMore) return 0;
  448. int c=1;
  449. while(find(options))c++;
  450. checked=false;
  451. return c;
  452. }
  453. };
  454. }
  455. /**
  456. * Continues to search from where the last search left off.
  457. * The same as proceed(0).
  458. * @see Matcher#proceed(int)
  459. */
  460. public final boolean proceed(){
  461. return proceed(0);
  462. }
  463. /**
  464. * Continues to search from where the last search left off using specified options:<pre>
  465. * Matcher m=new Pattern("\\w+").matcher("abc");
  466. * while(m.proceed(0)){
  467. * System.out.println(m.group(0));
  468. * }
  469. * </pre>
  470. * Output:<pre>
  471. * abc
  472. * ab
  473. * a
  474. * bc
  475. * b
  476. * c
  477. * </pre>
  478. * For example, let's find all odd nubmers occuring in a text:<pre>
  479. * Matcher m=new Pattern("\\d+").matcher("123");
  480. * while(m.proceed(0)){
  481. * String match=m.group(0);
  482. * if(isOdd(Integer.parseInt(match))) System.out.println(match);
  483. * }
  484. *
  485. * static boolean isOdd(int i){
  486. * return (i&1)>0;
  487. * }
  488. * </pre>
  489. * This outputs:<pre>
  490. * 123
  491. * 1
  492. * 23
  493. * 3
  494. * </pre>
  495. * Note that using <code>find()</code> method we would find '123' only.
  496. * @param options search options, some of ANCHOR_START|ANCHOR_END|ANCHOR_LASTMATCH|ACCEPT_INCOMPLETE; zero value(default) stands for usual search for substring.
  497. */
  498. public final boolean proceed(int options){
  499. //System.out.println("next() : top="+top);
  500. if(called){
  501. if(top==null){
  502. wOffset++;
  503. }
  504. }
  505. return search(0);
  506. }
  507. /**
  508. * Sets the current search position just after the end of last match.
  509. */
  510. public final void skip(){
  511. int we=wEnd;
  512. if(wOffset==we){ //requires special handling
  513. //if no variants at 'wOutside',advance pointer and clear
  514. if(top==null){
  515. wOffset++;
  516. flush();
  517. }
  518. //otherwise, if there exist a variant,
  519. //don't clear(), i.e. allow it to match
  520. return;
  521. }
  522. else{
  523. if(we<0) wOffset=0;
  524. else wOffset=we;
  525. }
  526. //rflush(); //rflush() works faster on simple regexes (with a small group/branch number)
  527. flush();
  528. }
  529. private final void init(){
  530. //wOffset=-1;
  531. //System.out.println("init(): offset="+offset+", end="+end);
  532. wOffset=offset;
  533. wEnd=-1;
  534. called=false;
  535. flush();
  536. }
  537. /**
  538. * Resets the internal state.
  539. */
  540. private final void flush(){
  541. top=null;
  542. defaultEntry.reset(0);
  543. /*
  544. int c=0;
  545. SearchEntry se=first;
  546. while(se!=null){
  547. c++;
  548. se=se.on;
  549. }
  550. System.out.println("queue: allocated="+c+", truncating to "+minQueueLength);
  551. new Exception().printStackTrace();
  552. */
  553. first.reset(minQueueLength);
  554. //first.reset(0);
  555. for(int i=memregs.length-1;i>0;i--){
  556. MemReg mr=memregs[i];
  557. mr.in=mr.out=-1;
  558. }
  559. for(int i=memregs.length-1;i>0;i--){
  560. MemReg mr=memregs[i];
  561. mr.in=mr.out=-1;
  562. }
  563. called=false;
  564. }
  565. //reverse flush
  566. //may work significantly faster,
  567. //need testing
  568. private final void rflush(){
  569. SearchEntry entry=top;
  570. top=null;
  571. MemReg[] memregs=this.memregs;
  572. int[] counters=this.counters;
  573. while(entry!=null){
  574. SearchEntry next=entry.sub;
  575. SearchEntry.popState(entry,memregs,counters);
  576. entry=next;
  577. }
  578. SearchEntry.popState(defaultEntry,memregs,counters);
  579. }
  580. /**
  581. */
  582. public String toString(){
  583. return getString(wOffset,wEnd);
  584. }
  585. public Pattern pattern(){
  586. return re;
  587. }
  588. public String target(){
  589. return getString(offset,end);
  590. }
  591. /**
  592. */
  593. public char[] targetChars(){
  594. shared=true;
  595. return data;
  596. }
  597. /**
  598. */
  599. public int targetStart(){
  600. return offset;
  601. }
  602. /**
  603. */
  604. public int targetEnd(){
  605. return end;
  606. }
  607. public char charAt(int i){
  608. int in=this.wOffset;
  609. int out=this.wEnd;
  610. if(in<0 || out<in) throw new IllegalStateException("unassigned");
  611. return data[in+i];
  612. }
  613. public char charAt(int i,int groupId){
  614. MemReg mr=bounds(groupId);
  615. if(mr==null) throw new IllegalStateException("group #"+groupId+" is not assigned");
  616. int in=mr.in;
  617. if(i<0 || i>(mr.out-in)) throw new StringIndexOutOfBoundsException(""+i);
  618. return data[in+i];
  619. }
  620. public final int length(){
  621. return wEnd-wOffset;
  622. }
  623. /**
  624. */
  625. public final int start(){
  626. return wOffset-offset;
  627. }
  628. /**
  629. */
  630. public final int end(){
  631. return wEnd-offset;
  632. }
  633. /**
  634. */
  635. public String prefix(){
  636. return getString(offset,wOffset);
  637. }
  638. /**
  639. */
  640. public String suffix(){
  641. return getString(wEnd,end);
  642. }
  643. /**
  644. */
  645. public int groupCount(){
  646. return memregs.length;
  647. }
  648. /**
  649. */
  650. public String group(int n){
  651. MemReg mr=bounds(n);
  652. if(mr==null) return null;
  653. return getString(mr.in,mr.out);
  654. }
  655. /**
  656. */
  657. public String group(String name){
  658. Integer id=re.groupId(name);
  659. if(id==null) throw new IllegalArgumentException("<"+name+"> isn't defined");
  660. return group(id.intValue());
  661. }
  662. /**
  663. */
  664. public boolean getGroup(int n,TextBuffer tb){
  665. MemReg mr=bounds(n);
  666. if(mr==null) return false;
  667. int in;
  668. tb.append(data,in=mr.in,mr.out-in);
  669. return true;
  670. }
  671. /**
  672. */
  673. public boolean getGroup(String name,TextBuffer tb){
  674. Integer id=re.groupId(name);
  675. if(id==null) throw new IllegalArgumentException("unknown group: \""+name+"\"");
  676. return getGroup(id.intValue(),tb);
  677. }
  678. /**
  679. */
  680. public boolean getGroup(int n,StringBuffer sb){
  681. MemReg mr=bounds(n);
  682. if(mr==null) return false;
  683. int in;
  684. sb.append(data,in=mr.in,mr.out-in);
  685. return true;
  686. }
  687. /**
  688. */
  689. public boolean getGroup(String name,StringBuffer sb){
  690. Integer id=re.groupId(name);
  691. if(id==null) throw new IllegalArgumentException("unknown group: \""+name+"\"");
  692. return getGroup(id.intValue(),sb);
  693. }
  694. /**
  695. */
  696. public String[] groups(){
  697. MemReg[] memregs=this.memregs;
  698. String[] groups=new String[memregs.length];
  699. int in,out;
  700. MemReg mr;
  701. for(int i=0;i<memregs.length;i++){
  702. in=(mr=memregs[i]).in;
  703. out=mr.out;
  704. if((in=mr.in)<0 || mr.out<in) continue;
  705. groups[i]=getString(in,out);
  706. }
  707. return groups;
  708. }
  709. /**
  710. */
  711. public Vector groupv(){
  712. MemReg[] memregs=this.memregs;
  713. Vector v=new Vector();
  714. int in,out;
  715. MemReg mr;
  716. for(int i=0;i<memregs.length;i++){
  717. mr=bounds(i);
  718. if(mr==null){
  719. v.addElement("empty");
  720. continue;
  721. }
  722. String s=getString(mr.in,mr.out);
  723. v.addElement(s);
  724. }
  725. return v;
  726. }
  727. private final MemReg bounds(int id){
  728. //System.out.println("Matcher.bounds("+id+"):");
  729. MemReg mr;
  730. if(id>=0){
  731. mr=memregs[id];
  732. }
  733. else switch(id){
  734. case PREFIX:
  735. mr=prefixBounds;
  736. if(mr==null) prefixBounds=mr=new MemReg(PREFIX);
  737. mr.in=offset;
  738. mr.out=wOffset;
  739. break;
  740. case SUFFIX:
  741. mr=suffixBounds;
  742. if(mr==null) suffixBounds=mr=new MemReg(SUFFIX);
  743. mr.in=wEnd;
  744. mr.out=end;
  745. break;
  746. case TARGET:
  747. mr=targetBounds;
  748. if(mr==null) targetBounds=mr=new MemReg(TARGET);
  749. mr.in=offset;
  750. mr.out=end;
  751. break;
  752. default:
  753. throw new IllegalArgumentException("illegal group id: "+id+"; must either nonnegative int, or MatchResult.PREFIX, or MatchResult.SUFFIX");
  754. }
  755. //System.out.println(" mr=["+mr.in+","+mr.out+"]");
  756. int in;
  757. if((in=mr.in)<0 || mr.out<in) return null;
  758. return mr;
  759. }
  760. /**
  761. */
  762. public final boolean isCaptured(){
  763. return wOffset>=0 && wEnd>=wOffset;
  764. }
  765. /**
  766. */
  767. public final boolean isCaptured(int id){
  768. return bounds(id)!=null;
  769. }
  770. /**
  771. */
  772. public final boolean isCaptured(String groupName){
  773. Integer id=re.groupId(groupName);
  774. if(id==null) throw new IllegalArgumentException("unknown group: \""+groupName+"\"");
  775. return isCaptured(id.intValue());
  776. }
  777. /**
  778. */
  779. public final int length(int id){
  780. MemReg mr=bounds(id);
  781. return mr.out-mr.in;
  782. }
  783. /**
  784. */
  785. public final int start(int id){
  786. return bounds(id).in-offset;
  787. }
  788. /**
  789. */
  790. public final int end(int id){
  791. return bounds(id).out-offset;
  792. }
  793. private final boolean search(int anchors){
  794. called=true;
  795. final int end=this.end;
  796. int offset=this.offset;
  797. char[] data=this.data;
  798. int wOffset=this.wOffset;
  799. int wEnd=this.wEnd;
  800. MemReg[] memregs=this.memregs;
  801. int[] counters=this.counters;
  802. LAEntry[] lookaheads=this.lookaheads;
  803. //int memregCount=memregs.length;
  804. //int cntCount=counters.length;
  805. int memregCount=this.memregCount;
  806. int cntCount=this.counterCount;
  807. SearchEntry defaultEntry=this.defaultEntry;
  808. SearchEntry first=this.first;
  809. SearchEntry top=this.top;
  810. SearchEntry actual=null;
  811. int cnt,regLen;
  812. int i;
  813. final boolean matchEnd=(anchors&ANCHOR_END)>0;
  814. final boolean allowIncomplete=(anchors&ACCEPT_INCOMPLETE)>0;
  815. Pattern re=this.re;
  816. Term root=re.root;
  817. Term term;
  818. if(top==null){
  819. if((anchors&ANCHOR_START)>0){
  820. term=re.root0; //raw root
  821. root=startAnchor;
  822. }
  823. else if((anchors&ANCHOR_LASTMATCH)>0){
  824. term=re.root0; //raw root
  825. root=lastMatchAnchor;
  826. }
  827. else{
  828. term=root; //optimized root
  829. }
  830. i=wOffset;
  831. actual=first;
  832. SearchEntry.popState(defaultEntry,memregs,counters);
  833. }
  834. else{
  835. top=(actual=top).sub;
  836. term=actual.term;
  837. i=actual.index;
  838. SearchEntry.popState(actual,memregs,counters);
  839. }
  840. cnt=actual.cnt;
  841. regLen=actual.regLen;
  842. main:
  843. while(wOffset<=end){
  844. matchHere:
  845. for(;;){
  846. /*
  847. System.out.print("char: "+i+", term: ");
  848. System.out.print(term.toString());
  849. System.out.print(" // mrs:{");
  850. for(int dbi=0;dbi<memregs.length;dbi++){
  851. System.out.print('[');
  852. System.out.print(memregs[dbi].in);
  853. System.out.print(',');
  854. System.out.print(memregs[dbi].out);
  855. System.out.print(']');
  856. System.out.print(' ');
  857. }
  858. System.out.print("}, crs:{");
  859. for(int dbi=0;dbi<counters.length;dbi++){
  860. System.out.print(counters[dbi]);
  861. if(dbi<counters.length-1)System.out.print(',');
  862. }
  863. System.out.println("}");
  864. */
  865. int memreg,cntreg;
  866. char c;
  867. switch(term.type){
  868. case FIND:{
  869. int jump=find(data,i+term.distance,end,term.target); //don't eat the last match
  870. if(jump<0) break main; //return false
  871. i+=jump;
  872. wOffset=i; //force window to move
  873. if(term.eat){
  874. if(i==end) break;
  875. i++;
  876. }
  877. term=term.next;
  878. continue matchHere;
  879. }
  880. case FINDREG:{
  881. MemReg mr=memregs[term.target.memreg];
  882. int sampleOff=mr.in;
  883. int sampleLen=mr.out-sampleOff;
  884. //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
  885. /*@since 1.2*/
  886. if(sampleOff<0 || sampleLen<0){
  887. break;
  888. }
  889. else if(sampleLen==0){
  890. term=term.next;
  891. continue matchHere;
  892. }
  893. int jump=findReg(data,i+term.distance,sampleOff,sampleLen,term.target,end); //don't eat the last match
  894. if(jump<0) break main; //return false
  895. i+=jump;
  896. wOffset=i; //force window to move
  897. if(term.eat){
  898. i+=sampleLen;
  899. if(i>end) break;
  900. }
  901. term=term.next;
  902. continue matchHere;
  903. }
  904. case VOID:
  905. term=term.next;
  906. continue matchHere;
  907. case CHAR:
  908. //can only be 1-char-wide
  909. // \/
  910. if(i>=end || data[i]!=term.c) break;
  911. //System.out.println("CHAR: "+data[i]+", i="+i);
  912. i++;
  913. term=term.next;
  914. continue matchHere;
  915. case ANY_CHAR:
  916. //can only be 1-char-wide
  917. // \/
  918. if(i>=end) break;
  919. i++;
  920. term=term.next;
  921. continue matchHere;
  922. case ANY_CHAR_NE:
  923. //can only be 1-char-wide
  924. // \/
  925. if(i>=end || data[i]=='\n') break;
  926. i++;
  927. term=term.next;
  928. continue matchHere;
  929. case END:
  930. if(i>=end){ //meets
  931. term=term.next;
  932. continue matchHere;
  933. }
  934. break;
  935. case END_EOL: //perl's $
  936. if(i>=end){ //meets
  937. term=term.next;
  938. continue matchHere;
  939. }
  940. else{
  941. boolean matches=
  942. i>=end |
  943. ((i+1)==end && data[i]=='\n');
  944. if(matches){
  945. term=term.next;
  946. continue matchHere;
  947. }
  948. else break;
  949. }
  950. case LINE_END:
  951. if(i>=end){ //meets
  952. term=term.next;
  953. continue matchHere;
  954. }
  955. else{
  956. /*
  957. if(((c=data[i])=='\r' || c=='\n') &&
  958. (c=data[i-1])!='\r' && c!='\n'){
  959. term=term.next;
  960. continue matchHere;
  961. }
  962. */
  963. //5 aug 2001
  964. if(data[i]=='\n'){
  965. term=term.next;
  966. continue matchHere;
  967. }
  968. }
  969. break;
  970. case START: //Perl's "^"
  971. if(i==offset){ //meets
  972. term=term.next;
  973. continue matchHere;
  974. }
  975. //break;
  976. //changed on 27-04-2002
  977. //due to a side effect: if ALLOW_INCOMPLETE is enabled,
  978. //the anchorStart moves up to the end and succeeds
  979. //(see comments at the last lines of matchHere, ~line 1830)
  980. //Solution: if there are some entries on the stack ("^a|b$"),
  981. //try them; otherwise it's a final 'no'
  982. //if(top!=null) break;
  983. //else break main;
  984. //changed on 25-05-2002
  985. //rationale: if the term is startAnchor,
  986. //it's the root term by definition,
  987. //so if it doesn't match, the entire pattern
  988. //couldn't match too;
  989. //otherwise we could have the following problem:
  990. //"c|^a" against "abc" finds only "a"
  991. if(top!=null) break;
  992. if(term!=startAnchor) break;
  993. else break main;
  994. case LAST_MATCH_END:
  995. if(i==wEnd || wEnd == -1){ //meets
  996. term=term.next;
  997. continue matchHere;
  998. }
  999. break main; //return false
  1000. case LINE_START:
  1001. if(i==offset){ //meets
  1002. term=term.next;
  1003. continue matchHere;
  1004. }
  1005. else if(i<end){
  1006. /*
  1007. if(((c=data[i-1])=='\r' || c=='\n') &&
  1008. (c=data[i])!='\r' && c!='\n'){
  1009. term=term.next;
  1010. continue matchHere;
  1011. }
  1012. */
  1013. //5 aug 2001
  1014. //if((c=data[i-1])=='\r' || c=='\n'){ ??
  1015. if((c=data[i-1])=='\n'){
  1016. term=term.next;
  1017. continue matchHere;
  1018. }
  1019. }
  1020. break;
  1021. case BITSET:{
  1022. //can only be 1-char-wide
  1023. // \/
  1024. if(i>=end) break;
  1025. c=data[i];
  1026. if(!(c<=255 && term.bitset[c])^term.inverse) break;
  1027. i++;
  1028. term=term.next;
  1029. continue matchHere;
  1030. }
  1031. case BITSET2:{
  1032. //can only be 1-char-wide
  1033. // \/
  1034. if(i>=end) break;
  1035. c=data[i];
  1036. boolean[] arr=term.bitset2[c>>8];
  1037. if(arr==null || !arr[c&255]^term.inverse) break;
  1038. i++;
  1039. term=term.next;
  1040. continue matchHere;
  1041. }
  1042. case BOUNDARY:{
  1043. boolean ch1Meets=false,ch2Meets=false;
  1044. boolean[] bitset=term.bitset;
  1045. test1:{
  1046. int j=i-1;
  1047. //if(j<offset || j>=end) break test1;
  1048. if(j<offset) break test1;
  1049. c= data[j];
  1050. ch1Meets= (c<256 && bitset[c]);
  1051. }
  1052. test2:{
  1053. //if(i<offset || i>=end) break test2;
  1054. if(i>=end) break test2;
  1055. c= data[i];
  1056. ch2Meets= (c<256 && bitset[c]);
  1057. }
  1058. if(ch1Meets^ch2Meets^term.inverse){ //meets
  1059. term=term.next;
  1060. continue matchHere;
  1061. }
  1062. else break;
  1063. }
  1064. case UBOUNDARY:{
  1065. boolean ch1Meets=false,ch2Meets=false;
  1066. boolean[][] bitset2=term.bitset2;
  1067. test1:{
  1068. int j=i-1;
  1069. //if(j<offset || j>=end) break test1;
  1070. if(j<offset) break test1;
  1071. c= data[j];
  1072. boolean[] bits=bitset2[c>>8];
  1073. ch1Meets= bits!=null && bits[c&0xff];
  1074. }
  1075. test2:{
  1076. //if(i<offset || i>=end) break test2;
  1077. if(i>=end) break test2;
  1078. c= data[i];
  1079. boolean[] bits=bitset2[c>>8];
  1080. ch2Meets= bits!=null && bits[c&0xff];
  1081. }
  1082. if(ch1Meets^ch2Meets^term.inverse){ //is boundary ^ inv
  1083. term=term.next;
  1084. continue matchHere;
  1085. }
  1086. else break;
  1087. }
  1088. case DIRECTION:{
  1089. boolean ch1Meets=false,ch2Meets=false;
  1090. boolean[] bitset=term.bitset;
  1091. boolean inv=term.inverse;
  1092. //System.out.println("i="+i+", inv="+inv+", bitset="+CharacterClass.stringValue0(bitset));
  1093. int j=i-1;
  1094. //if(j>=offset && j<end){
  1095. if(j>=offset){
  1096. c= data[j];
  1097. ch1Meets= c<256 && bitset[c];
  1098. //System.out.println(" ch1Meets="+ch1Meets);
  1099. }
  1100. if(ch1Meets^inv) break;
  1101. //if(i>=offset && i<end){
  1102. if(i<end){
  1103. c= data[i];
  1104. ch2Meets= c<256 && bitset[c];
  1105. //System.out.println(" ch2Meets="+ch2Meets);
  1106. }
  1107. if(!ch2Meets^inv) break;
  1108. //System.out.println(" Ok");
  1109. term=term.next;
  1110. continue matchHere;
  1111. }
  1112. case UDIRECTION:{
  1113. boolean ch1Meets=false,ch2Meets=false;
  1114. boolean[][] bitset2=term.bitset2;
  1115. boolean inv=term.inverse;
  1116. int j=i-1;
  1117. //if(j>=offset && j<end){
  1118. if(j>=offset){
  1119. c= data[j];
  1120. boolean[] bits=bitset2[c>>8];
  1121. ch1Meets= bits!=null && bits[c&0xff];
  1122. }
  1123. if(ch1Meets^inv) break;
  1124. //if(i>=offset && i<end){
  1125. if(i<end){
  1126. c= data[i];
  1127. boolean[] bits=bitset2[c>>8];
  1128. ch2Meets= bits!=null && bits[c&0xff];
  1129. }
  1130. if(!ch2Meets^inv) break;
  1131. term=term.next;
  1132. continue matchHere;
  1133. }
  1134. case REG:{
  1135. MemReg mr=memregs[term.memreg];
  1136. int sampleOffset=mr.in;
  1137. int sampleOutside=mr.out;
  1138. int rLen;
  1139. if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){
  1140. break;
  1141. }
  1142. else if(rLen==0){
  1143. term=term.next;
  1144. continue matchHere;
  1145. }
  1146. // don't prevent us from reaching the 'end'
  1147. if((i+rLen)>end) break;
  1148. if(compareRegions(data,sampleOffset,i,rLen,end)){
  1149. i+=rLen;
  1150. term=term.next;
  1151. continue matchHere;
  1152. }
  1153. break;
  1154. }
  1155. case REG_I:{
  1156. MemReg mr=memregs[term.memreg];
  1157. int sampleOffset=mr.in;
  1158. int sampleOutside=mr.out;
  1159. int rLen;
  1160. if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){
  1161. break;
  1162. }
  1163. else if(rLen==0){
  1164. term=term.next;
  1165. continue matchHere;
  1166. }
  1167. // don't prevent us from reaching the 'end'
  1168. if((i+rLen)>end) break;
  1169. if(compareRegionsI(data,sampleOffset,i,rLen,end)){
  1170. i+=rLen;
  1171. term=term.next;
  1172. continue matchHere;
  1173. }
  1174. break;
  1175. }
  1176. case REPEAT_0_INF:{
  1177. //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
  1178. //i+=(cnt=repeat(data,i,end,term.target));
  1179. if((cnt=repeat(data,i,end,term.target))<=0){
  1180. term=term.next;
  1181. continue;
  1182. }
  1183. i+=cnt;
  1184. //branch out the backtracker (that is term.failNext, see make*())
  1185. actual.cnt=cnt;
  1186. actual.term=term.failNext;
  1187. actual.index=i;
  1188. actual=(top=actual).on;
  1189. if(actual==null){
  1190. actual=new SearchEntry();
  1191. top.on=actual;
  1192. actual.sub=top;
  1193. }
  1194. term=term.next;
  1195. continue;
  1196. }
  1197. case REPEAT_MIN_INF:{
  1198. //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
  1199. cnt=repeat(data,i,end,term.target);
  1200. if(cnt<term.minCount) break;
  1201. i+=cnt;
  1202. //branch out the backtracker (that is term.failNext, see make*())
  1203. actual.cnt=cnt;
  1204. actual.term=term.failNext;
  1205. actual.index=i;
  1206. actual=(top=actual).on;
  1207. if(actual==null){
  1208. actual=new SearchEntry();
  1209. top.on=actual;
  1210. actual.sub=top;
  1211. }
  1212. term=term.next;
  1213. continue;
  1214. }
  1215. case REPEAT_MIN_MAX:{
  1216. //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
  1217. int out1=end;
  1218. int out2=i+term.maxCount;
  1219. cnt=repeat(data,i,out1<out2? out1: out2,term.target);
  1220. if(cnt<term.minCount) break;
  1221. i+=cnt;
  1222. //branch out the backtracker (that is term.failNext, see make*())
  1223. actual.cnt=cnt;
  1224. actual.term=term.failNext;
  1225. actual.index=i;
  1226. actual=(top=actual).on;
  1227. if(actual==null){
  1228. actual=new SearchEntry();
  1229. top.on=actual;
  1230. actual.sub=top;
  1231. }
  1232. term=term.next;
  1233. continue;
  1234. }
  1235. case REPEAT_REG_MIN_INF:{
  1236. MemReg mr=memregs[term.memreg];
  1237. int sampleOffset=mr.in;
  1238. int sampleOutside=mr.out;
  1239. //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg);
  1240. //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg);
  1241. /*@since 1.2*/
  1242. int bitset;
  1243. if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){
  1244. break;
  1245. }
  1246. else if(bitset==0){
  1247. term=term.next;
  1248. continue matchHere;
  1249. }
  1250. cnt=0;
  1251. while(compareRegions(data,i,sampleOffset,bitset,end)){
  1252. cnt++;
  1253. i+=bitset;
  1254. }
  1255. if(cnt<term.minCount) break;
  1256. actual.cnt=cnt;
  1257. actual.term=term.failNext;
  1258. actual.index=i;
  1259. actual.regLen=bitset;
  1260. actual=(top=actual).on;
  1261. if(actual==null){
  1262. actual=new SearchEntry();
  1263. top.on=actual;
  1264. actual.sub=top;
  1265. }
  1266. term=term.next;
  1267. continue;
  1268. }
  1269. case REPEAT_REG_MIN_MAX:{
  1270. MemReg mr=memregs[term.memreg];
  1271. int sampleOffset=mr.in;
  1272. int sampleOutside=mr.out;
  1273. //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg);
  1274. //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg);
  1275. /*@since 1.2*/
  1276. int bitset;
  1277. if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){
  1278. break;
  1279. }
  1280. else if(bitset==0){
  1281. term=term.next;
  1282. continue matchHere;
  1283. }
  1284. cnt=0;
  1285. int countBack=term.maxCount;
  1286. while(countBack>0 && compareRegions(data,i,sampleOffset,bitset,end)){
  1287. cnt++;
  1288. i+=bitset;
  1289. countBack--;
  1290. }
  1291. if(cnt<term.minCount) break;
  1292. actual.cnt=cnt;
  1293. actual.term=term.failNext;
  1294. actual.index=i;
  1295. actual.regLen=bitset;
  1296. actual=(top=actual).on;
  1297. if(actual==null){
  1298. actual=new SearchEntry();
  1299. top.on=actual;
  1300. actual.sub=top;
  1301. }
  1302. term=term.next;
  1303. continue;
  1304. }
  1305. case BACKTRACK_0:
  1306. //System.out.println("<<");
  1307. cnt=actual.cnt;
  1308. if(cnt>0){
  1309. cnt--;
  1310. i--;
  1311. actual.cnt=cnt;
  1312. actual.index=i;
  1313. actual.term=term;
  1314. actual=(top=actual).on;
  1315. if(actual==null){
  1316. actual=new SearchEntry();
  1317. top.on=actual;
  1318. actual.sub=top;
  1319. }
  1320. term=term.next;
  1321. continue;
  1322. }
  1323. else break;
  1324. case BACKTRACK_MIN:
  1325. //System.out.println("<<");
  1326. cnt=actual.cnt;
  1327. if(cnt>term.minCount){
  1328. cnt--;
  1329. i--;
  1330. actual.cnt=cnt;
  1331. actual.index=i;
  1332. actual.term=term;
  1333. actual=(top=actual).on;
  1334. if(actual==null){
  1335. actual=new SearchEntry();
  1336. top.on=actual;
  1337. actual.sub=top;
  1338. }
  1339. term=term.next;
  1340. continue;
  1341. }
  1342. else break;
  1343. case BACKTRACK_FIND_MIN:{
  1344. //System.out.print("<<<[cnt=");
  1345. cnt=actual.cnt;
  1346. //System.out.print(cnt+", minCnt=");
  1347. //System.out.print(term.minCount+", target=");
  1348. //System.out.print(term.target+"]");
  1349. int minCnt;
  1350. if(cnt>(minCnt=term.minCount)){
  1351. int start=i+term.distance;
  1352. if(start>end){
  1353. int exceed=start-end;
  1354. cnt-=exceed;
  1355. if(cnt<=minCnt) break;
  1356. i-=exceed;
  1357. start=end;
  1358. }
  1359. int back=findBack(data,i+term.distance,cnt-minCnt,term.target);
  1360. //System.out.print("[back="+back+"]");
  1361. if(back<0) break;
  1362. //cnt-=back;
  1363. //i-=back;
  1364. if((cnt-=back)<=minCnt){
  1365. i-=back;
  1366. if(term.eat)i++;
  1367. term=term.next;
  1368. continue;
  1369. }
  1370. i-=back;
  1371. actual.cnt=cnt;
  1372. actual.index=i;
  1373. if(term.eat)i++;
  1374. actual.term=term;
  1375. actual=(top=actual).on;
  1376. if(actual==null){
  1377. actual=new SearchEntry();
  1378. top.on=actual;
  1379. actual.sub=top;
  1380. }
  1381. term=term.next;
  1382. continue;
  1383. }
  1384. else break;
  1385. }
  1386. case BACKTRACK_FINDREG_MIN:{
  1387. //System.out.print("<<<[cnt=");
  1388. cnt=actual.cnt;
  1389. //System.out.print(cnt+", minCnt=");
  1390. //System.out.print(term.minCount+", target=");
  1391. //System.out.print(term.target);
  1392. //System.out.print("reg=<"+memregs[term.target.memreg].in+","+memregs[term.target.memreg].out+">]");
  1393. int minCnt;
  1394. if(cnt>(minCnt=term.minCount)){
  1395. int start=i+term.distance;
  1396. if(start>end){
  1397. int exceed=start-end;
  1398. cnt-=exceed;
  1399. if(cnt<=minCnt) break;
  1400. i-=exceed;
  1401. start=end;
  1402. }
  1403. MemReg mr=memregs[term.target.memreg];
  1404. int sampleOff=mr.in;
  1405. int sampleLen=mr.out-sampleOff;
  1406. //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
  1407. //int back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end);
  1408. //if(back<0) break;
  1409. /*@since 1.2*/
  1410. int back;
  1411. if(sampleOff<0 || sampleLen<0){
  1412. //the group is not def., as in the case of '(\w+)\1'
  1413. //treat as usual BACKTRACK_MIN

Large files are truncated, but you can click here to view the full file