Matcher.java | searchcode

/src/ikj/main/org/jregex/Matcher.java

https://github.com/olabini/ioke · Java · 2298 lines · 1603 code · 195 blank · 500 comment · 321 complexity · 0d0e7c3b6f93f4dbb8346221df8dd5ab MD5 · raw file
Large files are truncated click here to view the full file

/**
 * Copyright (c) 2001, Sergey A. Samokhodkin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 * - Redistributions in binary form
 * must reproduce the above copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided with the distribution.
 * - Neither the name of jregex nor the names of its contributors may be used
 * to endorse or promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @version 1.2_01
 */

package org.jregex;

import java.util.*;
import java.io.*;

import org.jregex.Term.TermType;

/**
 * Matcher instance is an automaton that actually performs matching. It provides the following methods:
 * <li> searching for a matching substrings : matcher.find() or matcher.findAll();
 * <li> testing whether a text matches a whole pattern : matcher.matches();
 * <li> testing whether the text matches the beginning of a pattern : matcher.matchesPrefix();
 * <li> searching with custom options : matcher.find(int options)
 * <p>
 * <b>Obtaining results</b><br>
 * After the search succeded, i.e. if one of above methods returned <code>true</code>
 * one may obtain an information on the match:
 * <li> may check whether some group is captured : matcher.isCaptured(int);
 * <li> may obtain start and end positions of the match and its length : matcher.start(int),matcher.end(int),matcher.length(int);
 * <li> may obtain match contents as String : matcher.group(int).<br>
 * The same way can be obtained the match prefix and suffix information.
 * The appropriate methods are grouped in MatchResult interface, which the Matcher class implements.<br>
 * Matcher objects are not thread-safe, so only one thread may use a matcher instance at a time.
 * Note, that Pattern objects are thread-safe(the same instanse may be shared between
 * multiple threads), and the typical tactics in multithreaded applications is to have one Pattern instance per expression(a singleton),
 * and one Matcher object per thread.
 */

public class Matcher implements MatchResult{
  /* Matching options*/
  /**
   * The same effect as "^" without REFlags.MULTILINE.
   * @see Matcher#find(int)
   */
   public static final int ANCHOR_START=1;

  /**
   * The same effect as "\\G".
   * @see Matcher#find(int)
   */
   public static final int ANCHOR_LASTMATCH=2;

  /**
   * The same effect as "$" without REFlags.MULTILINE.
   * @see Matcher#find(int)
   */
   public static final int ANCHOR_END=4;

  /**
   * Experimental option; if a text ends up before the end of a pattern,report a match.
   * @see Matcher#find(int)
   */
   public static final int ACCEPT_INCOMPLETE=8;

   //see search(ANCHOR_START|...)
   private static Term startAnchor=new Term(TermType.START);

   //see search(ANCHOR_LASTMATCH|...)
   private static Term lastMatchAnchor=new Term(TermType.LAST_MATCH_END);

   private Pattern re;
   private int[] counters;
   private MemReg[] memregs;
   private LAEntry[] lookaheads;
   private int counterCount;
   private int memregCount;
   private int lookaheadCount;

   private char[] data;
   private int offset,end,wOffset,wEnd;
   private boolean shared;

   private SearchEntry top;           //stack entry
   private SearchEntry first;         //object pool entry
   private SearchEntry defaultEntry;  //called when moving the window

   private boolean called;

   private int minQueueLength;

   private String cache;

   //cache may be longer than the actual data
   //and contrariwise; so cacheOffset may have both signs.
   //cacheOffset is actually -(data offset).
   private int cacheOffset,cacheLength;

   private MemReg prefixBounds,suffixBounds,targetBounds;

   Matcher(Pattern regex){
      this.re=regex;
      //int memregCount=(memregs=new MemReg[regex.memregs]).length;
      //for(int i=0;i<memregCount;i++){
      //   this.memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition
      //}
      //counters=new int[regex.counters];
      //int lookaheadCount=(lookaheads=new LAEntry[regex.lookaheads]).length;
      //for(int i=0;i<lookaheadCount;i++){
      //   this.lookaheads[i]=new LAEntry();
      //}

      int memregCount,counterCount,lookaheadCount;
      if((memregCount=regex.memregs)>0){
         MemReg[] memregs=new MemReg[memregCount];
         for(int i=0;i<memregCount;i++){
            memregs[i]=new MemReg(-1); //unlikely to SearchEntry, in this case we know memreg indicies by definition
         }
         this.memregs=memregs;
      }

      if((counterCount=regex.counters)>0) counters=new int[counterCount];

      if((lookaheadCount=regex.lookaheads)>0){
         LAEntry[] lookaheads=new LAEntry[lookaheadCount];
         for(int i=0;i<lookaheadCount;i++){
            lookaheads[i]=new LAEntry();
         }
         this.lookaheads=lookaheads;
      }

      this.memregCount=memregCount;
      this.counterCount=counterCount;
      this.lookaheadCount=lookaheadCount;

      first=new SearchEntry();
      defaultEntry=new SearchEntry();
      minQueueLength=regex.stringRepr.length()/2;  // just evaluation!!!
   }

  /**
   * This method allows to efficiently pass data between matchers.
   * Note that a matcher may pass data to itself:<pre>
   *   Matcher m=new Pattern("\\w+").matcher(myString);
   *   if(m.find())m.setTarget(m,m.SUFFIX); //forget all that is not a suffix
   * </pre>
   * Resets current search position to zero.
   * @param m  - a matcher that is a source of data
   * @param groupId - which group to take data from
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public final void setTarget(Matcher m, int groupId){
      MemReg mr=m.bounds(groupId);
//System.out.println("setTarget("+m+","+groupId+")");
//System.out.println("   in="+mr.in);
//System.out.println("   out="+mr.out);
      if(mr==null) throw new IllegalArgumentException("group #"+groupId+" is not assigned");
      data=m.data;
      offset=mr.in;
      end=mr.out;
      cache=m.cache;
      cacheLength=m.cacheLength;
      cacheOffset=m.cacheOffset;
      if(m!=this){
         shared=true;
         m.shared=true;
      }
      init();
   }


  /**
   * Supplies a text to search in/match with.
   * Resets current search position to zero.
   * @param text - a data
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public void setTarget(String text){
      setTarget(text,0,text.length());
   }

  /**
   * Supplies a text to search in/match with, as a part of String.
   * Resets current search position to zero.
   * @param text - a data source
   * @param start - where the target starts
   * @param len - how long is the target
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public void setTarget(String text,int start,int len){
      char[] mychars=data;
      if(mychars==null || shared || mychars.length<len){
         data=mychars=new char[(int)(1.7f*len)];
         shared=false;
      }
      text.getChars(start,len,mychars,0); //(srcBegin,srcEnd,dst[],dstBegin)
      offset=0;
      end=len;

      cache=text;
      cacheOffset=-start;
      cacheLength=text.length();

      init();
   }

  /**
   * Supplies a text to search in/match with, as a part of char array.
   * Resets current search position to zero.
   * @param text - a data source
   * @param start - where the target starts
   * @param len - how long is the target
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public void setTarget(char[] text,int start,int len){
      setTarget(text,start,len,true);
   }

  /**
   * To be used with much care.
   * Supplies a text to search in/match with, as a part of a char array, as above, but also allows to permit
   * to use the array as internal buffer for subsequent inputs. That is, if we call it with <code>shared=false</code>:<pre>
   *   myMatcher.setTarget(myCharArray,x,y,<b>false</b>); //we declare that array contents is NEITHER shared NOR will be used later, so may modifications on it are permitted
   * </pre>
   * then we should expect the array contents to be changed on subsequent setTarget(..) operations.
   * Such method may yield some increase in perfomanse in the case of multiple setTarget() calls.
   * Resets current search position to zero.
   * @param text - a data source
   * @param start - where the target starts
   * @param len - how long is the target
   * @param shared - if <code>true<code>: data are shared or used later, <b>don't</b> modify it; if <code>false<code>: possible modifications of the text on subsequent <code>setTarget()</code> calls are perceived and allowed.
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   * @see Matcher#setTarget(java.io.Reader,int)
   */
   public final void setTarget(char[] text,int start,int len,boolean shared){
      cache=null;
      data=text;
      offset=start;
      end=start+len;
      this.shared=shared;
      init();
   }


  /**
   * Supplies a text to search in/match with through a stream.
   * Resets current search position to zero.
   * @param in - a data stream;
   * @param len - how much characters should be read; if len is -1, read the entire stream.
   * @see Matcher#setTarget(jregex.Matcher,int)
   * @see Matcher#setTarget(java.lang.String)
   * @see Matcher#setTarget(java.lang.String,int,int)
   * @see Matcher#setTarget(char[],int,int)
   */
   public void setTarget(Reader in,int len)throws IOException{
      if(len<0){
         setAll(in);
         return;
      }
      char[] mychars=data;
      boolean shared=this.shared;
      if(mychars==null || shared || mychars.length<len){
         mychars=new char[len];
         shared=false;
      }
      int count=0;
      int c;
      while((c=in.read(mychars,count,len))>=0){
         len-=c;
         count+=c;
         if(len==0) break;
      }
      setTarget(mychars,0,count,shared);
   }

   private void setAll(Reader in)throws IOException{
      char[] mychars=data;
      int free;
      boolean shared=this.shared;
      if(mychars==null || shared){
         mychars=new char[free=1024];
         shared=false;
      }
      else free=mychars.length;
      int count=0;
      int c;
      while((c=in.read(mychars,count,free))>=0){
         free-=c;
         count+=c;
         if(free==0){
            int newsize=count*3;
            char[] newchars=new char[newsize];
            System.arraycopy(mychars,0,newchars,0,count);
            mychars=newchars;
            free=newsize-count;
            shared=false;
         }
      }
      setTarget(mychars,0,count,shared);
   }

   private final String getString(int start,int end){
      String src=cache;
      if(src!=null){
         int co=cacheOffset;
         return src.substring(start-co,end-co);
      }
      int tOffset,tEnd,tLen=(tEnd=this.end)-(tOffset=this.offset);
      char[] data=this.data;
      if((end-start)>=(tLen/3)){
         //it makes sence to make a cache
         cache=src=new String(data,tOffset,tLen);
         cacheOffset=tOffset;
         cacheLength=tLen;
         return src.substring(start-tOffset,end-tOffset);
      }
      return new String(data,start,end-start);
   }

  /* Matching */

  /**
   * Tells whether the entire target matches the beginning of the pattern.
   * The whole pattern is also regarded as its beginning.<br>
   * This feature allows to find a mismatch by examining only a beginning part of
   * the target (as if the beginning of the target doesn't match the beginning of the pattern, then the entire target
   * also couldn't match).<br>
   * For example the following assertions yield <code>true<code>:<pre>
   *   Pattern p=new Pattern("abcd");
   *   p.matcher("").matchesPrefix();
   *   p.matcher("a").matchesPrefix();
   *   p.matcher("ab").matchesPrefix();
   *   p.matcher("abc").matchesPrefix();
   *   p.matcher("abcd").matchesPrefix();
   * </pre>
   * and the following yield <code>false<code>:<pre>
   *   p.matcher("b").isPrefix();
   *   p.matcher("abcdef").isPrefix();
   *   p.matcher("x").isPrefix();
   * </pre>
   * @return true if the entire target matches the beginning of the pattern
   */
   public final boolean matchesPrefix(){
      setPosition(0);
      return search(ANCHOR_START|ACCEPT_INCOMPLETE|ANCHOR_END);
   }

  /**
   * Just an old name for isPrefix().<br>
   * Retained for backwards compatibility.
   * @deprecated Replaced by isPrefix()
   */
   public final boolean isStart(){
      return matchesPrefix();
   }

  /**
   * Tells whether a current target matches the whole pattern.
   * For example the following yields the <code>true<code>:<pre>
   *   Pattern p=new Pattern("\\w+");
   *   p.matcher("a").matches();
   *   p.matcher("ab").matches();
   *   p.matcher("abc").matches();
   * </pre>
   * and the following yields the <code>false<code>:<pre>
   *   p.matcher("abc def").matches();
   *   p.matcher("bcd ").matches();
   *   p.matcher(" bcd").matches();
   *   p.matcher("#xyz#").matches();
   * </pre>
   * @return whether a current target matches the whole pattern.
   */
   public final boolean matches(){
if(called) setPosition(0);
      return search(ANCHOR_START|ANCHOR_END);
   }

  /**
   * Just a combination of setTarget(String) and matches().
   * @param s the target string;
   * @return whether the specified string matches the whole pattern.
   */
   public final boolean matches(String s){
      setTarget(s);
      return search(ANCHOR_START|ANCHOR_END);
   }

  /**
   * Allows to set a position the subsequent find()/find(int) will start from.
   * @param pos the position to start from;
   * @see Matcher#find()
   * @see Matcher#find(int)
   */
   public void setPosition(int pos){
      wOffset=offset+pos;
      wEnd=-1;
      called=false;
      flush();
   }

   public void setOffset(int offset){
       this.offset = offset;
       wOffset=offset;
       wEnd=-1;
       called=false;
       flush();
   }

  /**
   * Searches through a target for a matching substring, starting from just after the end of last match.
   * If there wasn't any search performed, starts from zero.
   * @return <code>true</code> if a match found.
   */
   public final boolean find(){
      if(called) skip();
      return search(0);
   }

  /**
   * Searches through a target for a matching substring, starting from just after the end of last match.
   * If there wasn't any search performed, starts from zero.
   * @param anchors a zero or a combination(bitwise OR) of ANCHOR_START,ANCHOR_END,ANCHOR_LASTMATCH,ACCEPT_INCOMPLETE
   * @return <code>true</code> if a match found.
   */
   public final boolean find(int anchors){
      if(called) skip();
      return search(anchors);
   }


  /**
   * The same as  findAll(int), but with default behaviour;
   */
   public MatchIterator findAll(){
      return findAll(0);
   }

  /**
   * Returns an iterator over the matches found by subsequently calling find(options), the search starts from the zero position.
   */
   public MatchIterator findAll(final int options){
      //setPosition(0);
      return new MatchIterator(){
         private boolean checked=false;
         private boolean hasMore=false;
         public boolean hasMore(){
            if(!checked) check();
            return hasMore;
         }
         public MatchResult nextMatch(){
            if(!checked) check();
            if(!hasMore) throw new NoSuchElementException();
            checked=false;
            return Matcher.this;
         }
         private final void check(){
            hasMore=find(options);
            checked=true;
         }
         public int count(){
            if(!checked) check();
            if(!hasMore) return 0;
            int c=1;
            while(find(options))c++;
            checked=false;
            return c;
         }
      };
   }

  /**
   * Continues to search from where the last search left off.
   * The same as proceed(0).
   * @see Matcher#proceed(int)
   */
   public final boolean proceed(){
      return proceed(0);
   }

  /**
   * Continues to search from where the last search left off using specified options:<pre>
   * Matcher m=new Pattern("\\w+").matcher("abc");
   * while(m.proceed(0)){
   *    System.out.println(m.group(0));
   * }
   * </pre>
   * Output:<pre>
   * abc
   * ab
   * a
   * bc
   * b
   * c
   * </pre>
   * For example, let's find all odd nubmers occuring in a text:<pre>
   *    Matcher m=new Pattern("\\d+").matcher("123");
   *    while(m.proceed(0)){
   *       String match=m.group(0);
   *       if(isOdd(Integer.parseInt(match))) System.out.println(match);
   *    }
   *
   *    static boolean isOdd(int i){
   *       return (i&1)>0;
   *    }
   * </pre>
   * This outputs:<pre>
   * 123
   * 1
   * 23
   * 3
   * </pre>
   * Note that using <code>find()</code> method we would find '123' only.
   * @param options search options, some of ANCHOR_START|ANCHOR_END|ANCHOR_LASTMATCH|ACCEPT_INCOMPLETE; zero value(default) stands for usual search for substring.
   */
   public final boolean proceed(int options){
//System.out.println("next() : top="+top);
      if(called){
         if(top==null){
            wOffset++;
         }
      }
      return search(0);
   }

  /**
   * Sets the current search position just after the end of last match.
   */
   public final void skip(){
      int we=wEnd;
      if(wOffset==we){ //requires special handling
         //if no variants at 'wOutside',advance pointer and clear
         if(top==null){
            wOffset++;
            flush();
         }
         //otherwise, if there exist a variant,
         //don't clear(), i.e. allow it to match
         return;
      }
      else{
         if(we<0) wOffset=0;
         else wOffset=we;
      }
      //rflush(); //rflush() works faster on simple regexes (with a small group/branch number)
      flush();
   }

   private final void init(){
      //wOffset=-1;
//System.out.println("init(): offset="+offset+", end="+end);
      wOffset=offset;
      wEnd=-1;
      called=false;
      flush();
   }

  /**
   * Resets the internal state.
   */
   private final void flush(){
      top=null;
      defaultEntry.reset(0);

/*
int c=0;
SearchEntry se=first;
while(se!=null){
   c++;
   se=se.on;
}
System.out.println("queue: allocated="+c+", truncating to "+minQueueLength);
new Exception().printStackTrace();
*/

      first.reset(minQueueLength);
      //first.reset(0);
      for(int i=memregs.length-1;i>0;i--){
         MemReg mr=memregs[i];
         mr.in=mr.out=-1;
      }
      for(int i=memregs.length-1;i>0;i--){
         MemReg mr=memregs[i];
         mr.in=mr.out=-1;
      }
      called=false;
   }

   //reverse flush
   //may work significantly faster,
   //need testing
   private final void rflush(){
      SearchEntry entry=top;
      top=null;
      MemReg[] memregs=this.memregs;
      int[] counters=this.counters;
      while(entry!=null){
         SearchEntry next=entry.sub;
         SearchEntry.popState(entry,memregs,counters);
         entry=next;
      }
      SearchEntry.popState(defaultEntry,memregs,counters);
   }

  /**
   */
   public String toString(){
      return getString(wOffset,wEnd);
   }

   public Pattern pattern(){
      return re;
   }

   public String target(){
      return getString(offset,end);
   }

  /**
   */
   public char[] targetChars(){
      shared=true;
      return data;
   }

  /**
   */
   public int targetStart(){
      return offset;
   }

  /**
   */
   public int targetEnd(){
      return end;
   }

   public char charAt(int i){
      int in=this.wOffset;
      int out=this.wEnd;
      if(in<0 || out<in) throw new IllegalStateException("unassigned");
      return data[in+i];
   }

   public char charAt(int i,int groupId){
      MemReg mr=bounds(groupId);
      if(mr==null) throw new IllegalStateException("group #"+groupId+" is not assigned");
      int in=mr.in;
      if(i<0 || i>(mr.out-in)) throw new StringIndexOutOfBoundsException(""+i);
      return data[in+i];
   }

   public final int length(){
      return wEnd-wOffset;
   }

  /**
   */
   public final int start(){
      return wOffset-offset;
   }

  /**
   */
   public final int end(){
      return wEnd-offset;
   }

  /**
   */
   public String prefix(){
      return getString(offset,wOffset);
   }

  /**
   */
   public String suffix(){
      return getString(wEnd,end);
   }

  /**
   */
   public int groupCount(){
      return memregs.length;
   }

  /**
   */
   public String group(int n){
      MemReg mr=bounds(n);
      if(mr==null) return null;
      return getString(mr.in,mr.out);
   }

  /**
   */
   public String group(String name){
      Integer id=re.groupId(name);
      if(id==null) throw new IllegalArgumentException("<"+name+"> isn't defined");
      return group(id.intValue());
   }

  /**
   */
   public boolean getGroup(int n,TextBuffer tb){
      MemReg mr=bounds(n);
      if(mr==null) return false;
      int in;
      tb.append(data,in=mr.in,mr.out-in);
      return true;
   }

  /**
   */
   public boolean getGroup(String name,TextBuffer tb){
      Integer id=re.groupId(name);
      if(id==null) throw new IllegalArgumentException("unknown group: \""+name+"\"");
      return getGroup(id.intValue(),tb);
   }

  /**
   */
   public boolean getGroup(int n,StringBuffer sb){
      MemReg mr=bounds(n);
      if(mr==null) return false;
      int in;
      sb.append(data,in=mr.in,mr.out-in);
      return true;
   }

  /**
   */
   public boolean getGroup(String name,StringBuffer sb){
      Integer id=re.groupId(name);
      if(id==null) throw new IllegalArgumentException("unknown group: \""+name+"\"");
      return getGroup(id.intValue(),sb);
   }

  /**
   */
   public String[] groups(){
      MemReg[] memregs=this.memregs;
      String[] groups=new String[memregs.length];
      int in,out;
      MemReg mr;
      for(int i=0;i<memregs.length;i++){
         in=(mr=memregs[i]).in;
         out=mr.out;
         if((in=mr.in)<0 || mr.out<in) continue;
         groups[i]=getString(in,out);
      }
      return groups;
   }

  /**
   */
   public Vector groupv(){
      MemReg[] memregs=this.memregs;
      Vector v=new Vector();
      int in,out;
      MemReg mr;
      for(int i=0;i<memregs.length;i++){
         mr=bounds(i);
         if(mr==null){
            v.addElement("empty");
            continue;
         }
         String s=getString(mr.in,mr.out);
         v.addElement(s);
      }
      return v;
   }

   private final MemReg bounds(int id){
//System.out.println("Matcher.bounds("+id+"):");
      MemReg mr;
      if(id>=0){
         mr=memregs[id];
      }
      else switch(id){
         case PREFIX:
            mr=prefixBounds;
            if(mr==null) prefixBounds=mr=new MemReg(PREFIX);
            mr.in=offset;
            mr.out=wOffset;
            break;
         case SUFFIX:
            mr=suffixBounds;
            if(mr==null) suffixBounds=mr=new MemReg(SUFFIX);
            mr.in=wEnd;
            mr.out=end;
            break;
         case TARGET:
            mr=targetBounds;
            if(mr==null) targetBounds=mr=new MemReg(TARGET);
            mr.in=offset;
            mr.out=end;
            break;
         default:
            throw new IllegalArgumentException("illegal group id: "+id+"; must either nonnegative int, or MatchResult.PREFIX, or MatchResult.SUFFIX");
      }
//System.out.println("  mr=["+mr.in+","+mr.out+"]");
      int in;
      if((in=mr.in)<0 || mr.out<in) return null;
      return mr;
   }

  /**
   */
   public final boolean isCaptured(){
      return wOffset>=0 && wEnd>=wOffset;
   }

  /**
   */
   public final boolean isCaptured(int id){
      return bounds(id)!=null;
   }

  /**
   */
   public final boolean isCaptured(String groupName){
      Integer id=re.groupId(groupName);
      if(id==null) throw new IllegalArgumentException("unknown group: \""+groupName+"\"");
      return isCaptured(id.intValue());
   }

  /**
   */
   public final int length(int id){
      MemReg mr=bounds(id);
      return mr.out-mr.in;
   }

  /**
   */
   public final int start(int id){
      return bounds(id).in-offset;
   }

  /**
   */
   public final int end(int id){
      return bounds(id).out-offset;
   }

   private final boolean search(int anchors){
      called=true;
      final int end=this.end;
      int offset=this.offset;
      char[] data=this.data;
      int wOffset=this.wOffset;
      int wEnd=this.wEnd;

      MemReg[] memregs=this.memregs;
      int[] counters=this.counters;
      LAEntry[] lookaheads=this.lookaheads;

      //int memregCount=memregs.length;
      //int cntCount=counters.length;
      int memregCount=this.memregCount;
      int cntCount=this.counterCount;

      SearchEntry defaultEntry=this.defaultEntry;
      SearchEntry first=this.first;
      SearchEntry top=this.top;
      SearchEntry actual=null;
      int cnt,regLen;
      int i;

      final boolean matchEnd=(anchors&ANCHOR_END)>0;
      final boolean allowIncomplete=(anchors&ACCEPT_INCOMPLETE)>0;

      Pattern re=this.re;
      Term root=re.root;
      Term term;
      if(top==null){
         if((anchors&ANCHOR_START)>0){
            term=re.root0;  //raw root
            root=startAnchor;
         }
         else if((anchors&ANCHOR_LASTMATCH)>0){
            term=re.root0;  //raw root
            root=lastMatchAnchor;
         }
         else{
            term=root;  //optimized root
         }
         i=wOffset;
         actual=first;
         SearchEntry.popState(defaultEntry,memregs,counters);
      }
      else{
         top=(actual=top).sub;
         term=actual.term;
         i=actual.index;
         SearchEntry.popState(actual,memregs,counters);
      }
      cnt=actual.cnt;
      regLen=actual.regLen;

      main:
      while(wOffset<=end){
         matchHere:
         for(;;){
     /*
     System.out.print("char: "+i+", term: ");
     System.out.print(term.toString());

     System.out.print(" // mrs:{");
     for(int dbi=0;dbi<memregs.length;dbi++){
        System.out.print('[');
        System.out.print(memregs[dbi].in);
        System.out.print(',');
        System.out.print(memregs[dbi].out);
        System.out.print(']');
        System.out.print(' ');
     }
     System.out.print("}, crs:{");
     for(int dbi=0;dbi<counters.length;dbi++){
        System.out.print(counters[dbi]);
        if(dbi<counters.length-1)System.out.print(',');
     }
     System.out.println("}");
     */
            int memreg,cntreg;
            char c;
            switch(term.type){
               case FIND:{
                  int jump=find(data,i+term.distance,end,term.target); //don't eat the last match
                  if(jump<0) break main; //return false
                  i+=jump;
                  wOffset=i; //force window to move
                  if(term.eat){
                     if(i==end) break;
                     i++;
                  }
                  term=term.next;
                  continue matchHere;
               }
               case FINDREG:{
                  MemReg mr=memregs[term.target.memreg];
                  int sampleOff=mr.in;
                  int sampleLen=mr.out-sampleOff;
                  //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
                  /*@since 1.2*/
                  if(sampleOff<0 || sampleLen<0){
                     break;
                  }
                  else if(sampleLen==0){
                     term=term.next;
                     continue matchHere;
                  }
                  int jump=findReg(data,i+term.distance,sampleOff,sampleLen,term.target,end); //don't eat the last match
                  if(jump<0) break main; //return false
                  i+=jump;
                  wOffset=i; //force window to move
                  if(term.eat){
                     i+=sampleLen;
                     if(i>end) break;
                  }
                  term=term.next;
                  continue matchHere;
               }
               case VOID:
                  term=term.next;
                  continue matchHere;

               case CHAR:
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end || data[i]!=term.c) break;
//System.out.println("CHAR: "+data[i]+", i="+i);
                  i++;
                  term=term.next;
                  continue matchHere;

               case ANY_CHAR:
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end) break;
                  i++;
                  term=term.next;
                  continue matchHere;

               case ANY_CHAR_NE:
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end || data[i]=='\n') break;
                  i++;
                  term=term.next;
                  continue matchHere;

               case END:
                  if(i>=end){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  break;

               case END_EOL:  //perl's $
                  if(i>=end){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else{
                     boolean matches=
                        i>=end |
                        ((i+1)==end && data[i]=='\n');

                     if(matches){
                        term=term.next;
                        continue matchHere;
                     }
                     else break;
                  }

               case LINE_END:
                  if(i>=end){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else{
                     /*
                     if(((c=data[i])=='\r' || c=='\n') &&
                           (c=data[i-1])!='\r' && c!='\n'){
                        term=term.next;
                        continue matchHere;
                     }
                     */
                     //5 aug 2001
                     if(data[i]=='\n'){
                        term=term.next;
                        continue matchHere;
                     }
                  }
                  break;

               case START: //Perl's "^"
                  if(i==offset){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  //break;

                  //changed on 27-04-2002
                  //due to a side effect: if ALLOW_INCOMPLETE is enabled,
                  //the anchorStart moves up to the end and succeeds
                  //(see comments at the last lines of matchHere, ~line 1830)
                  //Solution: if there are some entries on the stack ("^a|b$"),
                  //try them; otherwise it's a final 'no'
                  //if(top!=null) break;
                  //else break main;

                  //changed on 25-05-2002
                  //rationale: if the term is startAnchor,
                  //it's the root term by definition,
                  //so if it doesn't match, the entire pattern
                  //couldn't match too;
                  //otherwise we could have the following problem:
                  //"c|^a" against "abc" finds only "a"
                  if(top!=null) break;
                  if(term!=startAnchor) break;
                  else break main;

               case LAST_MATCH_END:
                  if(i==wEnd || wEnd == -1){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  break main; //return false

               case LINE_START:
                  if(i==offset){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else if(i<end){
                     /*
                     if(((c=data[i-1])=='\r' || c=='\n') &&
                           (c=data[i])!='\r' && c!='\n'){
                        term=term.next;
                        continue matchHere;
                     }
                     */
                     //5 aug 2001
                     //if((c=data[i-1])=='\r' || c=='\n'){ ??
                     if((c=data[i-1])=='\n'){
                        term=term.next;
                        continue matchHere;
                     }
                  }
                  break;

               case BITSET:{
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end) break;
                  c=data[i];
                  if(!(c<=255 && term.bitset[c])^term.inverse) break;
                  i++;
                  term=term.next;
                  continue matchHere;
               }
               case BITSET2:{
                  //can only be 1-char-wide
                  //  \/
                  if(i>=end) break;
                  c=data[i];
                  boolean[] arr=term.bitset2[c>>8];
                  if(arr==null || !arr[c&255]^term.inverse) break;
                  i++;
                  term=term.next;
                  continue matchHere;
               }
               case BOUNDARY:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[] bitset=term.bitset;
                  test1:{
                     int j=i-1;
                     //if(j<offset || j>=end) break test1;
                     if(j<offset) break test1;
                     c= data[j];
                     ch1Meets= (c<256 && bitset[c]);
                  }
                  test2:{
                     //if(i<offset || i>=end) break test2;
                     if(i>=end) break test2;
                     c= data[i];
                     ch2Meets= (c<256 && bitset[c]);
                  }
                  if(ch1Meets^ch2Meets^term.inverse){  //meets
                     term=term.next;
                     continue matchHere;
                  }
                  else break;
               }
               case UBOUNDARY:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[][] bitset2=term.bitset2;
                  test1:{
                     int j=i-1;
                     //if(j<offset || j>=end) break test1;
                     if(j<offset) break test1;
                     c= data[j];
                     boolean[] bits=bitset2[c>>8];
                     ch1Meets= bits!=null && bits[c&0xff];
                  }
                  test2:{
                     //if(i<offset || i>=end) break test2;
                     if(i>=end) break test2;
                     c= data[i];
                     boolean[] bits=bitset2[c>>8];
                     ch2Meets= bits!=null && bits[c&0xff];
                  }
                  if(ch1Meets^ch2Meets^term.inverse){  //is boundary ^ inv
                     term=term.next;
                     continue matchHere;
                  }
                  else break;
               }
               case DIRECTION:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[] bitset=term.bitset;
                  boolean inv=term.inverse;
//System.out.println("i="+i+", inv="+inv+", bitset="+CharacterClass.stringValue0(bitset));
                  int j=i-1;
                  //if(j>=offset && j<end){
                  if(j>=offset){
                     c= data[j];
                     ch1Meets= c<256 && bitset[c];
//System.out.println("    ch1Meets="+ch1Meets);
                  }
                  if(ch1Meets^inv) break;

                  //if(i>=offset && i<end){
                  if(i<end){
                     c= data[i];
                     ch2Meets= c<256 && bitset[c];
//System.out.println("    ch2Meets="+ch2Meets);
                  }
                  if(!ch2Meets^inv) break;

//System.out.println("    Ok");

                  term=term.next;
                  continue matchHere;
               }
               case UDIRECTION:{
                  boolean ch1Meets=false,ch2Meets=false;
                  boolean[][] bitset2=term.bitset2;
                  boolean inv=term.inverse;
                  int j=i-1;

                  //if(j>=offset && j<end){
                  if(j>=offset){
                     c= data[j];
                     boolean[] bits=bitset2[c>>8];
                     ch1Meets= bits!=null && bits[c&0xff];
                  }
                  if(ch1Meets^inv) break;

                  //if(i>=offset && i<end){
                  if(i<end){
                     c= data[i];
                     boolean[] bits=bitset2[c>>8];
                     ch2Meets= bits!=null && bits[c&0xff];
                  }
                  if(!ch2Meets^inv) break;

                  term=term.next;
                  continue matchHere;
               }
               case REG:{
                  MemReg mr=memregs[term.memreg];
                  int sampleOffset=mr.in;
                  int sampleOutside=mr.out;
                  int rLen;
                  if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){
                     break;
                  }
                  else if(rLen==0){
                     term=term.next;
                     continue matchHere;
                  }

                  // don't prevent us from reaching the 'end'
                  if((i+rLen)>end) break;

                  if(compareRegions(data,sampleOffset,i,rLen,end)){
                     i+=rLen;
                     term=term.next;
                     continue matchHere;
                  }
                  break;
               }
               case REG_I:{
                  MemReg mr=memregs[term.memreg];
                  int sampleOffset=mr.in;
                  int sampleOutside=mr.out;
                  int rLen;
                  if(sampleOffset<0 || (rLen=sampleOutside-sampleOffset)<0){
                     break;
                  }
                  else if(rLen==0){
                     term=term.next;
                     continue matchHere;
                  }

                  // don't prevent us from reaching the 'end'
                  if((i+rLen)>end) break;

                  if(compareRegionsI(data,sampleOffset,i,rLen,end)){
                     i+=rLen;
                     term=term.next;
                     continue matchHere;
                  }
                  break;
               }
               case REPEAT_0_INF:{
                   //System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
                  //i+=(cnt=repeat(data,i,end,term.target));
                  if((cnt=repeat(data,i,end,term.target))<=0){
                     term=term.next;
                     continue;
                  }
                  i+=cnt;

                  //branch out the backtracker (that is term.failNext, see make*())
                  actual.cnt=cnt;
                  actual.term=term.failNext;
                  actual.index=i;
                  actual=(top=actual).on;
                  if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                  }
                  term=term.next;
                  continue;
               }
               case REPEAT_MIN_INF:{
//System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
                  cnt=repeat(data,i,end,term.target);
                  if(cnt<term.minCount) break;
                  i+=cnt;

                  //branch out the backtracker (that is term.failNext, see make*())
                  actual.cnt=cnt;
                  actual.term=term.failNext;
                  actual.index=i;
                  actual=(top=actual).on;
                  if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                  }
                  term=term.next;
                  continue;
               }
               case REPEAT_MIN_MAX:{
//System.out.println("REPEAT, i="+i+", term.minCount="+term.minCount+", term.maxCount="+term.maxCount);
                  int out1=end;
                  int out2=i+term.maxCount;
                  cnt=repeat(data,i,out1<out2? out1: out2,term.target);
                  if(cnt<term.minCount) break;
                  i+=cnt;

                  //branch out the backtracker (that is term.failNext, see make*())
                  actual.cnt=cnt;
                  actual.term=term.failNext;
                  actual.index=i;
                  actual=(top=actual).on;
                  if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                  }
                  term=term.next;
                  continue;
               }
               case REPEAT_REG_MIN_INF:{
               	 MemReg mr=memregs[term.memreg];
                  int sampleOffset=mr.in;
                  int sampleOutside=mr.out;
                  //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg);
                  //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg);
                  /*@since 1.2*/
                  int bitset;
                  if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){
                     break;
                  }
                  else if(bitset==0){
                     term=term.next;
                     continue matchHere;
                  }

                  cnt=0;

                  while(compareRegions(data,i,sampleOffset,bitset,end)){
                     cnt++;
                     i+=bitset;
                  }

                  if(cnt<term.minCount) break;

                  actual.cnt=cnt;
                  actual.term=term.failNext;
                  actual.index=i;
                  actual.regLen=bitset;
                  actual=(top=actual).on;
                  if(actual==null){
                     actual=new SearchEntry();
                     top.on=actual;
                     actual.sub=top;
                  }
                  term=term.next;
                  continue;
               }
               case REPEAT_REG_MIN_MAX:{
               	   MemReg mr=memregs[term.memreg];
                  int sampleOffset=mr.in;
                  int sampleOutside=mr.out;
                  //if(sampleOffset<0) throw new Error("register is referred before definition: "+term.memreg);
                  //if(sampleOutside<0 || sampleOutside<sampleOffset) throw new Error("register is referred within definition: "+term.memreg);
                  /*@since 1.2*/
                  int bitset;
                  if(sampleOffset<0 || (bitset=sampleOutside-sampleOffset)<0){
                     break;
                  }
                  else if(bitset==0){
                     term=term.next;
                     continue matchHere;
                  }

                  cnt=0;
                  int countBack=term.maxCount;
                  while(countBack>0 && compareRegions(data,i,sampleOffset,bitset,end)){
                     cnt++;
                     i+=bitset;
                     countBack--;
                  }

                  if(cnt<term.minCount) break;

                  actual.cnt=cnt;
                  actual.term=term.failNext;
                  actual.index=i;
                  actual.regLen=bitset;
                  actual=(top=actual).on;
                  if(actual==null){
                     actual=new SearchEntry();
                     top.on=actual;
                     actual.sub=top;
                  }
                  term=term.next;
                  continue;
               }
               case BACKTRACK_0:
//System.out.println("<<");
                  cnt=actual.cnt;
                  if(cnt>0){
                     cnt--;
                     i--;
                     actual.cnt=cnt;
                     actual.index=i;
                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;

               case BACKTRACK_MIN:
//System.out.println("<<");
                  cnt=actual.cnt;
                  if(cnt>term.minCount){
                     cnt--;
                     i--;
                     actual.cnt=cnt;
                     actual.index=i;
                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;

               case BACKTRACK_FIND_MIN:{
//System.out.print("<<<[cnt=");
                  cnt=actual.cnt;
//System.out.print(cnt+", minCnt=");
//System.out.print(term.minCount+", target=");
//System.out.print(term.target+"]");
                  int minCnt;
                  if(cnt>(minCnt=term.minCount)){
                     int start=i+term.distance;
                     if(start>end){
                        int exceed=start-end;
                        cnt-=exceed;
                        if(cnt<=minCnt) break;
                        i-=exceed;
                        start=end;
                     }
                     int back=findBack(data,i+term.distance,cnt-minCnt,term.target);
//System.out.print("[back="+back+"]");
                     if(back<0) break;

                     //cnt-=back;
                     //i-=back;
                     if((cnt-=back)<=minCnt){
                        i-=back;
                        if(term.eat)i++;
                        term=term.next;
                        continue;
                     }
                     i-=back;

                     actual.cnt=cnt;
                     actual.index=i;

                     if(term.eat)i++;

                     actual.term=term;
                     actual=(top=actual).on;
                     if(actual==null){
                        actual=new SearchEntry();
                        top.on=actual;
                        actual.sub=top;
                     }
                     term=term.next;
                     continue;
                  }
                  else break;
               }

               case BACKTRACK_FINDREG_MIN:{
//System.out.print("<<<[cnt=");
                  cnt=actual.cnt;
//System.out.print(cnt+", minCnt=");
//System.out.print(term.minCount+", target=");
//System.out.print(term.target);
//System.out.print("reg=<"+memregs[term.target.memreg].in+","+memregs[term.target.memreg].out+">]");
                  int minCnt;
                  if(cnt>(minCnt=term.minCount)){
                     int start=i+term.distance;
                     if(start>end){
                        int exceed=start-end;
                        cnt-=exceed;
                        if(cnt<=minCnt) break;
                        i-=exceed;
                        start=end;
                     }
                     MemReg mr=memregs[term.target.memreg];
                     int sampleOff=mr.in;
                     int sampleLen=mr.out-sampleOff;
                     //if(sampleOff<0 || sampleLen<0) throw new Error("backreference used before definition: \\"+term.memreg);
                     //int back=findBackReg(data,i+term.distance,sampleOff,sampleLen,cnt-minCnt,term.target,end);
                     //if(back<0) break;
                     /*@since 1.2*/
                     int back;
                     if(sampleOff<0 || sampleLen<0){
                     //the group is not def., as in the case of '(\w+)\1'
                     //treat as usual BACKTRACK_MIN…
Tech Fingerprint

Alerts (10)

'import' Maintainability Info: Wildcard imports (e.g., `import java.util.*;`) can obscure the origin of classes and lead to namespace collisions. Prefer importing specific classes explicitly.
32 33
Complexity hotspot; line 219 (total complexity: 3)
219
Complexity hotspot; line 295 (total complexity: 3)
295
'return null;' Returning null forces callers to perform null checks, risking NullPointerException. Consider using Optional<T> (Java 8+), throwing an exception, or returning a Null Object/empty collection instead.
723 836
Complexity hotspot; lines 1094 to 1096 (total complexity: 3)
1094 1095 1096
Complexity hotspot; line 1099 (total complexity: 3)
1099