PageRenderTime 103ms CodeModel.GetById 22ms app.highlight 70ms RepoModel.GetById 1ms app.codeStats 1ms

/js/src/yarr/pcre/pcre_exec.cpp

http://github.com/zpao/v8monkey
C++ | 2192 lines | 1480 code | 331 blank | 381 comment | 428 complexity | 9ffcd184ede3ce61e3c420ee9f8311da MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* This is JavaScriptCore's variant of the PCRE library. While this library
   2started out as a copy of PCRE, many of the features of PCRE have been
   3removed. This library now supports only the regular expression features
   4required by the JavaScript language specification, and has only the functions
   5needed by JavaScriptCore and the rest of WebKit.
   6
   7                 Originally written by Philip Hazel
   8           Copyright (c) 1997-2006 University of Cambridge
   9    Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
  10    Copyright (C) 2007 Eric Seidel <eric@webkit.org>
  11
  12-----------------------------------------------------------------------------
  13Redistribution and use in source and binary forms, with or without
  14modification, are permitted provided that the following conditions are met:
  15
  16    * Redistributions of source code must retain the above copyright notice,
  17      this list of conditions and the following disclaimer.
  18
  19    * Redistributions in binary form must reproduce the above copyright
  20      notice, this list of conditions and the following disclaimer in the
  21      documentation and/or other materials provided with the distribution.
  22
  23    * Neither the name of the University of Cambridge nor the names of its
  24      contributors may be used to endorse or promote products derived from
  25      this software without specific prior written permission.
  26
  27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  37POSSIBILITY OF SUCH DAMAGE.
  38-----------------------------------------------------------------------------
  39*/
  40
  41/* This module contains jsRegExpExecute(), the externally visible function
  42that does pattern matching using an NFA algorithm, following the rules from
  43the JavaScript specification. There are also some supporting functions. */
  44
  45#include "pcre_internal.h"
  46
  47#include <limits.h>
  48#include "yarr/ASCIICType.h"
  49#include "jsarena.h"
  50#include "jscntxt.h"
  51
  52using namespace WTF;
  53
  54#if !WTF_COMPILER_MSVC && !WTF_COMPILER_SUNPRO
  55#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
  56#endif
  57
  58/* Note: Webkit sources have USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP disabled. */
  59/* Note: There are hardcoded constants all over the place, but in the port of
  60 Yarr to TraceMonkey two bytes are added to the OP_BRA* opcodes, so the
  61 instruction stream now looks like this at the start of a bracket group:
  62
  63    OP_BRA* [link:LINK_SIZE] [minNestedBracket,maxNestedBracket:2]
  64
  65 Both capturing and non-capturing brackets encode this information. */
  66
  67/* Avoid warnings on Windows. */
  68#undef min
  69#undef max
  70
/* ReturnLocation identifies where match() resumes after a "recursive" call
   returns. Without computed gotos it is the integer passed to RMATCH_WHERE;
   with computed gotos it holds the address of a label. */
#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
typedef int ReturnLocation;
#else
typedef void* ReturnLocation;
#endif
  76
/* Node on a stack of brackets. This is used to detect and reject
 matches of the empty string per ECMAScript repeat match rules. This
 also prevents infinite loops on quantified empty matches. One node
 represents the start state at the start of this bracket group.
 Nodes are linked and filled in by MatchFrame::startNewGroup(). */
struct BracketChainNode {
    /* The node that was current when this group was entered (may be null). */
    BracketChainNode* previousBracket;
    /* Subject position at which this group was entered. */
    const UChar* bracketStart;
    /* True if the minimum number of matches was already satisfied
     when we started matching this group. */
    bool minSatisfied;
};
  88
  89struct MatchFrame {
  90    ReturnLocation returnLocation;
  91    struct MatchFrame* previousFrame;
  92    int *savedOffsets;
  93    /* The frame allocates saved offsets into the regular expression arena pool so
  94     that they can be restored during backtracking. */
  95    size_t savedOffsetsSize;
  96    JSArenaPool *regExpPool;
  97
  98    MatchFrame() : savedOffsetsSize(0), regExpPool(0) {}
  99    void init(JSArenaPool *regExpPool) { this->regExpPool = regExpPool; }
 100    
 101    /* Function arguments that may change */
 102    struct {
 103        const UChar* subjectPtr;
 104        const unsigned char* instructionPtr;
 105        int offsetTop;
 106        BracketChainNode* bracketChain;
 107    } args;
 108    
 109    
 110    /* PCRE uses "fake" recursion built off of gotos, thus
 111     stack-based local variables are not safe to use.  Instead we have to
 112     store local variables on the current MatchFrame. */
 113    struct {
 114        const unsigned char* data;
 115        const unsigned char* startOfRepeatingBracket;
 116        const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
 117        const unsigned char* instructionPtrAtStartOfOnce;
 118        
 119        int repeatOthercase;
 120        int savedSubjectOffset;
 121        
 122        int ctype;
 123        int fc;
 124        int fi;
 125        int length;
 126        int max;
 127        int number;
 128        int offset;
 129        int skipBytes;
 130        int minBracket;
 131        int limitBracket;
 132        int bracketsBefore;
 133        bool minSatisfied;
 134        
 135        BracketChainNode bracketChainNode;
 136    } locals;
 137
 138    void saveOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
 139        JS_ASSERT(regExpPool);
 140        JS_ASSERT(minBracket >= 0);
 141        JS_ASSERT(limitBracket >= minBracket);
 142        JS_ASSERT(offsetEnd >= 0);
 143        if (minBracket == limitBracket)
 144            return;
 145        const size_t newSavedOffsetCount = 3 * (limitBracket - minBracket);
 146        /* Increase saved offset space if necessary. */
 147        {
 148            size_t targetSize = sizeof(*savedOffsets) * newSavedOffsetCount;
 149            if (savedOffsetsSize < targetSize) {
 150                JS_ARENA_ALLOCATE_CAST(savedOffsets, int *, regExpPool, targetSize);
 151                JS_ASSERT(savedOffsets); /* FIXME: error code, bug 574459. */
 152                savedOffsetsSize = targetSize;
 153            }
 154        }
 155        for (unsigned i = 0; i < unsigned(limitBracket - minBracket); ++i) {
 156            int bracketIter = minBracket + i;
 157            JS_ASSERT(2 * bracketIter + 1 <= offsetEnd);
 158            int start = offsets[2 * bracketIter];
 159            int end = offsets[2 * bracketIter + 1];
 160            JS_ASSERT(bracketIter <= offsetEnd);
 161            int offset = offsets[offsetEnd - bracketIter];
 162            DPRINTF(("saving bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset));
 163            JS_ASSERT(start <= end);
 164            JS_ASSERT(i * 3 + 2 < newSavedOffsetCount);
 165            savedOffsets[i * 3 + 0] = start;
 166            savedOffsets[i * 3 + 1] = end;
 167            savedOffsets[i * 3 + 2] = offset;
 168        }
 169    }
 170
 171    void clobberOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
 172        for (int i = 0; i < limitBracket - minBracket; ++i) {
 173            int bracketIter = minBracket + i;
 174            JS_ASSERT(2 * bracketIter + 1 < offsetEnd);
 175            offsets[2 * bracketIter + 0] = -1;
 176            offsets[2 * bracketIter + 1] = -1;
 177        }
 178    }
 179
 180    void restoreOffsets(int minBracket, int limitBracket, int *offsets, int offsetEnd) {
 181        JS_ASSERT(regExpPool);
 182        JS_ASSERT_IF(limitBracket > minBracket, savedOffsets);
 183        for (int i = 0; i < limitBracket - minBracket; ++i) {
 184            int bracketIter = minBracket + i;
 185            int start = savedOffsets[i * 3 + 0];
 186            int end = savedOffsets[i * 3 + 1];
 187            int offset = savedOffsets[i * 3 + 2];
 188            DPRINTF(("restoring bracket %d; start: %d; end: %d; offset: %d\n", bracketIter, start, end, offset));
 189            JS_ASSERT(start <= end);
 190            offsets[2 * bracketIter + 0] = start;
 191            offsets[2 * bracketIter + 1] = end;
 192            offsets[offsetEnd - bracketIter] = offset;
 193        }
 194    }
 195
 196    /* Extract the bracket data after the current opcode/link at |instructionPtr| into the locals. */
 197    void extractBrackets(const unsigned char *instructionPtr) {
 198        uint16_t bracketMess = get2ByteValue(instructionPtr + 1 + LINK_SIZE);
 199        locals.minBracket = (bracketMess >> 8) & 0xff;
 200        locals.limitBracket = (bracketMess & 0xff);
 201        JS_ASSERT(locals.minBracket <= locals.limitBracket);
 202    }
 203
 204    /* At the start of a bracketed group, add the current subject pointer to the
 205     stack of such pointers, to be re-instated at the end of the group when we hit
 206     the closing ket. When match() is called in other circumstances, we don't add to
 207     this stack. */
 208    void startNewGroup(bool minSatisfied) {
 209        locals.bracketChainNode.previousBracket = args.bracketChain;
 210        locals.bracketChainNode.bracketStart = args.subjectPtr;
 211        locals.bracketChainNode.minSatisfied = minSatisfied;
 212        args.bracketChain = &locals.bracketChainNode;
 213    }
 214};
 215
 216/* Structure for passing "static" information around between the functions
 217doing traditional NFA matching, so that they are thread-safe. */
 218
 219struct MatchData {
 220    int             *offsetVector;  /* Offset vector */
 221    int             offsetEnd;      /* One past the end */
 222    int             offsetMax;      /* The maximum usable for return data */
 223    bool            offsetOverflow; /* Set if too many extractions */
 224    const UChar     *startSubject;  /* Start of the subject string */
 225    const UChar     *endSubject;    /* End of the subject string */
 226    const UChar     *endMatchPtr;   /* Subject position at end match */
 227    int             endOffsetTop;   /* Highwater mark at end of match */
 228    bool            multiline;
 229    bool            ignoreCase;
 230
 231    void setOffsetPair(size_t pairNum, int start, int end) {
 232        JS_ASSERT(int(2 * pairNum + 1) < offsetEnd && int(pairNum) < offsetEnd);
 233        JS_ASSERT(start <= end);
 234        JS_ASSERT_IF(start < 0, start == end && start == -1);
 235        DPRINTF(("setting offset pair at %u (%d, %d)\n", pairNum, start, end));
 236        offsetVector[2 * pairNum + 0] = start;
 237        offsetVector[2 * pairNum + 1] = end;
 238    }
 239};
 240
 241/* The maximum remaining length of subject we are prepared to search for a
 242reqByte match. */
 243
 244#define REQ_BYTE_MAX 1000
 245
/* The below limit restricts the number of "recursive" match calls in order to
avoid spending exponential time on complex regular expressions. match() counts
down from this value each time it passes its RECURSE label and gives up with
JSRegExpErrorHitLimit when the counter reaches zero. */

static const unsigned matchLimit = 1000000;
 250
 251/*************************************************
 252*          Match a back-reference                *
 253*************************************************/
 254
 255/* If a back reference hasn't been set, the length that is passed is greater
 256than the number of characters left in the string, so the match fails.
 257
 258Arguments:
 259  offset      index into the offset vector
 260  subjectPtr        points into the subject
 261  length      length to be matched
 262  md          points to match data block
 263
 264Returns:      true if matched
 265*/
 266
 267static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
 268{
 269    const UChar* p = md.startSubject + md.offsetVector[offset];
 270    
 271    /* Always fail if not enough characters left */
 272    
 273    if (length > md.endSubject - subjectPtr)
 274        return false;
 275    
 276    /* Separate the caselesss case for speed */
 277    
 278    if (md.ignoreCase) {
 279        while (length-- > 0) {
 280            UChar c = *p++;
 281            int othercase = jsc_pcre_ucp_othercase(c);
 282            UChar d = *subjectPtr++;
 283            if (c != d && othercase != d)
 284                return false;
 285        }
 286    }
 287    else {
 288        while (length-- > 0)
 289            if (*p++ != *subjectPtr++)
 290                return false;
 291    }
 292    
 293    return true;
 294}
 295
/* Helper macros implementing match()'s "recursion". They expand inside
   match() and rely on its local |stack| and |isMatch| variables and on its
   RECURSE label. */

#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION

/* Use numbered labels and switch statement at the bottom of the match function. */

#define RMATCH_WHERE(num) num
#define RRETURN_LABEL RRETURN_SWITCH

#else

/* Use GCC's computed goto extension. */

/* For one test case this is more than 40% faster than the switch statement.
We could avoid the use of the num argument entirely by using local labels,
but using it for the GCC case as well as the non-GCC case allows us to share
a bit more code and notice if we use conflicting numbers.*/

#define RMATCH_WHERE(num) JS_EXTENSION(&&RRETURN_##num)
#define RRETURN_LABEL *stack.currentFrame->returnLocation

#endif

/* Jump to RECURSE to run the callee; RRETURN_<num> is the label the callee
   returns to, at which point the callee's frame is popped. */
#define RECURSIVE_MATCH_COMMON(num) \
    goto RECURSE;\
    RRETURN_##num: \
    stack.popCurrentFrame();

/* "Call" match() with instruction pointer |ra| and bracket chain |rb|;
   |num| must be unique per call site because it names the return label. */
#define RECURSIVE_MATCH(num, ra, rb) \
    do { \
        stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
        RECURSIVE_MATCH_COMMON(num) \
    } while (0)

/* As RECURSIVE_MATCH, but the new frame also starts a bracket group whose
   minSatisfied flag is |gm|. */
#define RECURSIVE_MATCH_NEW_GROUP(num, ra, rb, gm) \
    do { \
        stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
        stack.currentFrame->startNewGroup(gm); \
        RECURSIVE_MATCH_COMMON(num) \
    } while (0)

/* "Return" from the current frame by jumping to RRETURN_LABEL. */
#define RRETURN do { JS_EXTENSION_(goto RRETURN_LABEL); } while (0)

/* "Return" from the current frame reporting failure. */
#define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
 338
 339/*************************************************
 340*         Match from current position            *
 341*************************************************/
 342
 343/* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
 344in the subject string, while substringStart holds the value of subjectPtr at the start of the
 345last bracketed group - used for breaking infinite loops matching zero-length
 346strings. This function is called recursively in many circumstances. Whenever it
 347returns a negative (error) response, the outer match() call must also return the
 348same response.
 349
 350Arguments:
 351   subjectPtr        pointer in subject
 352   instructionPtr       position in code
 353   offsetTop  current top pointer
 354   md          pointer to "static" info for the match
 355
 356Returns:       1 if matched          )  these values are >= 0
 357               0 if failed to match  )
 358               a negative error value if aborted by an error condition
 359                 (e.g. stopped by repeated call or recursion limit)
 360*/
 361
 362static const unsigned numFramesOnStack = 16;
 363
 364struct MatchStack {
 365    JSArenaPool *regExpPool;
 366    void *regExpPoolMark;
 367
 368    MatchStack(JSArenaPool *regExpPool)
 369        : regExpPool(regExpPool)
 370        , regExpPoolMark(JS_ARENA_MARK(regExpPool))
 371        , framesEnd(frames + numFramesOnStack)
 372        , currentFrame(frames)
 373        , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
 374    {
 375        JS_ASSERT((sizeof(frames) / sizeof(frames[0])) == numFramesOnStack);
 376        JS_ASSERT(regExpPool);
 377        for (size_t i = 0; i < numFramesOnStack; ++i)
 378            frames[i].init(regExpPool);
 379    }
 380
 381    ~MatchStack() { JS_ARENA_RELEASE(regExpPool, regExpPoolMark); }
 382    
 383    MatchFrame frames[numFramesOnStack];
 384    MatchFrame* framesEnd;
 385    MatchFrame* currentFrame;
 386    unsigned size;
 387    
 388    bool canUseStackBufferForNextFrame() {
 389        return size < numFramesOnStack;
 390    }
 391    
 392    MatchFrame* allocateNextFrame() {
 393        if (canUseStackBufferForNextFrame())
 394            return currentFrame + 1;
 395        // FIXME: bug 574459 -- no NULL check
 396        MatchFrame *frame = js::OffTheBooks::new_<MatchFrame>();
 397        frame->init(regExpPool);
 398        return frame;
 399    }
 400    
 401    void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation) {
 402        MatchFrame* newframe = allocateNextFrame();
 403        newframe->previousFrame = currentFrame;
 404
 405        newframe->args.subjectPtr = currentFrame->args.subjectPtr;
 406        newframe->args.offsetTop = currentFrame->args.offsetTop;
 407        newframe->args.instructionPtr = instructionPtr;
 408        newframe->args.bracketChain = bracketChain;
 409        newframe->returnLocation = returnLocation;
 410        size++;
 411
 412        currentFrame = newframe;
 413    }
 414    
 415    void popCurrentFrame() {
 416        MatchFrame* oldFrame = currentFrame;
 417        currentFrame = currentFrame->previousFrame;
 418        if (size > numFramesOnStack)
 419            js::Foreground::delete_(oldFrame);
 420        size--;
 421    }
 422
 423    void popAllFrames() {
 424        while (size)
 425            popCurrentFrame();
 426    }
 427};
 428
 429static int matchError(int errorCode, MatchStack& stack)
 430{
 431    stack.popAllFrames();
 432    return errorCode;
 433}
 434
 435/* Get the next UTF-8 character, not advancing the pointer, incrementing length
 436 if there are extra bytes. This is called when we know we are in UTF-8 mode. */
 437
 438static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
 439{
 440    c = *subjectPtr;
 441    if ((c & 0xc0) == 0xc0) {
 442        int gcaa = jsc_pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
 443        int gcss = 6 * gcaa;
 444        c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
 445        for (int gcii = 1; gcii <= gcaa; gcii++) {
 446            gcss -= 6;
 447            c |= (subjectPtr[gcii] & 0x3f) << gcss;
 448        }
 449        len += gcaa;
 450    }
 451}
 452
 453static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
 454{
 455    // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
 456    static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
 457    static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
 458
 459    JS_ASSERT(instructionOffset >= 0);
 460    JS_ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
 461
 462    minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
 463    minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
 464    maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
 465}
 466
 467/* Helper class for passing a flag value from one op to the next that runs.
 468 This allows us to set the flag in certain ops. When the flag is read, it
 469 will be true only if the previous op set the flag, otherwise it is false. */
 470class LinearFlag {
 471public:
 472    LinearFlag() : flag(false) {}
 473    
 474    bool readAndClear() {
 475        bool rv = flag;
 476        flag = false;
 477        return rv;
 478    }
 479
 480    void set() {
 481        flag = true;
 482    }
 483
 484private:
 485    bool flag;
 486};
 487
 488static int
 489match(JSArenaPool *regExpPool, const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
 490{
 491    bool isMatch = false;
 492    int min;
 493    bool minimize = false; /* Initialization not really needed, but some compilers think so. */
 494    unsigned remainingMatchCount = matchLimit;
 495    int othercase; /* Declare here to avoid errors during jumps */
 496    bool minSatisfied;
 497    
 498    MatchStack stack(regExpPool);
 499    LinearFlag minSatNextBracket;
 500
 501    /* The opcode jump table. */
 502#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
 503#define EMIT_JUMP_TABLE_ENTRY(opcode) JS_EXTENSION(&&LABEL_OP_##opcode)
 504    static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
 505#undef EMIT_JUMP_TABLE_ENTRY
 506#endif
 507    
 508    /* One-time setup of the opcode jump table. */
 509#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
 510    for (int i = 255; !opcodeJumpTable[i]; i--)
 511        opcodeJumpTable[i] = &&CAPTURING_BRACKET;
 512#endif
 513    
 514#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
 515    // Shark shows this as a hot line
 516    // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
 517    stack.currentFrame->returnLocation = JS_EXTENSION(&&RETURN);
 518#else
 519    stack.currentFrame->returnLocation = 0;
 520#endif
 521    stack.currentFrame->args.subjectPtr = subjectPtr;
 522    stack.currentFrame->args.instructionPtr = instructionPtr;
 523    stack.currentFrame->args.offsetTop = offsetTop;
 524    stack.currentFrame->args.bracketChain = 0;
 525    stack.currentFrame->startNewGroup(false);
 526    
 527    /* This is where control jumps back to to effect "recursion" */
 528    
 529RECURSE:
 530    if (!--remainingMatchCount)
 531        return matchError(JSRegExpErrorHitLimit, stack);
 532
 533    /* Now start processing the operations. */
 534    
 535#ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
 536    while (true)
 537#endif
 538    {
 539        
 540#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
 541#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
 542#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
 543#else
 544#define BEGIN_OPCODE(opcode) case OP_##opcode
 545#define NEXT_OPCODE continue
 546#endif
 547#define LOCALS(__ident) (stack.currentFrame->locals.__ident)
 548        
 549#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
 550        NEXT_OPCODE;
 551#else
 552        switch (*stack.currentFrame->args.instructionPtr)
 553#endif
 554        {
 555            /* Non-capturing bracket: optimized */
 556                
 557            BEGIN_OPCODE(BRA):
 558            NON_CAPTURING_BRACKET:
 559                DPRINTF(("start non-capturing bracket\n"));
 560                stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr);
 561                /* If we see no ALT, we have to skip three bytes of bracket data (link plus nested
 562                 bracket data. */
 563                stack.currentFrame->locals.skipBytes = 3;
 564                /* We must compute this value at the top, before we move the instruction pointer. */
 565                stack.currentFrame->locals.minSatisfied = minSatNextBracket.readAndClear();
 566                do {
 567                    /* We need to extract this into a variable so we can correctly pass it by value
 568                     through RECURSIVE_MATCH_NEW_GROUP, which modifies currentFrame. */
 569                    minSatisfied = stack.currentFrame->locals.minSatisfied;
 570                    RECURSIVE_MATCH_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, stack.currentFrame->args.bracketChain, minSatisfied);
 571                    if (isMatch) {
 572                        DPRINTF(("non-capturing bracket succeeded\n"));
 573                        RRETURN;
 574                    }
 575                    stack.currentFrame->locals.skipBytes = 1;
 576                    stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
 577                } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
 578                DPRINTF(("non-capturing bracket failed\n"));
 579                for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
 580                    md.setOffsetPair(i, -1, -1);
 581                RRETURN;
 582                
 583            /* Skip over large extraction number data if encountered. */
 584                
 585            BEGIN_OPCODE(BRANUMBER):
 586                stack.currentFrame->args.instructionPtr += 3;
 587                NEXT_OPCODE;
 588                
 589            /* End of the pattern. */
 590                
 591            BEGIN_OPCODE(END):
 592                md.endMatchPtr = stack.currentFrame->args.subjectPtr;          /* Record where we ended */
 593                md.endOffsetTop = stack.currentFrame->args.offsetTop;   /* and how many extracts were taken */
 594                isMatch = true;
 595                RRETURN;
 596                
 597            /* Assertion brackets. Check the alternative branches in turn - the
 598             matching won't pass the KET for an assertion. If any one branch matches,
 599             the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
 600             start of each branch to move the current point backwards, so the code at
 601             this level is identical to the lookahead case. */
 602                
 603            BEGIN_OPCODE(ASSERT):
 604                {
 605                    uint16_t bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE);
 606                    LOCALS(minBracket) = (bracketMess >> 8) & 0xff;
 607                    LOCALS(limitBracket) = bracketMess & 0xff;
 608                    JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket));
 609                }
 610                stack.currentFrame->locals.skipBytes = 3;
 611                do {
 612                    RECURSIVE_MATCH_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false);
 613                    if (isMatch)
 614                        break;
 615                    stack.currentFrame->locals.skipBytes = 1;
 616                    stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
 617                } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
 618                if (*stack.currentFrame->args.instructionPtr == OP_KET) {
 619                    for (size_t i = LOCALS(minBracket); i < size_t(LOCALS(limitBracket)); ++i)
 620                        md.setOffsetPair(i, -1, -1);
 621                    RRETURN_NO_MATCH;
 622                }
 623                
 624                /* Continue from after the assertion, updating the offsets high water
 625                 mark, since extracts may have been taken during the assertion. */
 626                
 627                advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
 628                stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
 629                stack.currentFrame->args.offsetTop = md.endOffsetTop;
 630                NEXT_OPCODE;
 631                
 632            /* Negative assertion: all branches must fail to match */
 633                
 634            BEGIN_OPCODE(ASSERT_NOT):
 635                stack.currentFrame->locals.skipBytes = 3;
 636                {
 637                    unsigned bracketMess = get2ByteValue(stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE);
 638                    LOCALS(minBracket) = (bracketMess >> 8) & 0xff;
 639                    LOCALS(limitBracket) = bracketMess & 0xff;
 640                }
 641                JS_ASSERT(LOCALS(minBracket) <= LOCALS(limitBracket));
 642                do {
 643                    RECURSIVE_MATCH_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + stack.currentFrame->locals.skipBytes + LINK_SIZE, NULL, false);
 644                    if (isMatch)
 645                        RRETURN_NO_MATCH;
 646                    stack.currentFrame->locals.skipBytes = 1;
 647                    stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
 648                } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
 649                
 650                stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.skipBytes + LINK_SIZE;
 651                NEXT_OPCODE;
 652                
 653            /* An alternation is the end of a branch; scan along to find the end of the
 654             bracketed group and go to there. */
 655                
 656            BEGIN_OPCODE(ALT):
 657                advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
 658                NEXT_OPCODE;
 659                
 660            /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
 661             that it may occur zero times. It may repeat infinitely, or not at all -
 662             i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
 663             repeat limits are compiled as a number of copies, with the optional ones
 664             preceded by BRAZERO or BRAMINZERO. */
 665                
 666            BEGIN_OPCODE(BRAZERO): {
 667                stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
 668                stack.currentFrame->extractBrackets(stack.currentFrame->args.instructionPtr + 1);
 669                stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 670                minSatNextBracket.set();
 671                RECURSIVE_MATCH_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain, true);
 672                if (isMatch)
 673                    RRETURN;
 674                stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 675                advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
 676                stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
 677                NEXT_OPCODE;
 678            }
 679                
 680            BEGIN_OPCODE(BRAMINZERO): {
 681                stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
 682                advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
 683                RECURSIVE_MATCH_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain, false);
 684                if (isMatch)
 685                    RRETURN;
 686                stack.currentFrame->args.instructionPtr++;
 687                NEXT_OPCODE;
 688            }
 689                
 690            /* End of a group, repeated or non-repeating. If we are at the end of
 691             an assertion "group", stop matching and report a match, but record the
 692             current high water mark for use by positive assertions. Do this also
 693             for the "once" (no-backing-up) groups. */
 694                
 695            BEGIN_OPCODE(KET):
 696            BEGIN_OPCODE(KETRMIN):
 697            BEGIN_OPCODE(KETRMAX):
 698                stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
 699                stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
 700                stack.currentFrame->locals.minSatisfied = stack.currentFrame->args.bracketChain->minSatisfied;
 701
 702                /* Back up the stack of bracket start pointers. */
 703
 704                stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
 705
 706                if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
 707                    md.endOffsetTop = stack.currentFrame->args.offsetTop;
 708                    isMatch = true;
 709                    RRETURN;
 710                }
 711                
 712                /* In all other cases except a conditional group we have to check the
 713                 group number back at the start and if necessary complete handling an
 714                 extraction by setting the offsets and bumping the high water mark. */
 715                
 716                stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
 717                
 718                /* For extended extraction brackets (large number), we have to fish out
 719                 the number from a dummy opcode at the start. */
 720                
 721                if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
 722                    stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 4 + LINK_SIZE);
 723                stack.currentFrame->locals.offset = 2 * stack.currentFrame->locals.number;
 724                
 725                DPRINTF(("end bracket %d\n", stack.currentFrame->locals.number));
 726                
 727                /* Test for a numbered group. This includes groups called as a result
 728                 of recursion. Note that whole-pattern recursion is coded as a recurse
 729                 into group 0, so it won't be picked up here. Instead, we catch it when
 730                 the OP_END is reached. */
 731                
 732                if (stack.currentFrame->locals.number > 0) {
 733                    if (stack.currentFrame->locals.offset >= md.offsetMax)
 734                        md.offsetOverflow = true;
 735                    else {
 736                        int start = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
 737                        int end = stack.currentFrame->args.subjectPtr - md.startSubject;
 738                        if (start == end && stack.currentFrame->locals.minSatisfied) {
 739                            DPRINTF(("empty string while group already matched; bailing"));
 740                            RRETURN_NO_MATCH;
 741                        }
 742                        DPRINTF(("saving; start: %d; end: %d\n", start, end));
 743                        JS_ASSERT(start <= end);
 744                        md.setOffsetPair(stack.currentFrame->locals.number, start, end);
 745                        if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
 746                            stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
 747                    }
 748                }
 749                
 750            /* For a non-repeating ket, just continue at this level. This also
 751             happens for a repeating ket if no characters were matched in the group
 752             (the forcible breaking of infinite loops as implemented in Perl 5.005),
 753             except that an empty match with minSatisfied set fails outright. If there
 754             is an options reset, it will get obeyed in the normal course of events. */
 755                
 756                if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
 757                    DPRINTF(("non-repeating ket or empty match\n"));
 758                    if (stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction && stack.currentFrame->locals.minSatisfied) {
 759                        DPRINTF(("empty string while group already matched; bailing"));
 760                        RRETURN_NO_MATCH;
 761                    }
 762                    stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
 763                    NEXT_OPCODE;
 764                }
 765                
 766                /* The repeating kets try the rest of the pattern or restart from the
 767                 preceding bracket, in the appropriate order. */
 768                
 769                stack.currentFrame->extractBrackets(LOCALS(instructionPtrAtStartOfOnce));
 770                JS_ASSERT_IF(LOCALS(number), LOCALS(minBracket) <= LOCALS(number) && LOCALS(number) < LOCALS(limitBracket));
 771                if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
 772                    stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 773                    RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
 774                    if (isMatch)
 775                        RRETURN;
 776                    else
 777                        stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 778                    DPRINTF(("recursively matching lazy group\n"));
 779                    minSatNextBracket.set();
 780                    RECURSIVE_MATCH_NEW_GROUP(17, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true);
 781                } else { /* OP_KETRMAX */
 782                    stack.currentFrame->saveOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 783                    stack.currentFrame->clobberOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 784                    DPRINTF(("recursively matching greedy group\n"));
 785                    minSatNextBracket.set();
 786                    RECURSIVE_MATCH_NEW_GROUP(18, LOCALS(instructionPtrAtStartOfOnce), stack.currentFrame->args.bracketChain, true);
 787                    if (isMatch)
 788                        RRETURN;
 789                    else
 790                        stack.currentFrame->restoreOffsets(LOCALS(minBracket), LOCALS(limitBracket), md.offsetVector, md.offsetEnd);
 791                    RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
 792                }
 793                RRETURN;
 794                
 795            /* Start of subject. */
 796
 797            BEGIN_OPCODE(CIRC):
 798                if (stack.currentFrame->args.subjectPtr != md.startSubject)
 799                    RRETURN_NO_MATCH;
 800                stack.currentFrame->args.instructionPtr++;
 801                NEXT_OPCODE;
 802
 803            /* After internal newline if multiline. */
 804
 805            BEGIN_OPCODE(BOL):
 806                if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
 807                    RRETURN_NO_MATCH;
 808                stack.currentFrame->args.instructionPtr++;
 809                NEXT_OPCODE;
 810
 811            /* End of subject. */
 812
 813            BEGIN_OPCODE(DOLL):
 814                if (stack.currentFrame->args.subjectPtr < md.endSubject)
 815                    RRETURN_NO_MATCH;
 816                stack.currentFrame->args.instructionPtr++;
 817                NEXT_OPCODE;
 818
 819            /* Before internal newline if multiline. */
 820
 821            BEGIN_OPCODE(EOL):
 822                if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
 823                    RRETURN_NO_MATCH;
 824                stack.currentFrame->args.instructionPtr++;
 825                NEXT_OPCODE;
 826                
 827            /* Word boundary assertions */
 828                
 829            BEGIN_OPCODE(NOT_WORD_BOUNDARY):
 830            BEGIN_OPCODE(WORD_BOUNDARY): {
 831                bool currentCharIsWordChar = false;
 832                bool previousCharIsWordChar = false;
 833                
 834                if (stack.currentFrame->args.subjectPtr > md.startSubject)
 835                    previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
 836                if (stack.currentFrame->args.subjectPtr < md.endSubject)
 837                    currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
 838                
 839                /* Now see if the situation is what we want */
 840                bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
 841                if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
 842                    RRETURN_NO_MATCH;
 843                NEXT_OPCODE;
 844            }
 845                
 846            /* Match a single character type; inline for speed */
 847                
 848            BEGIN_OPCODE(NOT_NEWLINE):
 849                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 850                    RRETURN_NO_MATCH;
 851                if (isNewline(*stack.currentFrame->args.subjectPtr++))
 852                    RRETURN_NO_MATCH;
 853                stack.currentFrame->args.instructionPtr++;
 854                NEXT_OPCODE;
 855
 856            BEGIN_OPCODE(NOT_DIGIT):
 857                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 858                    RRETURN_NO_MATCH;
 859                if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
 860                    RRETURN_NO_MATCH;
 861                stack.currentFrame->args.instructionPtr++;
 862                NEXT_OPCODE;
 863
 864            BEGIN_OPCODE(DIGIT):
 865                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 866                    RRETURN_NO_MATCH;
 867                if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
 868                    RRETURN_NO_MATCH;
 869                stack.currentFrame->args.instructionPtr++;
 870                NEXT_OPCODE;
 871
 872            BEGIN_OPCODE(NOT_WHITESPACE):
 873                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 874                    RRETURN_NO_MATCH;
 875                if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
 876                    RRETURN_NO_MATCH;
 877                stack.currentFrame->args.instructionPtr++;
 878                NEXT_OPCODE;
 879
 880            BEGIN_OPCODE(WHITESPACE):
 881                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 882                    RRETURN_NO_MATCH;
 883                if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
 884                    RRETURN_NO_MATCH;
 885                stack.currentFrame->args.instructionPtr++;
 886                NEXT_OPCODE;
 887                
 888            BEGIN_OPCODE(NOT_WORDCHAR):
 889                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 890                    RRETURN_NO_MATCH;
 891                if (isWordChar(*stack.currentFrame->args.subjectPtr++))
 892                    RRETURN_NO_MATCH;
 893                stack.currentFrame->args.instructionPtr++;
 894                NEXT_OPCODE;
 895                
 896            BEGIN_OPCODE(WORDCHAR):
 897                if (stack.currentFrame->args.subjectPtr >= md.endSubject)
 898                    RRETURN_NO_MATCH;
 899                if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
 900                    RRETURN_NO_MATCH;
 901                stack.currentFrame->args.instructionPtr++;
 902                NEXT_OPCODE;
 903                
 904            /* Match a back reference, possibly repeatedly. Look past the end of the
 905             item to see if there is repeat information following. The code is similar
 906             to that for character classes, but repeated for efficiency. Then obey
 907             similar code to character type repeats - written out again for speed.
 908             However, if the referenced string is the empty string, always treat
 909             it as matched, any number of times (otherwise there could be infinite
 910             loops). */
 911                
 912            BEGIN_OPCODE(REF):
 913                stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1;               /* Doubled ref number */
 914                stack.currentFrame->args.instructionPtr += 3;                                 /* Advance past item */
 915                
 916            /* If the reference is unset, set the length to zero so that it is
 917             handled like a reference to the empty string. Failing outright here
 918             would also be wrong, because of the possibility of quantifiers with
 919             zero minima. */
 920                
 921                if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
 922                    stack.currentFrame->locals.length = 0;
 923                else
 924                    stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
 925                
 926                /* Set up for repetition, or handle the non-repeated case */
 927                
 928                switch (*stack.currentFrame->args.instructionPtr) {
 929                    case OP_CRSTAR:
 930                    case OP_CRMINSTAR:
 931                    case OP_CRPLUS:
 932                    case OP_CRMINPLUS:
 933                    case OP_CRQUERY:
 934                    case OP_CRMINQUERY:
 935                        repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
 936                        break;
 937                        
 938                    case OP_CRRANGE:
 939                    case OP_CRMINRANGE:
 940                        minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
 941                        min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
 942                        stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
 943                        if (stack.currentFrame->locals.max == 0)
 944                            stack.currentFrame->locals.max = INT_MAX;
 945                        stack.currentFrame->args.instructionPtr += 5;
 946                        break;
 947                    
 948                    default:               /* No repeat follows */
 949                        if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
 950                            RRETURN_NO_MATCH;
 951                        stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
 952                        NEXT_OPCODE;
 953                }
 954                
 955                /* If the length of the reference is zero, just continue with the
 956                 main loop. */
 957                
 958                if (stack.currentFrame->locals.length == 0)
 959                    NEXT_OPCODE;
 960                
 961                /* First, ensure the minimum number of matches are present. */
 962                
 963                for (int i = 1; i <= min; i++) {
 964                    if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
 965                        RRETURN_NO_MATCH;
 966                    stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
 967                }
 968                
 969                /* If min = max, continue at the same level without recursion.
 970                 They are not both allowed to be zero. */
 971                
 972                if (min == stack.currentFrame->locals.max)
 973                    NEXT_OPCODE;
 974                
 975                /* If minimizing, keep trying and advancing the pointer */
 976                
 977                if (minimize) {
 978                    for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
 979                        RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
 980                        if (isMatch)
 981                            RRETURN;
 982                        if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
 983                            RRETURN;
 984                        stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
 985                    }
 986                    /* Control never reaches here */
 987                }
 988                
 989                /* If maximizing, find the longest string and work backwards */
 990                
 991                else {
 992                    stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
 993                    for (int i = min; i < stack.currentFrame->locals.max; i++) {
 994                        if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
 995                            break;
 996                        stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
 997                    }
 998                    while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
 999                        RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
1000                        if (isMatch)
1001                            RRETURN;
1002                        stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
1003                    }
1004                    RRETURN_NO_MATCH;
1005                }
1006                /* Control never reaches here */
1007                
1008            /* Match a bit-mapped character class, possibly repeatedly. This op code is
1009             used when all the characters in the class have values in the range 0-255,
1010             and either the matching is caseful, or the characters are in the range
1011             0-127 when UTF-8 processing is enabled. The only difference between
1012             OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1013             encountered.
1014             
1015             First, look past the end of the item to see if there is repeat information
1016             following. Then obey similar code to character type repeats - written out
1017             again for speed. */
1018                
1019            BEGIN_OPCODE(NCLASS):
1020            BEGIN_OPCODE(CLASS):
1021                stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1;                /* Save for matching */
1022                stack.currentFrame->args.instructionPtr += 33;                     /* Advance past the item */
1023                
1024                switch (*stack.currentFrame->args.instructionPtr) {
1025                    case OP_CRSTAR:
1026                    case OP_CRMINSTAR:
1027                    case OP_CRPLUS:
1028                    case OP_CRMINPLUS:
1029                    case OP_CRQUERY:
1030                    case OP_CRMINQUERY:
1031                        repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
1032                        break;
1033                        
1034                    case OP_CRRANGE:
1035                    case OP_CRMINRANGE:
1036                        minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
1037                        min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
1038                        stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
1039                        if (stack.currentFrame->locals.max == 0)
1040                            stack.currentFrame->locals.max = INT_MAX;
1041                        stack.currentFrame->args.instructionPtr += 5;
1042                        break;
1043                        
1044                    default:               /* No repeat follows */
1045                        min = stack.currentFrame->locals.max = 1;
1046                        break;
1047                }
1048                
1049                /* First, ensure the minimum number of matches are present. */
1050                
1051                for (int i = 1; i <= min; i++) {
1052                    if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1053                        RRETURN_NO_MATCH;
1054                    int c = *stack.currentFrame->args.subjectPtr++;
1055                    if (c > 255) {
1056                        if (stack.currentFrame->locals.data[-1] == OP_CLASS)
1057                            RRETURN_NO_MATCH;
1058                    } else {
1059                        if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
1060                            RRETURN_NO_MATCH;
1061                    }
1062  

Large files are truncated, but you can click here to view the full file