PageRenderTime 377ms CodeModel.GetById 60ms app.highlight 246ms RepoModel.GetById 36ms app.codeStats 1ms

/js/lib/Socket.IO-node/support/expresso/deps/jscoverage/js/jsregexp.cpp

http://github.com/onedayitwillmake/RealtimeMultiplayerNodeJs
C++ | 1906 lines | 1433 code | 148 blank | 325 comment | 293 complexity | 3040154f5eb7ab91c45eedc02b82c675 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2 * vim: set sw=4 ts=8 et tw=78:
   3 *
   4 * ***** BEGIN LICENSE BLOCK *****
   5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version
   8 * 1.1 (the "License"); you may not use this file except in compliance with
   9 * the License. You may obtain a copy of the License at
  10 * http://www.mozilla.org/MPL/
  11 *
  12 * Software distributed under the License is distributed on an "AS IS" basis,
  13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14 * for the specific language governing rights and limitations under the
  15 * License.
  16 *
  17 * The Original Code is Mozilla Communicator client code, released
  18 * March 31, 1998.
  19 *
  20 * The Initial Developer of the Original Code is
  21 * Netscape Communications Corporation.
  22 * Portions created by the Initial Developer are Copyright (C) 1998
  23 * the Initial Developer. All Rights Reserved.
  24 *
  25 * Contributor(s):
  26 *
  27 * Alternatively, the contents of this file may be used under the terms of
  28 * either of the GNU General Public License Version 2 or later (the "GPL"),
  29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30 * in which case the provisions of the GPL or the LGPL are applicable instead
  31 * of those above. If you wish to allow use of your version of this file only
  32 * under the terms of either the GPL or the LGPL, and not to allow others to
  33 * use your version of this file under the terms of the MPL, indicate your
  34 * decision by deleting the provisions above and replace them with the notice
  35 * and other provisions required by the GPL or the LGPL. If you do not delete
  36 * the provisions above, a recipient may use your version of this file under
  37 * the terms of any one of the MPL, the GPL or the LGPL.
  38 *
  39 * ***** END LICENSE BLOCK ***** */
  40
  41/*
  42 * JS regular expressions, after Perl.
  43 */
  44#include "jsstddef.h"
  45#include <stdlib.h>
  46#include <string.h>
  47#include <stdarg.h>
  48#include "jstypes.h"
  49#include "jsarena.h" /* Added by JSIFY */
  50#include "jsutil.h" /* Added by JSIFY */
  51#include "jsapi.h"
  52#include "jsarray.h"
  53#include "jsatom.h"
  54#include "jsbuiltins.h"
  55#include "jscntxt.h"
  56#include "jsversion.h"
  57#include "jsfun.h"
  58#include "jsgc.h"
  59#include "jsinterp.h"
  60#include "jslock.h"
  61#include "jsnum.h"
  62#include "jsobj.h"
  63#include "jsopcode.h"
  64#include "jsregexp.h"
  65#include "jsscan.h"
  66#include "jsscope.h"
  67#include "jsstr.h"
  68
  69#ifdef JS_TRACER
  70#include "jstracer.h"
  71using namespace avmplus;
  72using namespace nanojit;
  73
  74/* 
  75 * FIXME  Duplicated with jstracer.cpp, doing it this way for now
  76 *        to keep it private to files that need it. 
  77 */
/*
 * With JS_JIT_SPEW defined, verbose_debug is initialized once from the
 * TRACEMONKEY environment variable: it is true when the variable is set and
 * contains the substring "verbose".  debug_only_v(x) then executes x only when
 * verbose_debug is true.  Without JS_JIT_SPEW, debug_only_v(x) expands to
 * nothing.
 *
 * NOTE(review): debug_only_v expands to a bare `if (...) { x; }`, so it can
 * capture a following `else` if used inside an unbraced if/else.
 */
  78#ifdef JS_JIT_SPEW
  79static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose");
  80#define debug_only_v(x) if (verbose_debug) { x; }
  81#else
  82#define debug_only_v(x)
  83#endif
  84#endif
  85
  86typedef enum REOp {
/*
 * Regular-expression opcodes.  One enumerator is generated for each
 * REOP_DEF(opcode, name) entry in jsreops.tbl, in table order.
 */
  87#define REOP_DEF(opcode, name) opcode,
  88#include "jsreops.tbl"
  89#undef REOP_DEF
  90    REOP_LIMIT /* META: no operator >= to this */
  91} REOp;
  92
/*
 * True when op does not come after REOP_NCLASS in the enumeration order
 * (REOP_NCLASS is presumably supplied by jsreops.tbl -- confirm there).
 */
  93#define REOP_IS_SIMPLE(op)  ((op) <= REOP_NCLASS)
  94
  95#ifdef REGEXP_DEBUG
/*
 * Debug-only table of opcode names, built from the same jsreops.tbl entries
 * as the REOp enumeration (so entries line up with REOp values) and
 * terminated by a NULL entry.
 */
  96const char *reop_names[] = {
  97#define REOP_DEF(opcode, name) name,
  98#include "jsreops.tbl"
  99#undef REOP_DEF
 100    NULL
 101};
 102#endif
 103
 104#ifdef __GNUC__
 105static int
 106re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
 107#endif
 108
 109#ifdef REGEXP_DEBUG
 110static int
 111re_debug(const char *fmt, ...)
 112{
 113    va_list ap;
 114    int retval;
 115
 116    va_start(ap, fmt);
 117    retval = vprintf(fmt, ap);
 118    va_end(ap);
 119    return retval;
 120}
 121
 122static void
 123re_debug_chars(const jschar *chrs, size_t length)
 124{
 125    int i = 0;
 126
 127    printf(" \"");
 128    while (*chrs && i++ < length) {
 129        putchar((char)*chrs++);
 130    }
 131    printf("\"");
 132}
 133#else  /* !REGEXP_DEBUG */
 134/* This should be optimized to a no-op by our tier-1 compilers. */
 135static int
 136re_debug(const char *fmt, ...)
 137{
 138    return 0;
 139}
 140
 141static void
 142re_debug_chars(const jschar *chrs, size_t length)
 143{
 144}
 145#endif /* !REGEXP_DEBUG */
 146
 147struct RENode {
/*
 * One node of the parsed regular-expression tree.  Nodes are chained through
 * 'next' in concatenation order; 'kid' and the union 'u' hold operands whose
 * interpretation depends on 'op'.  NewRENode initializes only op, next and
 * kid; the union is filled in by whoever builds the node.
 */
 148    REOp            op;         /* r.e. op bytecode */
 149    RENode          *next;      /* next in concatenation order */
 150    void            *kid;       /* first operand */
 151    union {
 152        void        *kid2;      /* second operand */
 153        jsint       num;        /* could be a number */
 154        size_t      parenIndex; /* or a parenthesis index */
 155        struct {                /* or a quantifier range */
 156            uintN  min;
 157            uintN  max;
 158            JSPackedBool greedy;
 159        } range;
 160        struct {                /* or a character class */
 161            size_t  startIndex;
 162            size_t  kidlen;     /* length of string at kid, in jschars */
 163            size_t  index;      /* index into class list */
 164            uint16  bmsize;     /* bitmap size, based on max char code */
 165            JSPackedBool sense;
 166        } ucclass;
 167        struct {                /* or a literal sequence */
 168            jschar  chr;        /* of one character */
 169            size_t  length;     /* or many (via the kid) */
 170        } flat;
        /*
         * Alternation with a prerequisite test.  kid2 is the first member, so
         * it overlays u.kid2: ProcessOp stores u.kid2 first and then sets
         * ch1/ch2 without disturbing it.
         */
 171        struct {
 172            RENode  *kid2;      /* second operand from ALT */
 173            jschar  ch1;        /* match char for ALTPREREQ */
 174            jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */
 175        } altprereq;
 176    } u;
 177};
 178
 /*
  * Character classification helpers.
  *
  * RE_IS_LETTER(c) is true for ASCII 'A'-'Z' and 'a'-'z' only.
  * RE_IS_LINE_TERM(c) is true for '\n', '\r', LINE_SEPARATOR and
  * PARA_SEPARATOR.
  *
  * The argument is parenthesized at every use so that expressions containing
  * low-precedence operators (e.g. `x ? a : b`) are classified correctly.  It
  * is still evaluated more than once, so it must not have side effects.
  */
#define RE_IS_LETTER(c)     ((((c) >= 'A') && ((c) <= 'Z')) ||                \
                             (((c) >= 'a') && ((c) <= 'z')))
#define RE_IS_LINE_TERM(c)  (((c) == '\n') || ((c) == '\r') ||                \
                             ((c) == LINE_SEPARATOR) ||                       \
                             ((c) == PARA_SEPARATOR))
 183
 184#define CLASS_CACHE_SIZE    4
 185
/*
 * State threaded through the regexp parser.  FindParenCount copies the whole
 * struct to re-parse the source from cpbegin, so it must remain safe to copy
 * by plain assignment.
 */
 186typedef struct CompilerState {
 187    JSContext       *context;     /* used for allocation and error reports */
 188    JSTokenStream   *tokenStream; /* For reporting errors */
 189    const jschar    *cpbegin;     /* start of the regexp source */
 190    const jschar    *cpend;       /* end of the regexp source */
 191    const jschar    *cp;          /* current parse position */
 192    size_t          parenCount;   /* capturing '(' seen so far */
 193    size_t          classCount;   /* number of [] encountered */
 194    size_t          treeDepth;    /* maximum depth of parse tree */
 195    size_t          progLength;   /* estimated bytecode length */
 196    RENode          *result;      /* node produced by the latest parse step */
 197    size_t          classBitmapsMem; /* memory to hold all class bitmaps */
 198    struct {
 199        const jschar *start;        /* small cache of class strings */
 200        size_t length;              /* since they're often the same */
 201        size_t index;
 202    } classCache[CLASS_CACHE_SIZE];
 203    uint16          flags;        /* JSREG_* flags, plus the two
                                     JSREG_FIND_PAREN_* bits */
 204} CompilerState;
 205
 206typedef struct EmitStateStackEntry {
/*
 * One entry of the stack used while emitting bytecode; the stack's maximum
 * size bounds the supported tree depth (see TREE_DEPTH_MAX).  The fixup
 * members point at offsets in already-emitted bytecode, presumably to be
 * patched once their targets are known.
 */
 207    jsbytecode      *altHead;       /* start of REOP_ALT* opcode */
 208    jsbytecode      *nextAltFixup;  /* fixup pointer to next-alt offset */
 209    jsbytecode      *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
 210    jsbytecode      *endTermFixup;  /* fixup ptr. to REOP_ALTPREREQ* offset */
 211    RENode          *continueNode;  /* original REOP_ALT* node being stacked */
 212    jsbytecode      continueOp;     /* REOP_JUMP or REOP_ENDALT continuation */
 213    JSPackedBool    jumpToJumpFlag; /* true if we've patched jump-to-jump to
 214                                       avoid 16-bit unsigned offset overflow */
 215} EmitStateStackEntry;
 216
 217/*
 218 * Immediate operand sizes and getter/setters.  Unlike the ones in jsopcode.h,
 219 * the getters and setters take the pc of the offset, not of the opcode before
 220 * the offset.
 *
 * An operand occupies ARG_LEN (2) bytes, stored high byte first: GET_ARG
 * reassembles (pc)[0] << 8 | (pc)[1] into a uint16, and SET_ARG stores the
 * high and low bytes of arg in that order.  Both macros evaluate pc more
 * than once, and SET_ARG evaluates arg twice.
 221 */
 222#define ARG_LEN             2
 223#define GET_ARG(pc)         ((uint16)(((pc)[0] << 8) | (pc)[1]))
 224#define SET_ARG(pc, arg)    ((pc)[0] = (jsbytecode) ((arg) >> 8),       \
 225                             (pc)[1] = (jsbytecode) (arg))
 226
/* Jump offsets use the same 2-byte encoding, so OFFSET_MAX is 2^16 - 1. */
 227#define OFFSET_LEN          ARG_LEN
 228#define OFFSET_MAX          (JS_BIT(ARG_LEN * 8) - 1)
 229#define GET_OFFSET(pc)      GET_ARG(pc)
 230
 231/*
 232 * The maximum supported tree depth is the maximum number of entries in the
 233 * EmitStateStackEntry stack.  For sanity, we limit that stack to 2^24 bytes.
 234 */
 235#define TREE_DEPTH_MAX  (JS_BIT(24) / sizeof(EmitStateStackEntry))
 236
 237/*
 238 * The maximum memory that can be allocated for class bitmaps.
 239 * For sanity, we limit it to 2^24 bytes.
 240 */
 241#define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
 242
 243/*
 244 * Functions to get size and write/read bytecode that represent small indexes
 245 * compactly.
 246 * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
 247 * indicates that the following byte brings more bits to the index. Otherwise
 248 * this is the last byte in the index bytecode representing highest index bits.
 249 */
 /*
  * Return the number of bytes the compact encoding of index occupies: one
  * byte per 7-bit chunk, and never fewer than one byte (index 0 takes one).
  */
static size_t
GetCompactIndexWidth(size_t index)
{
    size_t bytes = 1;

    /* Every chunk beyond the lowest 7 bits costs one more byte. */
    while (index > 0x7F) {
        index >>= 7;
        ++bytes;
    }
    return bytes;
}
 258
 259static JS_ALWAYS_INLINE jsbytecode *
 260WriteCompactIndex(jsbytecode *pc, size_t index)
 261{
 262    size_t next;
 263
 264    while ((next = index >> 7) != 0) {
 265        *pc++ = (jsbytecode)(index | 0x80);
 266        index = next;
 267    }
 268    *pc++ = (jsbytecode)index;
 269    return pc;
 270}
 271
 272static JS_ALWAYS_INLINE jsbytecode *
 273ReadCompactIndex(jsbytecode *pc, size_t *result)
 274{
 275    size_t nextByte;
 276
 277    nextByte = *pc++;
 278    if ((nextByte & 0x80) == 0) {
 279        /*
 280         * Short-circuit the most common case when compact index <= 127.
 281         */
 282        *result = nextByte;
 283    } else {
 284        size_t shift = 7;
 285        *result = 0x7F & nextByte;
 286        do {
 287            nextByte = *pc++;
 288            *result |= (nextByte & 0x7F) << shift;
 289            shift += 7;
 290        } while ((nextByte & 0x80) != 0);
 291    }
 292    return pc;
 293}
 294
 295typedef struct RECapture {
/* Position of one parenthesized capture: a start index plus a length. */
 296    ptrdiff_t index;           /* start of contents, -1 for empty  */
 297    size_t length;             /* length of capture */
 298} RECapture;
 299
 300typedef struct REMatchState {
/*
 * Match state: a text position followed by the capture array.  parens is
 * declared with one element but is really a variable-length trailing array,
 * so the struct must be allocated with room for all captures.
 */
 301    const jschar *cp;
 302    RECapture parens[1];      /* first of 're->parenCount' captures,
 303                                 allocated at end of this struct */
 304} REMatchState;
 305
 306struct REBackTrackData;
 307
 308typedef struct REProgState {
/*
 * One entry of the matcher's state stack (REGlobalData.stateStack).  The
 * union holds either quantifier limits or saved backtrack-stack state,
 * presumably selected by the opcode that pushed the entry.
 */
 309    jsbytecode *continue_pc;        /* current continuation data */
 310    jsbytecode continue_op;
 311    ptrdiff_t index;                /* progress in text */
 312    size_t parenSoFar;              /* highest indexed paren started */
 313    union {
 314        struct {
 315            uintN min;             /* current quantifier limits */
 316            uintN max;
 317        } quantifier;
 318        struct {
 319            size_t top;             /* backtrack stack state */
 320            size_t sz;
 321        } assertion;
 322    } u;
 323} REProgState;
 324
 325typedef struct REBackTrackData {
/*
 * Fixed header of one variable-sized backtrack-stack entry.  The saved parent
 * states and saved paren contents are stored directly after this header, so
 * entries differ in size; 'sz' records the size of the previous entry.
 */
 326    size_t sz;                      /* size of previous stack entry */
 327    jsbytecode *backtrack_pc;       /* where to backtrack to */
 328    jsbytecode backtrack_op;
 329    const jschar *cp;               /* index in text of match at backtrack */
 330    size_t parenIndex;              /* start index of saved paren contents */
 331    size_t parenCount;              /* # of saved paren contents */
 332    size_t saveStateStackTop;       /* number of parent states */
 333    /* saved parent states follow */
 334    /* saved paren contents follow */
 335} REBackTrackData;
 336
 337#define INITIAL_STATESTACK  100
 338#define INITIAL_BACKTRACK   8000
/*
 * NOTE(review): the two constants above look like the initial sizes of the
 * state stack and backtrack stack below; their units (entries vs. bytes) are
 * not visible here -- confirm at the allocation site.
 */
 339
/*
 * Per-execution matcher data: the regexp being run, the bounds of the input
 * text, and the two stacks (parent states and backtrack entries) together
 * with their bookkeeping.
 */
 340typedef struct REGlobalData {
 341    JSContext *cx;
 342    JSRegExp *regexp;               /* the RE in execution */
 343    JSBool ok;                      /* runtime error (out_of_memory only?) */
 344    size_t start;                   /* offset to start at */
 345    ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
 346    const jschar    *cpbegin;       /* text base address */
 347    const jschar    *cpend;         /* text limit address */
 348
 349    REProgState *stateStack;        /* stack of state of current parents */
 350    size_t stateStackTop;
 351    size_t stateStackLimit;
 352
 353    REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
 354    REBackTrackData *backTrackSP;
 355    size_t backTrackStackSize;
 356    size_t cursz;                   /* size of current stack entry */
 357    size_t backTrackCount;          /* how many times we've backtracked */
 358    size_t backTrackLimit;          /* upper limit on backtrack states */
 359} REGlobalData;
 360
 361/*
 362 * 1. If IgnoreCase is false, return ch.
 363 * 2. Let u be ch converted to upper case as if by calling
 364 *    String.prototype.toUpperCase on the one-character string ch.
 365 * 3. If u does not consist of a single character, return ch.
 366 * 4. Let cu be u's character.
 367 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
 368 *    code point value is less than decimal 128, then return ch.
 369 * 6. Return cu.
 370 */
 371static JS_ALWAYS_INLINE uintN
 372upcase(uintN ch)
 373{
 374    uintN cu;
 375
 376    JS_ASSERT((uintN) (jschar) ch == ch);
 377    if (ch < 128) {
 378        if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
 379            ch -= (uintN) ('a' - 'A');
 380        return ch;
 381    }
 382
 383    cu = JS_TOUPPER(ch);
 384    return (cu < 128) ? ch : cu;
 385}
 386
 387static JS_ALWAYS_INLINE uintN
 388downcase(uintN ch)
 389{
 390    JS_ASSERT((uintN) (jschar) ch == ch);
 391    if (ch < 128) {
 392        if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
 393            ch += (uintN) ('a' - 'A');
 394        return ch;
 395    }
 396
 397    return JS_TOLOWER(ch);
 398}
 399
 400/* Construct and initialize an RENode, returning NULL for out-of-memory */
 401static RENode *
 402NewRENode(CompilerState *state, REOp op)
 403{
 404    JSContext *cx;
 405    RENode *ren;
 406
 407    cx = state->context;
 408    JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
 409    if (!ren) {
 410        js_ReportOutOfScriptQuota(cx);
 411        return NULL;
 412    }
 413    ren->op = op;
 414    ren->next = NULL;
 415    ren->kid = NULL;
 416    return ren;
 417}
 418
 419/*
 420 * Validates and converts hex ascii value.
 421 */
 422static JSBool
 423isASCIIHexDigit(jschar c, uintN *digit)
 424{
 425    uintN cv = c;
 426
 427    if (cv < '0')
 428        return JS_FALSE;
 429    if (cv <= '9') {
 430        *digit = cv - '0';
 431        return JS_TRUE;
 432    }
 433    cv |= 0x20;
 434    if (cv >= 'a' && cv <= 'f') {
 435        *digit = cv - 'a' + 10;
 436        return JS_TRUE;
 437    }
 438    return JS_FALSE;
 439}
 440
 441
 442typedef struct {
/*
 * One entry of ParseRegExp's operator stack: the pending operator, the source
 * position recorded when it was pushed (passed to the error reporter by
 * ProcessOp for an unclosed parenthesis), and the paren count at that time.
 */
 443    REOp op;
 444    const jschar *errPos;
 445    size_t parenIndex;
 446} REOpData;
 447
 448static JSBool
 449ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
 450                        const jschar *arg)
 451{
 452    if (state->tokenStream) {
 453        return js_ReportCompileErrorNumber(state->context, state->tokenStream,
 454                                           NULL, JSREPORT_UC | flags,
 455                                           errorNumber, arg);
 456    }
 457    return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
 458                                          js_GetErrorMessage, NULL,
 459                                          errorNumber, arg);
 460}
 461
 462static JSBool
 463ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
 464{
 465    return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
 466}
 467
 468/*
 469 * Process the op against the two top operands, reducing them to a single
 470 * operand in the penultimate slot. Update progLength and treeDepth.
 471 */
 472static JSBool
 473ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
 474          intN operandSP)
 475{
 476    RENode *result;
 477
 478    switch (opData->op) {
 479      case REOP_ALT:
 480        result = NewRENode(state, REOP_ALT);
 481        if (!result)
 482            return JS_FALSE;
 483        result->kid = operandStack[operandSP - 2];
 484        result->u.kid2 = operandStack[operandSP - 1];
 485        operandStack[operandSP - 2] = result;
 486
 487        if (state->treeDepth == TREE_DEPTH_MAX) {
 488            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
 489            return JS_FALSE;
 490        }
 491        ++state->treeDepth;
 492
 493        /*
 494         * Look at both alternates to see if there's a FLAT or a CLASS at
 495         * the start of each. If so, use a prerequisite match.
 496         */
 497        if (((RENode *) result->kid)->op == REOP_FLAT &&
 498            ((RENode *) result->u.kid2)->op == REOP_FLAT &&
 499            (state->flags & JSREG_FOLD) == 0) {
 500            result->op = REOP_ALTPREREQ;
 501            result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
 502            result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
 503            /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
 504                                            JUMP, <end> ... ENDALT */
 505            state->progLength += 13;
 506        }
 507        else
 508        if (((RENode *) result->kid)->op == REOP_CLASS &&
 509            ((RENode *) result->kid)->u.ucclass.index < 256 &&
 510            ((RENode *) result->u.kid2)->op == REOP_FLAT &&
 511            (state->flags & JSREG_FOLD) == 0) {
 512            result->op = REOP_ALTPREREQ2;
 513            result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
 514            result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
 515            /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
 516                                            JUMP, <end> ... ENDALT */
 517            state->progLength += 13;
 518        }
 519        else
 520        if (((RENode *) result->kid)->op == REOP_FLAT &&
 521            ((RENode *) result->u.kid2)->op == REOP_CLASS &&
 522            ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
 523            (state->flags & JSREG_FOLD) == 0) {
 524            result->op = REOP_ALTPREREQ2;
 525            result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
 526            result->u.altprereq.ch2 =
 527                ((RENode *) result->u.kid2)->u.ucclass.index;
 528            /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
 529                                          JUMP, <end> ... ENDALT */
 530            state->progLength += 13;
 531        }
 532        else {
 533            /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
 534            state->progLength += 7;
 535        }
 536        break;
 537
 538      case REOP_CONCAT:
 539        result = operandStack[operandSP - 2];
 540        while (result->next)
 541            result = result->next;
 542        result->next = operandStack[operandSP - 1];
 543        break;
 544
 545      case REOP_ASSERT:
 546      case REOP_ASSERT_NOT:
 547      case REOP_LPARENNON:
 548      case REOP_LPAREN:
 549        /* These should have been processed by a close paren. */
 550        ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
 551                                opData->errPos);
 552        return JS_FALSE;
 553
 554      default:;
 555    }
 556    return JS_TRUE;
 557}
 558
 559/*
 560 * Parser forward declarations.
 561 */
 562static JSBool ParseTerm(CompilerState *state);
 563static JSBool ParseQuantifier(CompilerState *state);
 564static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
 565
 566/*
 567 * Top-down regular expression grammar, based closely on Perl4.
 568 *
 569 *  regexp:     altern                  A regular expression is one or more
 570 *              altern '|' regexp       alternatives separated by vertical bar.
 *
 * ParseRegExp parses from state->cp to state->cpend using two heap-allocated
 * stacks, one of pending operators and one of operands.  Both start at
 * INITIAL_STACK_SIZE entries and double when full.  Each pass of the main
 * loop first obtains an operand (a parsed term, or an EMPTY node where the
 * syntax omits one) and then looks at the next character to decide which
 * operator to push or reduce.
 *
 * On success it stores the root of the tree in state->result and returns
 * JS_TRUE.  On any failure it returns JS_FALSE.  Both stacks are freed on
 * every path that reaches the 'out' label.
 571 */
 572#define INITIAL_STACK_SIZE  128
 573
 574static JSBool
 575ParseRegExp(CompilerState *state)
 576{
 577    size_t parenIndex;
 578    RENode *operand;
 579    REOpData *operatorStack;
 580    RENode **operandStack;
 581    REOp op;
 582    intN i;
 583    JSBool result = JS_FALSE;
 584
 585    intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
 586    intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
 587
 588    /* Watch out for empty regexp */
 589    if (state->cp == state->cpend) {
 590        state->result = NewRENode(state, REOP_EMPTY);
 591        return (state->result != NULL);
 592    }
 593
 594    operatorStack = (REOpData *)
 595        JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
 596    if (!operatorStack)
 597        return JS_FALSE;
 598
 599    operandStack = (RENode **)
 600        JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
    /* operandStack is NULL here on failure; 'out' frees only operatorStack. */
 601    if (!operandStack)
 602        goto out;
 603
 604    for (;;) {
        /* Paren count before this step; recorded with any operator pushed. */
 605        parenIndex = state->parenCount;
 606        if (state->cp == state->cpend) {
 607            /*
 608             * If we are at the end of the regexp and we're short one or more
 609             * operands, the regexp must have the form /x|/ or some such, with
 610             * left parentheses making us short more than one operand.
 611             */
 612            if (operatorSP >= operandSP) {
 613                operand = NewRENode(state, REOP_EMPTY);
 614                if (!operand)
 615                    goto out;
 616                goto pushOperand;
 617            }
 618        } else {
 619            switch (*state->cp) {
 620              case '(':
 621                ++state->cp;
                /* "(?=", "(?!" and "(?:" are the non-capturing forms. */
 622                if (state->cp + 1 < state->cpend &&
 623                    *state->cp == '?' &&
 624                    (state->cp[1] == '=' ||
 625                     state->cp[1] == '!' ||
 626                     state->cp[1] == ':')) {
 627                    switch (state->cp[1]) {
 628                      case '=':
 629                        op = REOP_ASSERT;
 630                        /* ASSERT, <next>, ... ASSERTTEST */
 631                        state->progLength += 4;
 632                        break;
 633                      case '!':
 634                        op = REOP_ASSERT_NOT;
 635                        /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
 636                        state->progLength += 4;
 637                        break;
 638                      default:
 639                        op = REOP_LPARENNON;
 640                        break;
 641                    }
 642                    state->cp += 2;
 643                } else {
 644                    op = REOP_LPAREN;
 645                    /* LPAREN, <index>, ... RPAREN, <index> */
 646                    state->progLength
 647                        += 2 * (1 + GetCompactIndexWidth(parenIndex));
 648                    state->parenCount++;
 649                    if (state->parenCount == 65535) {
 650                        ReportRegExpError(state, JSREPORT_ERROR,
 651                                          JSMSG_TOO_MANY_PARENS);
 652                        goto out;
 653                    }
 654                }
 655                goto pushOperator;
 656
 657              case ')':
 658                /*
 659                 * If there's no stacked open parenthesis, throw syntax error.
 660                 */
 661                for (i = operatorSP - 1; ; i--) {
 662                    if (i < 0) {
 663                        ReportRegExpError(state, JSREPORT_ERROR,
 664                                          JSMSG_UNMATCHED_RIGHT_PAREN);
 665                        goto out;
 666                    }
 667                    if (operatorStack[i].op == REOP_ASSERT ||
 668                        operatorStack[i].op == REOP_ASSERT_NOT ||
 669                        operatorStack[i].op == REOP_LPARENNON ||
 670                        operatorStack[i].op == REOP_LPAREN) {
 671                        break;
 672                    }
 673                }
 674                /* FALL THROUGH */
 675
 676              case '|':
 677                /* Expected an operand before these, so make an empty one */
 678                operand = NewRENode(state, REOP_EMPTY);
 679                if (!operand)
 680                    goto out;
 681                goto pushOperand;
 682
 683              default:
 684                if (!ParseTerm(state))
 685                    goto out;
 686                operand = state->result;
 687pushOperand:
                /* Double the operand stack when it is full. */
 688                if (operandSP == operandStackSize) {
 689                    RENode **tmp;
 690                    operandStackSize += operandStackSize;
 691                    tmp = (RENode **)
 692                        JS_realloc(state->context, operandStack,
 693                                   sizeof(RENode *) * operandStackSize);
 694                    if (!tmp)
 695                        goto out;
 696                    operandStack = tmp;
 697                }
 698                operandStack[operandSP++] = operand;
 699                break;
 700            }
 701        }
 702
 703        /* At the end; process remaining operators. */
 704restartOperator:
 705        if (state->cp == state->cpend) {
 706            while (operatorSP) {
 707                --operatorSP;
 708                if (!ProcessOp(state, &operatorStack[operatorSP],
 709                               operandStack, operandSP))
 710                    goto out;
 711                --operandSP;
 712            }
 713            JS_ASSERT(operandSP == 1);
 714            state->result = operandStack[0];
 715            result = JS_TRUE;
 716            goto out;
 717        }
 718
 719        switch (*state->cp) {
 720          case '|':
 721            /* Process any stacked 'concat' operators */
 722            ++state->cp;
 723            while (operatorSP &&
 724                   operatorStack[operatorSP - 1].op == REOP_CONCAT) {
 725                --operatorSP;
 726                if (!ProcessOp(state, &operatorStack[operatorSP],
 727                               operandStack, operandSP)) {
 728                    goto out;
 729                }
 730                --operandSP;
 731            }
 732            op = REOP_ALT;
 733            goto pushOperator;
 734
 735          case ')':
 736            /*
 737             * If there's no stacked open parenthesis, throw syntax error.
 738             */
 739            for (i = operatorSP - 1; ; i--) {
 740                if (i < 0) {
 741                    ReportRegExpError(state, JSREPORT_ERROR,
 742                                      JSMSG_UNMATCHED_RIGHT_PAREN);
 743                    goto out;
 744                }
 745                if (operatorStack[i].op == REOP_ASSERT ||
 746                    operatorStack[i].op == REOP_ASSERT_NOT ||
 747                    operatorStack[i].op == REOP_LPARENNON ||
 748                    operatorStack[i].op == REOP_LPAREN) {
 749                    break;
 750                }
 751            }
 752            ++state->cp;
 753
 754            /* Process everything on the stack until the open parenthesis. */
 755            for (;;) {
 756                JS_ASSERT(operatorSP);
 757                --operatorSP;
 758                switch (operatorStack[operatorSP].op) {
 759                  case REOP_ASSERT:
 760                  case REOP_ASSERT_NOT:
 761                  case REOP_LPAREN:
                    /* Wrap the group's contents in a node for the opener. */
 762                    operand = NewRENode(state, operatorStack[operatorSP].op);
 763                    if (!operand)
 764                        goto out;
 765                    operand->u.parenIndex =
 766                        operatorStack[operatorSP].parenIndex;
 767                    JS_ASSERT(operandSP);
 768                    operand->kid = operandStack[operandSP - 1];
 769                    operandStack[operandSP - 1] = operand;
 770                    if (state->treeDepth == TREE_DEPTH_MAX) {
 771                        ReportRegExpError(state, JSREPORT_ERROR,
 772                                          JSMSG_REGEXP_TOO_COMPLEX);
 773                        goto out;
 774                    }
 775                    ++state->treeDepth;
 776                    /* FALL THROUGH */
 777
 778                  case REOP_LPARENNON:
                    /* A quantifier may follow the closed group. */
 779                    state->result = operandStack[operandSP - 1];
 780                    if (!ParseQuantifier(state))
 781                        goto out;
 782                    operandStack[operandSP - 1] = state->result;
 783                    goto restartOperator;
 784                  default:
 785                    if (!ProcessOp(state, &operatorStack[operatorSP],
 786                                   operandStack, operandSP))
 787                        goto out;
 788                    --operandSP;
 789                    break;
 790                }
 791            }
 792            break;
 793
 794          case '{':
 795          {
 796            const jschar *errp = state->cp;
 797
 798            if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
 799                /*
 800                 * This didn't even scan correctly as a quantifier, so we should
 801                 * treat it as flat.
 802                 */
 803                op = REOP_CONCAT;
 804                goto pushOperator;
 805            }
 806
            /* A well-formed quantifier with nothing to quantify: report it. */
 807            state->cp = errp;
 808            /* FALL THROUGH */
 809          }
 810
 811          case '+':
 812          case '*':
 813          case '?':
 814            ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
 815                                    state->cp);
 816            result = JS_FALSE;
 817            goto out;
 818
 819          default:
 820            /* Anything else is the start of the next term. */
 821            op = REOP_CONCAT;
 822pushOperator:
            /* Double the operator stack when it is full. */
 823            if (operatorSP == operatorStackSize) {
 824                REOpData *tmp;
 825                operatorStackSize += operatorStackSize;
 826                tmp = (REOpData *)
 827                    JS_realloc(state->context, operatorStack,
 828                               sizeof(REOpData) * operatorStackSize);
 829                if (!tmp)
 830                    goto out;
 831                operatorStack = tmp;
 832            }
 833            operatorStack[operatorSP].op = op;
 834            operatorStack[operatorSP].errPos = state->cp;
 835            operatorStack[operatorSP++].parenIndex = parenIndex;
 836            break;
 837        }
 838    }
 839out:
    /* Common exit: release whichever stacks were allocated. */
 840    if (operatorStack)
 841        JS_free(state->context, operatorStack);
 842    if (operandStack)
 843        JS_free(state->context, operandStack);
 844    return result;
 845}
 846
 847/*
 848 * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
 849 * its being on the stack, and to propagate errors to its callers.
 *
 * JSREG_FIND_PAREN_COUNT is set on the temporary state that FindParenCount
 * parses with; JSREG_FIND_PAREN_ERROR is set on the caller's state when that
 * parse fails.
 850 */
 851#define JSREG_FIND_PAREN_COUNT  0x8000
 852#define JSREG_FIND_PAREN_ERROR  0x4000
 853
 854/*
 855 * Magic return value from FindParenCount and GetDecimalValue, to indicate
 856 * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
 857 * its findMax parameter is non-null.
 *
 * FindParenCount also returns it when it is already active on the stack or
 * when its re-parse of the regexp fails.
 858 */
 859#define OVERFLOW_VALUE          ((uintN)-1)
 860
 861static uintN
 862FindParenCount(CompilerState *state)
 863{
 864    CompilerState temp;
 865    int i;
 866
 867    if (state->flags & JSREG_FIND_PAREN_COUNT)
 868        return OVERFLOW_VALUE;
 869
 870    /*
 871     * Copy state into temp, flag it so we never report an invalid backref,
 872     * and reset its members to parse the entire regexp.  This is obviously
 873     * suboptimal, but GetDecimalValue calls us only if a backref appears to
 874     * refer to a forward parenthetical, which is rare.
 875     */
 876    temp = *state;
 877    temp.flags |= JSREG_FIND_PAREN_COUNT;
 878    temp.cp = temp.cpbegin;
 879    temp.parenCount = 0;
 880    temp.classCount = 0;
 881    temp.progLength = 0;
 882    temp.treeDepth = 0;
 883    temp.classBitmapsMem = 0;
 884    for (i = 0; i < CLASS_CACHE_SIZE; i++)
 885        temp.classCache[i].start = NULL;
 886
 887    if (!ParseRegExp(&temp)) {
 888        state->flags |= JSREG_FIND_PAREN_ERROR;
 889        return OVERFLOW_VALUE;
 890    }
 891    return temp.parenCount;
 892}
 893
 894/*
 895 * Extract and return a decimal value at state->cp.  The initial character c
 896 * has already been read.  Return OVERFLOW_VALUE if the result exceeds max.
 897 * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
 898 * state->flags to discover whether an error occurred under findMax.
 899 */
 900static uintN
 901GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
 902                CompilerState *state)
 903{
 904    uintN value = JS7_UNDEC(c);
 905    JSBool overflow = (value > max && (!findMax || value > findMax(state)));
 906
 907    /* The following restriction allows simpler overflow checks. */
 908    JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
 909    while (state->cp < state->cpend) {
 910        c = *state->cp;
 911        if (!JS7_ISDEC(c))
 912            break;
 913        value = 10 * value + JS7_UNDEC(c);
 914        if (!overflow && value > max && (!findMax || value > findMax(state)))
 915            overflow = JS_TRUE;
 916        ++state->cp;
 917    }
 918    return overflow ? OVERFLOW_VALUE : value;
 919}
 920
 921/*
 922 * Calculate the total size of the bitmap required for a class expression.
 923 */
 924static JSBool
 925CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
 926                    const jschar *end)
 927{
 928    uintN max = 0;
 929    JSBool inRange = JS_FALSE;
 930    jschar c, rangeStart = 0;
 931    uintN n, digit, nDigits, i;
 932
 933    target->u.ucclass.bmsize = 0;
 934    target->u.ucclass.sense = JS_TRUE;
 935
 936    if (src == end)
 937        return JS_TRUE;
 938
 939    if (*src == '^') {
 940        ++src;
 941        target->u.ucclass.sense = JS_FALSE;
 942    }
 943
 944    while (src != end) {
 945        JSBool canStartRange = JS_TRUE;
 946        uintN localMax = 0;
 947
 948        switch (*src) {
 949          case '\\':
 950            ++src;
 951            c = *src++;
 952            switch (c) {
 953              case 'b':
 954                localMax = 0x8;
 955                break;
 956              case 'f':
 957                localMax = 0xC;
 958                break;
 959              case 'n':
 960                localMax = 0xA;
 961                break;
 962              case 'r':
 963                localMax = 0xD;
 964                break;
 965              case 't':
 966                localMax = 0x9;
 967                break;
 968              case 'v':
 969                localMax = 0xB;
 970                break;
 971              case 'c':
 972                if (src < end && RE_IS_LETTER(*src)) {
 973                    localMax = (uintN) (*src++) & 0x1F;
 974                } else {
 975                    --src;
 976                    localMax = '\\';
 977                }
 978                break;
 979              case 'x':
 980                nDigits = 2;
 981                goto lexHex;
 982              case 'u':
 983                nDigits = 4;
 984lexHex:
 985                n = 0;
 986                for (i = 0; (i < nDigits) && (src < end); i++) {
 987                    c = *src++;
 988                    if (!isASCIIHexDigit(c, &digit)) {
 989                        /*
 990                         * Back off to accepting the original
 991                         *'\' as a literal.
 992                         */
 993                        src -= i + 1;
 994                        n = '\\';
 995                        break;
 996                    }
 997                    n = (n << 4) | digit;
 998                }
 999                localMax = n;
1000                break;
1001              case 'd':
1002                canStartRange = JS_FALSE;
1003                if (inRange) {
1004                    JS_ReportErrorNumber(state->context,
1005                                         js_GetErrorMessage, NULL,
1006                                         JSMSG_BAD_CLASS_RANGE);
1007                    return JS_FALSE;
1008                }
1009                localMax = '9';
1010                break;
1011              case 'D':
1012              case 's':
1013              case 'S':
1014              case 'w':
1015              case 'W':
1016                canStartRange = JS_FALSE;
1017                if (inRange) {
1018                    JS_ReportErrorNumber(state->context,
1019                                         js_GetErrorMessage, NULL,
1020                                         JSMSG_BAD_CLASS_RANGE);
1021                    return JS_FALSE;
1022                }
1023                max = 65535;
1024
1025                /*
1026                 * If this is the start of a range, ensure that it's less than
1027                 * the end.
1028                 */
1029                localMax = 0;
1030                break;
1031              case '0':
1032              case '1':
1033              case '2':
1034              case '3':
1035              case '4':
1036              case '5':
1037              case '6':
1038              case '7':
1039                /*
1040                 *  This is a non-ECMA extension - decimal escapes (in this
1041                 *  case, octal!) are supposed to be an error inside class
1042                 *  ranges, but supported here for backwards compatibility.
1043                 *
1044                 */
1045                n = JS7_UNDEC(c);
1046                c = *src;
1047                if ('0' <= c && c <= '7') {
1048                    src++;
1049                    n = 8 * n + JS7_UNDEC(c);
1050                    c = *src;
1051                    if ('0' <= c && c <= '7') {
1052                        src++;
1053                        i = 8 * n + JS7_UNDEC(c);
1054                        if (i <= 0377)
1055                            n = i;
1056                        else
1057                            src--;
1058                    }
1059                }
1060                localMax = n;
1061                break;
1062
1063              default:
1064                localMax = c;
1065                break;
1066            }
1067            break;
1068          default:
1069            localMax = *src++;
1070            break;
1071        }
1072
1073        if (inRange) {
1074            /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
1075            if (rangeStart > localMax) {
1076                JS_ReportErrorNumber(state->context,
1077                                     js_GetErrorMessage, NULL,
1078                                     JSMSG_BAD_CLASS_RANGE);
1079                return JS_FALSE;
1080            }
1081            inRange = JS_FALSE;
1082        } else {
1083            if (canStartRange && src < end - 1) {
1084                if (*src == '-') {
1085                    ++src;
1086                    inRange = JS_TRUE;
1087                    rangeStart = (jschar)localMax;
1088                    continue;
1089                }
1090            }
1091            if (state->flags & JSREG_FOLD)
1092                rangeStart = localMax;   /* one run of the uc/dc loop below */
1093        }
1094
1095        if (state->flags & JSREG_FOLD) {
1096            jschar maxch = localMax;
1097
1098            for (i = rangeStart; i <= localMax; i++) {
1099                jschar uch, dch;
1100
1101                uch = upcase(i);
1102                dch = downcase(i);
1103                maxch = JS_MAX(maxch, uch);
1104                maxch = JS_MAX(maxch, dch);
1105            }
1106            localMax = maxch;
1107        }
1108
1109        if (localMax > max)
1110            max = localMax;
1111    }
1112    target->u.ucclass.bmsize = max;
1113    return JS_TRUE;
1114}
1115
1116/*
1117 *  item:       assertion               An item is either an assertion or
1118 *              quantatom               a quantified atom.
1119 *
1120 *  assertion:  '^'                     Assertions match beginning of string
1121 *                                      (or line if the class static property
1122 *                                      RegExp.multiline is true).
1123 *              '$'                     End of string (or line if the class
1124 *                                      static property RegExp.multiline is
1125 *                                      true).
1126 *              '\b'                    Word boundary (between \w and \W).
1127 *              '\B'                    Word non-boundary.
1128 *
1129 *  quantatom:  atom                    An unquantified atom.
1130 *              quantatom '{' n ',' m '}'
1131 *                                      Atom must occur between n and m times.
1132 *              quantatom '{' n ',' '}' Atom must occur at least n times.
1133 *              quantatom '{' n '}'     Atom must occur exactly n times.
1134 *              quantatom '*'           Zero or more times (same as {0,}).
1135 *              quantatom '+'           One or more times (same as {1,}).
1136 *              quantatom '?'           Zero or one time (same as {0,1}).
1137 *
1138 *              any of which can be optionally followed by '?' for ungreedy
1139 *
1140 *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
1141 *                                      can be addressed using a backreference,
1142 *                                      see '\' n below).
1143 *              '.'                     Matches any char except '\n'.
1144 *              '[' classlist ']'       A character class.
1145 *              '[' '^' classlist ']'   A negated character class.
1146 *              '\f'                    Form Feed.
1147 *              '\n'                    Newline (Line Feed).
1148 *              '\r'                    Carriage Return.
1149 *              '\t'                    Horizontal Tab.
1150 *              '\v'                    Vertical Tab.
1151 *              '\d'                    A digit (same as [0-9]).
1152 *              '\D'                    A non-digit.
1153 *              '\w'                    A word character, [0-9a-z_A-Z].
1154 *              '\W'                    A non-word character.
1155 *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
1156 *              '\S'                    A non-whitespace character.
1157 *              '\' n                   A backreference to the nth (n decimal
1158 *                                      and positive) parenthesized expression.
1159 *              '\' octal               An octal escape sequence (octal must be
1160 *                                      two or three digits long, unless it is
1161 *                                      0 for the null character).
1162 *              '\x' hex                A hex escape (hex must be two digits).
1163 *              '\u' unicode            A unicode escape (must be four digits).
1164 *              '\c' ctrl               A control character, ctrl is a letter.
1165 *              '\' literalatomchar     Any character except one of the above
1166 *                                      that follow '\' in an atom.
1167 *              otheratomchar           Any character not first among the other
1168 *                                      atom right-hand sides.
1169 */
1170static JSBool
1171ParseTerm(CompilerState *state)
1172{
1173    jschar c = *state->cp++;
1174    uintN nDigits;
1175    uintN num, tmp, n, i;
1176    const jschar *termStart;
1177
1178    switch (c) {
1179    /* assertions and atoms */
1180      case '^':
1181        state->result = NewRENode(state, REOP_BOL);
1182        if (!state->result)
1183            return JS_FALSE;
1184        state->progLength++;
1185        return JS_TRUE;
1186      case '$':
1187        state->result = NewRENode(state, REOP_EOL);
1188        if (!state->result)
1189            return JS_FALSE;
1190        state->progLength++;
1191        return JS_TRUE;
1192      case '\\':
1193        if (state->cp >= state->cpend) {
1194            /* a trailing '\' is an error */
1195            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
1196            return JS_FALSE;
1197        }
1198        c = *state->cp++;
1199        switch (c) {
1200        /* assertion escapes */
1201          case 'b' :
1202            state->result = NewRENode(state, REOP_WBDRY);
1203            if (!state->result)
1204                return JS_FALSE;
1205            state->progLength++;
1206            return JS_TRUE;
1207          case 'B':
1208            state->result = NewRENode(state, REOP_WNONBDRY);
1209            if (!state->result)
1210                return JS_FALSE;
1211            state->progLength++;
1212            return JS_TRUE;
1213          /* Decimal escape */
1214          case '0':
1215            /* Give a strict warning. See also the note below. */
1216            if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
1217                                   JSMSG_INVALID_BACKREF)) {
1218                return JS_FALSE;
1219            }
1220     doOctal:
1221            num = 0;
1222            while (state->cp < state->cpend) {
1223                c = *state->cp;
1224                if (c < '0' || '7' < c)
1225                    break;
1226                state->cp++;
1227                tmp = 8 * num + (uintN)JS7_UNDEC(c);
1228                if (tmp > 0377)
1229                    break;
1230                num = tmp;
1231            }
1232            c = (jschar)num;
1233    doFlat:
1234            state->result = NewRENode(state, REOP_FLAT);
1235            if (!state->result)
1236                return JS_FALSE;
1237            state->result->u.flat.chr = c;
1238            state->result->u.flat.length = 1;
1239            state->progLength += 3;
1240            break;
1241          case '1':
1242          case '2':
1243          case '3':
1244          case '4':
1245          case '5':
1246          case '6':
1247          case '7':
1248          case '8':
1249          case '9':
1250            termStart = state->cp - 1;
1251            num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
1252            if (state->flags & JSREG_FIND_PAREN_ERROR)
1253                return JS_FALSE;
1254            if (num == OVERFLOW_VALUE) {
1255                /* Give a strict mode warning. */
1256                if (!ReportRegExpError(state,
1257                                       JSREPORT_WARNING | JSREPORT_STRICT,
1258                                       (c >= '8')
1259                                       ? JSMSG_INVALID_BACKREF
1260                                       : JSMSG_BAD_BACKREF)) {
1261                    return JS_FALSE;
1262                }
1263
1264                /*
1265                 * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
1266                 * error here. However, for compatibility with IE, we treat the
1267                 * whole backref as flat if the first character in it is not a
1268                 * valid octal character, and as an octal escape otherwise.
1269                 */
1270                state->cp = termStart;
1271                if (c >= '8') {
1272                    /* Treat this as flat. termStart - 1 is the \. */
1273                    c = '\\';
1274                    goto asFlat;
1275                }
1276
1277                /* Treat this as an octal escape. */
1278                goto doOctal;
1279            }
1280            JS_ASSERT(1 <= num && num <= 0x10000);
1281            state->result = NewRENode(state, REOP_BACKREF);
1282            if (!state->result)
1283                return JS_FALSE;
1284            state->result->u.parenIndex = num - 1;
1285            state->progLength
1286                += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
1287            break;
1288          /* Control escape */
1289          case 'f':
1290            c = 0xC;
1291            goto doFlat;
1292          case 'n':
1293            c = 0xA;
1294            goto doFlat;
1295          case 'r':
1296            c = 0xD;
1297            goto doFlat;
1298          case 't':
1299            c = 0x9;
1300            goto doFlat;
1301          case 'v':
1302            c = 0xB;
1303            goto doFlat;
1304          /* Control letter */
1305          case 'c':
1306            if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
1307                c = (jschar) (*state->cp++ & 0x1F);
1308            } else {
1309                /* back off to accepting the original '\' as a literal */
1310                --state->cp;
1311                c = '\\';
1312            }
1313            goto doFlat;
1314          /* HexEscapeSequence */
1315          case 'x':
1316            nDigits = 2;
1317            goto lexHex;
1318          /* UnicodeEscapeSequence */
1319          case 'u':
1320            nDigits = 4;
1321lexHex:
1322            n = 0;
1323            for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
1324                uintN digit;
1325                c = *state->cp++;
1326                if (!isASCIIHexDigit(c, &digit)) {
1327                    /*
1328                     * Back off to accepting the original 'u' or 'x' as a
1329                     * literal.
1330                     */
1331                    state->cp -= i + 2;
1332                    n = *state->cp++;
1333                    break;
1334                }
1335                n = (n << 4) | digit;
1336            }
1337            c = (jschar) n;
1338            goto doFlat;
1339          /* Character class escapes */
1340          case 'd':
1341            state->result = NewRENode(state, REOP_DIGIT);
1342doSimple:
1343            if (!state->result)
1344                return JS_FALSE;
1345            state->progLength++;
1346            break;
1347          case 'D':
1348            state->result = NewRENode(state, REOP_NONDIGIT);
1349            goto doSimple;
1350          case 's':
1351            state->result = NewRENode(state, REOP_SPACE);
1352            goto doSimple;
1353          case 'S':
1354            state->result = NewRENode(state, REOP_NONSPACE);
1355            goto doSimple;
1356          case 'w':
1357            state->result = NewRENode(state, REOP_ALNUM);
1358            goto doSimple;
1359          case 'W':
1360            state->result = NewRENode(state, REOP_NONALNUM);
1361            goto doSimple;
1362          /* IdentityEscape */
1363          default:
1364            state->result = NewRENode(state, REOP_FLAT);
1365            if (!state->result)
1366                return JS_FALSE;
1367            state->result->u.flat.chr = c;
1368            state->result->u.flat.length = 1;
1369            state->result->kid = (void *) (state->cp - 1);
1370            state->progLength += 3;
1371            break;
1372        }
1373        break;
1374      case '[':
1375        state->result = NewRENode(state, REOP_CLASS);
1376        if (!state->result)
1377            return JS_FALSE;
1378        termStart = state->cp;
1379        state->result->u.ucclass.startIndex = termStart - state->cpbegin;
1380        for (;;) {
1381            if (state->cp == state->cpend) {
1382                ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1383                                        JSMSG_UNTERM_CLASS, termStart);
1384
1385                return JS_FALSE;
1386            }
1387            if (*state->cp == '\\') {
1388                state->cp++;
1389                if (state->cp != state->cpend)
1390                    state->cp++;
1391                continue;
1392            }
1393            if (*state->cp == ']') {
1394                state->result->u.ucclass.kidlen = state->cp - termStart;
1395                break;
1396            }
1397            state->cp++;
1398        }
1399        for (i = 0; i < CLASS_CACHE_SIZE; i++) {
1400            if (!state->classCache[i].start) {
1401                state->classCache[i].start = termStart;
1402                state->classCache[i].length = state->result->u.ucclass.kidlen;
1403                state->classCache[i].index = state->classCount;
1404                break;
1405            }
1406            if (state->classCache[i].length ==
1407                state->result->u.ucclass.kidlen) {
1408                for (n = 0; ; n++) {
1409                    if (n == state->classCache[i].length) {
1410                        state->result->u.ucclass.index
1411                            = state->classCache[i].index;
1412                        goto claim;
1413                    }
1414                    if (state->classCache[i].start[n] != termStart[n])
1415                        break;
1416                }
1417            }
1418        }
1419        state->result->u.ucclass.index = state->classCount++;
1420
1421    claim:
1422        /*
1423         * Call CalculateBitmapSize now as we want any errors it finds
1424         * to be reported during the parse phase, not at execution.
1425         */
1426        if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
1427            return JS_FALSE;
1428        /*
1429         * Update classBitmapsMem with number of bytes to hold bmsize bits,
1430         * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
1431         * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
1432         */
1433        n = (state->result->u.ucclass.bmsize >> 3) + 1;
1434        if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
1435            ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
1436            return JS_FALSE;
1437        }
1438        state->classBitmapsMem += n;
1439        /* CLASS, <index> */
1440        state->progLength
1441            += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
1442        break;
1443
1444      case '.':
1445        state->result = NewRENode(state, REOP_DOT);
1446        goto doSimple;
1447
1448      case '{':
1449      {
1450        const jschar *errp = state->cp--;
1451        intN err;
1452
1453        err = ParseMinMaxQuantifier(state, JS_TRUE);
1454        state->cp = errp;
1455
1456        if (err < 0)
1457            goto asFlat;
1458
1459        /* FALL THROUGH */
1460      }
1461      case '*':
1462      case '+':
1463      case '?':
1464        ReportRegExpErrorHelper(state, JSREPORT_ERROR,
1465                                JSMSG_BAD_QUANTIFIER, state->cp - 1);
1466        return JS_FALSE;
1467      default:
1468asFlat:
1469        state->result = NewRENode(state, REOP_FLAT);
1470        if (!state->result)
1471            return JS_FALSE;
1472        state->result->u.flat.chr = c;
1473        state->result->u.flat.length = 1;
1474        state->result->kid = (void *) (state->cp - 1);
1475        state->progLength += 3;
1476        break;
1477    }
1478    return ParseQuantifier(state);
1479}
1480
1481static JSBool
1482ParseQuantifier(CompilerState *s

Large files are truncated, but you can click here to view the full file