PageRenderTime 334ms CodeModel.GetById 42ms RepoModel.GetById 0ms app.codeStats 1ms

/js/lib/Socket.IO-node/support/expresso/deps/jscoverage/js/jsregexp.cpp

http://github.com/onedayitwillmake/RealtimeMultiplayerNodeJs
C++ | 1906 lines | 1433 code | 148 blank | 325 comment | 293 complexity | 3040154f5eb7ab91c45eedc02b82c675 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, MPL-2.0-no-copyleft-exception, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  2. * vim: set sw=4 ts=8 et tw=78:
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is Mozilla Communicator client code, released
  18. * March 31, 1998.
  19. *
  20. * The Initial Developer of the Original Code is
  21. * Netscape Communications Corporation.
  22. * Portions created by the Initial Developer are Copyright (C) 1998
  23. * the Initial Developer. All Rights Reserved.
  24. *
  25. * Contributor(s):
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the MPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the MPL, the GPL or the LGPL.
  38. *
  39. * ***** END LICENSE BLOCK ***** */
  40. /*
  41. * JS regular expressions, after Perl.
  42. */
  43. #include "jsstddef.h"
  44. #include <stdlib.h>
  45. #include <string.h>
  46. #include <stdarg.h>
  47. #include "jstypes.h"
  48. #include "jsarena.h" /* Added by JSIFY */
  49. #include "jsutil.h" /* Added by JSIFY */
  50. #include "jsapi.h"
  51. #include "jsarray.h"
  52. #include "jsatom.h"
  53. #include "jsbuiltins.h"
  54. #include "jscntxt.h"
  55. #include "jsversion.h"
  56. #include "jsfun.h"
  57. #include "jsgc.h"
  58. #include "jsinterp.h"
  59. #include "jslock.h"
  60. #include "jsnum.h"
  61. #include "jsobj.h"
  62. #include "jsopcode.h"
  63. #include "jsregexp.h"
  64. #include "jsscan.h"
  65. #include "jsscope.h"
  66. #include "jsstr.h"
  67. #ifdef JS_TRACER
  68. #include "jstracer.h"
  69. using namespace avmplus;
  70. using namespace nanojit;
  71. /*
  72. * FIXME Duplicated with jstracer.cpp, doing it this way for now
  73. * to keep it private to files that need it.
  *
  * With JS_JIT_SPEW defined, verbose_debug is initialized to true when the
  * TRACEMONKEY environment variable is set and contains the substring
  * "verbose"; debug_only_v(x) then executes x only when verbose_debug is
  * true. Without JS_JIT_SPEW, debug_only_v(x) expands to nothing.
  74. */
  75. #ifdef JS_JIT_SPEW
  76. static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose");
  77. #define debug_only_v(x) if (verbose_debug) { x; }
  78. #else
  79. #define debug_only_v(x)
  80. #endif
  81. #endif
  /*
   * Regexp opcodes. The enumerators are generated from jsreops.tbl: every
   * REOP_DEF(opcode, name) entry in that table expands to one enumerator, in
   * table order, followed by the REOP_LIMIT sentinel.
   */
  82. typedef enum REOp {
  83. #define REOP_DEF(opcode, name) opcode,
  84. #include "jsreops.tbl"
  85. #undef REOP_DEF
  86. REOP_LIMIT /* META: no operator >= to this */
  87. } REOp;
  /* True when op is ordered at or before REOP_NCLASS in the enumeration. */
  88. #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
  89. #ifdef REGEXP_DEBUG
  /*
   * Debug-only table of opcode names, generated from the name column of
   * jsreops.tbl in the same order as the REOp enumerators and terminated by
   * a NULL entry.
   */
  90. const char *reop_names[] = {
  91. #define REOP_DEF(opcode, name) name,
  92. #include "jsreops.tbl"
  93. #undef REOP_DEF
  94. NULL
  95. };
  96. #endif
  97. #ifdef __GNUC__
  98. static int
  99. re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
  100. #endif
  101. #ifdef REGEXP_DEBUG
  /*
   * Debug-build tracing helper: forwards its printf-style arguments to
   * vprintf and returns vprintf's result.
   */
  static int
  re_debug(const char *fmt, ...)
  {
      va_list args;
      int written;

      va_start(args, fmt);
      written = vprintf(fmt, args);
      va_end(args);

      return written;
  }
  112. static void
  113. re_debug_chars(const jschar *chrs, size_t length)
  114. {
  115. int i = 0;
  116. printf(" \"");
  117. while (*chrs && i++ < length) {
  118. putchar((char)*chrs++);
  119. }
  120. printf("\"");
  121. }
  122. #else /* !REGEXP_DEBUG */
  /*
   * Non-debug stub: ignores its arguments and always returns 0.
   * This should be optimized to a no-op by our tier-1 compilers.
   */
  static int
  re_debug(const char *fmt, ...)
  {
      (void) fmt;
      return 0;
  }
  129. static void
  130. re_debug_chars(const jschar *chrs, size_t length)
  131. {
  132. }
  133. #endif /* !REGEXP_DEBUG */
  /*
   * Node of the regexp parse tree (allocated and initialized by NewRENode).
   * Nodes in a concatenation are chained through next; kid holds the first
   * operand. Which member of the union u is meaningful depends on the
   * node's op, as noted on each member.
   */
  134. struct RENode {
  135. REOp op; /* r.e. op bytecode */
  136. RENode *next; /* next in concatenation order */
  137. void *kid; /* first operand */
  138. union {
  139. void *kid2; /* second operand */
  140. jsint num; /* could be a number */
  141. size_t parenIndex; /* or a parenthesis index */
  142. struct { /* or a quantifier range */
  143. uintN min;
  144. uintN max;
  145. JSPackedBool greedy;
  146. } range;
  147. struct { /* or a character class */
  148. size_t startIndex;
  149. size_t kidlen; /* length of string at kid, in jschars */
  150. size_t index; /* index into class list */
  151. uint16 bmsize; /* bitmap size, based on max char code */
  152. JSPackedBool sense;
  153. } ucclass;
  154. struct { /* or a literal sequence */
  155. jschar chr; /* of one character */
  156. size_t length; /* or many (via the kid) */
  157. } flat;
  158. struct {
  159. RENode *kid2; /* second operand from ALT */
  160. jschar ch1; /* match char for ALTPREREQ */
  161. jschar ch2; /* ditto, or class index for ALTPREREQ2 */
  162. } altprereq;
  163. } u;
  164. };
  /*
   * Character classification helpers.
   *
   * RE_IS_LETTER(c) is true for the ASCII letters 'A'..'Z' and 'a'..'z'.
   * RE_IS_LINE_TERM(c) is true for '\n', '\r', LINE_SEPARATOR and
   * PARA_SEPARATOR.
   *
   * Every use of the argument is parenthesized so that an operator
   * expression such as x | 0x20 is classified as a whole instead of being
   * re-associated with the comparison operators. The argument is still
   * evaluated more than once, so it must not have side effects.
   */
  #define RE_IS_LETTER(c)     ((((c) >= 'A') && ((c) <= 'Z')) ||               \
                               (((c) >= 'a') && ((c) <= 'z')))
  #define RE_IS_LINE_TERM(c)  (((c) == '\n') || ((c) == '\r') ||               \
                               ((c) == LINE_SEPARATOR) ||                      \
                               ((c) == PARA_SEPARATOR))
  /* Number of entries in CompilerState.classCache. */
  169. #define CLASS_CACHE_SIZE 4
  /*
   * State threaded through the regexp parsing routines in this file: the
   * source text bounds and cursor (cpbegin, cpend, cp), running counters
   * accumulated while parsing, and the most recently produced parse tree
   * (result).
   */
  170. typedef struct CompilerState {
  171. JSContext *context;
  172. JSTokenStream *tokenStream; /* For reporting errors */
  173. const jschar *cpbegin;
  174. const jschar *cpend;
  175. const jschar *cp;
  176. size_t parenCount;
  177. size_t classCount; /* number of [] encountered */
  178. size_t treeDepth; /* maximum depth of parse tree */
  179. size_t progLength; /* estimated bytecode length */
  180. RENode *result;
  181. size_t classBitmapsMem; /* memory to hold all class bitmaps */
  182. struct {
  183. const jschar *start; /* small cache of class strings */
  184. size_t length; /* since they're often the same */
  185. size_t index;
  186. } classCache[CLASS_CACHE_SIZE];
  187. uint16 flags;
  188. } CompilerState;
  /*
   * One entry of the EmitStateStackEntry stack; the size of this struct is
   * what TREE_DEPTH_MAX is derived from. The code that pushes and pops these
   * entries is outside this part of the file.
   */
  189. typedef struct EmitStateStackEntry {
  190. jsbytecode *altHead; /* start of REOP_ALT* opcode */
  191. jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
  192. jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
  193. jsbytecode *endTermFixup; /* fixup ptr. to REOP_ALTPREREQ* offset */
  194. RENode *continueNode; /* original REOP_ALT* node being stacked */
  195. jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
  196. JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
  197. avoid 16-bit unsigned offset overflow */
  198. } EmitStateStackEntry;
  199. /*
  200. * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
  201. * the getters and setters take the pc of the offset, not of the opcode before
  202. * the offset.
  *
  * An argument occupies ARG_LEN (2) bytes, most significant byte first:
  * GET_ARG reads pc[0] as the high byte and pc[1] as the low byte, and
  * SET_ARG stores them in the same order. Offsets use the same encoding;
  * OFFSET_MAX is JS_BIT(ARG_LEN * 8) - 1.
  203. */
  204. #define ARG_LEN 2
  205. #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
  206. #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
  207. (pc)[1] = (jsbytecode) (arg))
  208. #define OFFSET_LEN ARG_LEN
  209. #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
  210. #define GET_OFFSET(pc) GET_ARG(pc)
  211. /*
  212. * The maximum supported tree depth is the maximum size of the
  213. * EmitStateStackEntry stack. For sanity, that stack is limited to 2^24 bytes.
  214. */
  215. #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
  216. /*
  217. * The maximum memory that can be allocated for class bitmaps.
  218. * For sanity, we limit it to 2^24 bytes.
  219. */
  220. #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
  221. /*
  222. * Functions to get the size of, write, and read bytecode that represents
  223. * small indexes compactly.
  224. * Each byte in the code represents a 7-bit chunk of the index. The 8th bit, when
  225. * set, indicates that the following byte brings more bits to the index. Otherwise
  226. * this is the last byte in the index bytecode, representing the highest index bits.
  227. */
  /*
   * Return the number of bytes the compact encoding of index occupies: one
   * byte per 7-bit group of the value, and never fewer than one byte.
   */
  static size_t
  GetCompactIndexWidth(size_t index)
  {
      size_t bytes = 1;

      while (index > 0x7F) {
          index >>= 7;
          ++bytes;
      }
      return bytes;
  }
  235. static JS_ALWAYS_INLINE jsbytecode *
  236. WriteCompactIndex(jsbytecode *pc, size_t index)
  237. {
  238. size_t next;
  239. while ((next = index >> 7) != 0) {
  240. *pc++ = (jsbytecode)(index | 0x80);
  241. index = next;
  242. }
  243. *pc++ = (jsbytecode)index;
  244. return pc;
  245. }
  246. static JS_ALWAYS_INLINE jsbytecode *
  247. ReadCompactIndex(jsbytecode *pc, size_t *result)
  248. {
  249. size_t nextByte;
  250. nextByte = *pc++;
  251. if ((nextByte & 0x80) == 0) {
  252. /*
  253. * Short-circuit the most common case when compact index <= 127.
  254. */
  255. *result = nextByte;
  256. } else {
  257. size_t shift = 7;
  258. *result = 0x7F & nextByte;
  259. do {
  260. nextByte = *pc++;
  261. *result |= (nextByte & 0x7F) << shift;
  262. shift += 7;
  263. } while ((nextByte & 0x80) != 0);
  264. }
  265. return pc;
  266. }
  /* Location of one captured substring: start index plus length. */
  267. typedef struct RECapture {
  268. ptrdiff_t index; /* start of contents, -1 for empty */
  269. size_t length; /* length of capture */
  270. } RECapture;
  /*
   * Match position plus the capture records. parens is declared with one
   * element but, per its comment, is over-allocated so that it holds
   * re->parenCount entries.
   */
  271. typedef struct REMatchState {
  272. const jschar *cp;
  273. RECapture parens[1]; /* first of 're->parenCount' captures,
  274. allocated at end of this struct */
  275. } REMatchState;
  276. struct REBackTrackData;
  /*
   * Saved program state (an element of REGlobalData.stateStack). The union u
   * holds either quantifier limits or assertion backtrack-stack bookkeeping.
   */
  277. typedef struct REProgState {
  278. jsbytecode *continue_pc; /* current continuation data */
  279. jsbytecode continue_op;
  280. ptrdiff_t index; /* progress in text */
  281. size_t parenSoFar; /* highest indexed paren started */
  282. union {
  283. struct {
  284. uintN min; /* current quantifier limits */
  285. uintN max;
  286. } quantifier;
  287. struct {
  288. size_t top; /* backtrack stack state */
  289. size_t sz;
  290. } assertion;
  291. } u;
  292. } REProgState;
  /*
   * Header of one backtrack-stack entry. Per the trailing comments, the
   * saved parent states and saved paren contents are stored immediately
   * after this header, so entries are variable-sized (sz records the size of
   * the previous entry).
   */
  293. typedef struct REBackTrackData {
  294. size_t sz; /* size of previous stack entry */
  295. jsbytecode *backtrack_pc; /* where to backtrack to */
  296. jsbytecode backtrack_op;
  297. const jschar *cp; /* index in text of match at backtrack */
  298. size_t parenIndex; /* start index of saved paren contents */
  299. size_t parenCount; /* # of saved paren contents */
  300. size_t saveStateStackTop; /* number of parent states */
  301. /* saved parent states follow */
  302. /* saved paren contents follow */
  303. } REBackTrackData;
  /*
   * NOTE(review): presumably the initial capacities of REGlobalData.stateStack
   * and REGlobalData.backTrackStack; their uses are outside this part of the
   * file -- confirm.
   */
  304. #define INITIAL_STATESTACK 100
  305. #define INITIAL_BACKTRACK 8000
  /*
   * Per-execution matcher state: the regexp being run, the bounds of the
   * input text, and the state and backtrack stacks with their bookkeeping.
   */
  306. typedef struct REGlobalData {
  307. JSContext *cx;
  308. JSRegExp *regexp; /* the RE in execution */
  309. JSBool ok; /* runtime error (out_of_memory only?) */
  310. size_t start; /* offset to start at */
  311. ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
  312. const jschar *cpbegin; /* text base address */
  313. const jschar *cpend; /* text limit address */
  314. REProgState *stateStack; /* stack of state of current parents */
  315. size_t stateStackTop;
  316. size_t stateStackLimit;
  317. REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
  318. REBackTrackData *backTrackSP;
  319. size_t backTrackStackSize;
  320. size_t cursz; /* size of current stack entry */
  321. size_t backTrackCount; /* how many times we've backtracked */
  322. size_t backTrackLimit; /* upper limit on backtrack states */
  323. } REGlobalData;
  324. /*
  325. * 1. If IgnoreCase is false, return ch.
  326. * 2. Let u be ch converted to upper case as if by calling
  327. * String.prototype.toUpperCase on the one-character string ch.
  328. * 3. If u does not consist of a single character, return ch.
  329. * 4. Let cu be u's character.
  330. * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
  331. * code point value is less than decimal 128, then return ch.
  332. * 6. Return cu.
  333. */
  334. static JS_ALWAYS_INLINE uintN
  335. upcase(uintN ch)
  336. {
  337. uintN cu;
  338. JS_ASSERT((uintN) (jschar) ch == ch);
  339. if (ch < 128) {
  340. if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
  341. ch -= (uintN) ('a' - 'A');
  342. return ch;
  343. }
  344. cu = JS_TOUPPER(ch);
  345. return (cu < 128) ? ch : cu;
  346. }
  347. static JS_ALWAYS_INLINE uintN
  348. downcase(uintN ch)
  349. {
  350. JS_ASSERT((uintN) (jschar) ch == ch);
  351. if (ch < 128) {
  352. if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
  353. ch += (uintN) ('a' - 'A');
  354. return ch;
  355. }
  356. return JS_TOLOWER(ch);
  357. }
  358. /* Construct and initialize an RENode, returning NULL for out-of-memory */
  359. static RENode *
  360. NewRENode(CompilerState *state, REOp op)
  361. {
  362. JSContext *cx;
  363. RENode *ren;
  364. cx = state->context;
  365. JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
  366. if (!ren) {
  367. js_ReportOutOfScriptQuota(cx);
  368. return NULL;
  369. }
  370. ren->op = op;
  371. ren->next = NULL;
  372. ren->kid = NULL;
  373. return ren;
  374. }
  375. /*
  376. * Validates and converts hex ascii value.
  377. */
  378. static JSBool
  379. isASCIIHexDigit(jschar c, uintN *digit)
  380. {
  381. uintN cv = c;
  382. if (cv < '0')
  383. return JS_FALSE;
  384. if (cv <= '9') {
  385. *digit = cv - '0';
  386. return JS_TRUE;
  387. }
  388. cv |= 0x20;
  389. if (cv >= 'a' && cv <= 'f') {
  390. *digit = cv - 'a' + 10;
  391. return JS_TRUE;
  392. }
  393. return JS_FALSE;
  394. }
  /*
   * Entry of the operator stack used by ParseRegExp: the operator, the source
   * position recorded when it was pushed (used when reporting errors), and
   * the paren count captured at that time.
   */
  395. typedef struct {
  396. REOp op;
  397. const jschar *errPos;
  398. size_t parenIndex;
  399. } REOpData;
  400. static JSBool
  401. ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
  402. const jschar *arg)
  403. {
  404. if (state->tokenStream) {
  405. return js_ReportCompileErrorNumber(state->context, state->tokenStream,
  406. NULL, JSREPORT_UC | flags,
  407. errorNumber, arg);
  408. }
  409. return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
  410. js_GetErrorMessage, NULL,
  411. errorNumber, arg);
  412. }
  413. static JSBool
  414. ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
  415. {
  416. return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
  417. }
  418. /*
  419. * Process the op against the two top operands, reducing them to a single
  420. * operand in the penultimate slot. Update progLength and treeDepth.
  421. */
  422. static JSBool
  423. ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
  424. intN operandSP)
  425. {
  426. RENode *result;
  427. switch (opData->op) {
  428. case REOP_ALT:
  429. result = NewRENode(state, REOP_ALT);
  430. if (!result)
  431. return JS_FALSE;
  432. result->kid = operandStack[operandSP - 2];
  433. result->u.kid2 = operandStack[operandSP - 1];
  434. operandStack[operandSP - 2] = result;
  435. if (state->treeDepth == TREE_DEPTH_MAX) {
  436. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
  437. return JS_FALSE;
  438. }
  439. ++state->treeDepth;
  440. /*
  441. * Look at both alternates to see if there's a FLAT or a CLASS at
  442. * the start of each. If so, use a prerequisite match.
  443. */
  444. if (((RENode *) result->kid)->op == REOP_FLAT &&
  445. ((RENode *) result->u.kid2)->op == REOP_FLAT &&
  446. (state->flags & JSREG_FOLD) == 0) {
  447. result->op = REOP_ALTPREREQ;
  448. result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
  449. result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
  450. /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
  451. JUMP, <end> ... ENDALT */
  452. state->progLength += 13;
  453. }
  454. else
  455. if (((RENode *) result->kid)->op == REOP_CLASS &&
  456. ((RENode *) result->kid)->u.ucclass.index < 256 &&
  457. ((RENode *) result->u.kid2)->op == REOP_FLAT &&
  458. (state->flags & JSREG_FOLD) == 0) {
  459. result->op = REOP_ALTPREREQ2;
  460. result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
  461. result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
  462. /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
  463. JUMP, <end> ... ENDALT */
  464. state->progLength += 13;
  465. }
  466. else
  467. if (((RENode *) result->kid)->op == REOP_FLAT &&
  468. ((RENode *) result->u.kid2)->op == REOP_CLASS &&
  469. ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
  470. (state->flags & JSREG_FOLD) == 0) {
  471. result->op = REOP_ALTPREREQ2;
  472. result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
  473. result->u.altprereq.ch2 =
  474. ((RENode *) result->u.kid2)->u.ucclass.index;
  475. /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
  476. JUMP, <end> ... ENDALT */
  477. state->progLength += 13;
  478. }
  479. else {
  480. /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
  481. state->progLength += 7;
  482. }
  483. break;
  484. case REOP_CONCAT:
  485. result = operandStack[operandSP - 2];
  486. while (result->next)
  487. result = result->next;
  488. result->next = operandStack[operandSP - 1];
  489. break;
  490. case REOP_ASSERT:
  491. case REOP_ASSERT_NOT:
  492. case REOP_LPARENNON:
  493. case REOP_LPAREN:
  494. /* These should have been processed by a close paren. */
  495. ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
  496. opData->errPos);
  497. return JS_FALSE;
  498. default:;
  499. }
  500. return JS_TRUE;
  501. }
  502. /*
  503. * Parser forward declarations.
  504. */
  505. static JSBool ParseTerm(CompilerState *state);
  506. static JSBool ParseQuantifier(CompilerState *state);
  507. static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
  508. /*
  509. * Top-down regular expression grammar, based closely on Perl4.
  510. *
  511. * regexp: altern A regular expression is one or more
  512. * altern '|' regexp alternatives separated by vertical bar.
  513. */
  /* Initial capacity, in entries, of ParseRegExp's operator and operand stacks. */
  514. #define INITIAL_STACK_SIZE 128
  /*
   * Parse the regexp source between state->cp and state->cpend into an
   * RENode tree using two explicit stacks: operatorStack (REOpData entries)
   * and operandStack (RENode pointers).
   *
   * Each iteration of the main loop first obtains an operand (a term parsed
   * by ParseTerm, or an REOP_EMPTY node where an operand is missing) and then
   * handles the operator that follows it: '|' pushes REOP_ALT after reducing
   * pending REOP_CONCATs, ')' reduces back to the matching open paren and
   * parses a trailing quantifier, and any other character pushes REOP_CONCAT.
   * At end of input all remaining operators are reduced with ProcessOp.
   *
   * On success the tree root is stored in state->result and JS_TRUE is
   * returned. On any failure (syntax error, too many parens, excessive tree
   * depth, or allocation failure) JS_FALSE is returned. Both stacks start
   * with INITIAL_STACK_SIZE entries, double when full, and are freed at the
   * out label on every path that allocated them.
   */
  515. static JSBool
  516. ParseRegExp(CompilerState *state)
  517. {
  518. size_t parenIndex;
  519. RENode *operand;
  520. REOpData *operatorStack;
  521. RENode **operandStack;
  522. REOp op;
  523. intN i;
  524. JSBool result = JS_FALSE;
  525. intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
  526. intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
  527. /* Watch out for empty regexp */
  528. if (state->cp == state->cpend) {
  529. state->result = NewRENode(state, REOP_EMPTY);
  530. return (state->result != NULL);
  531. }
  532. operatorStack = (REOpData *)
  533. JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
  534. if (!operatorStack)
  535. return JS_FALSE;
  536. operandStack = (RENode **)
  537. JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
  538. if (!operandStack)
  539. goto out;
  540. for (;;) {
  /* Remember the paren count before this term; it is recorded with any operator pushed below. */
  541. parenIndex = state->parenCount;
  542. if (state->cp == state->cpend) {
  543. /*
  544. * If we are at the end of the regexp and we're short one or more
  545. * operands, the regexp must have the form /x|/ or some such, with
  546. * left parentheses making us short more than one operand.
  547. */
  548. if (operatorSP >= operandSP) {
  549. operand = NewRENode(state, REOP_EMPTY);
  550. if (!operand)
  551. goto out;
  552. goto pushOperand;
  553. }
  554. } else {
  555. switch (*state->cp) {
  556. case '(':
  557. ++state->cp;
  558. if (state->cp + 1 < state->cpend &&
  559. *state->cp == '?' &&
  560. (state->cp[1] == '=' ||
  561. state->cp[1] == '!' ||
  562. state->cp[1] == ':')) {
  563. switch (state->cp[1]) {
  564. case '=':
  565. op = REOP_ASSERT;
  566. /* ASSERT, <next>, ... ASSERTTEST */
  567. state->progLength += 4;
  568. break;
  569. case '!':
  570. op = REOP_ASSERT_NOT;
  571. /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
  572. state->progLength += 4;
  573. break;
  574. default:
  575. op = REOP_LPARENNON;
  576. break;
  577. }
  578. state->cp += 2;
  579. } else {
  580. op = REOP_LPAREN;
  581. /* LPAREN, <index>, ... RPAREN, <index> */
  582. state->progLength
  583. += 2 * (1 + GetCompactIndexWidth(parenIndex));
  584. state->parenCount++;
  585. if (state->parenCount == 65535) {
  586. ReportRegExpError(state, JSREPORT_ERROR,
  587. JSMSG_TOO_MANY_PARENS);
  588. goto out;
  589. }
  590. }
  591. goto pushOperator;
  592. case ')':
  593. /*
  594. * If there's no stacked open parenthesis, throw syntax error.
  595. */
  596. for (i = operatorSP - 1; ; i--) {
  597. if (i < 0) {
  598. ReportRegExpError(state, JSREPORT_ERROR,
  599. JSMSG_UNMATCHED_RIGHT_PAREN);
  600. goto out;
  601. }
  602. if (operatorStack[i].op == REOP_ASSERT ||
  603. operatorStack[i].op == REOP_ASSERT_NOT ||
  604. operatorStack[i].op == REOP_LPARENNON ||
  605. operatorStack[i].op == REOP_LPAREN) {
  606. break;
  607. }
  608. }
  609. /* FALL THROUGH */
  610. case '|':
  611. /* Expected an operand before these, so make an empty one */
  612. operand = NewRENode(state, REOP_EMPTY);
  613. if (!operand)
  614. goto out;
  615. goto pushOperand;
  616. default:
  617. if (!ParseTerm(state))
  618. goto out;
  619. operand = state->result;
  620. pushOperand:
  /* Operand stack full: double its capacity before pushing. */
  621. if (operandSP == operandStackSize) {
  622. RENode **tmp;
  623. operandStackSize += operandStackSize;
  624. tmp = (RENode **)
  625. JS_realloc(state->context, operandStack,
  626. sizeof(RENode *) * operandStackSize);
  627. if (!tmp)
  628. goto out;
  629. operandStack = tmp;
  630. }
  631. operandStack[operandSP++] = operand;
  632. break;
  633. }
  634. }
  635. /* At the end; process remaining operators. */
  636. restartOperator:
  637. if (state->cp == state->cpend) {
  638. while (operatorSP) {
  639. --operatorSP;
  640. if (!ProcessOp(state, &operatorStack[operatorSP],
  641. operandStack, operandSP))
  642. goto out;
  643. --operandSP;
  644. }
  645. JS_ASSERT(operandSP == 1);
  646. state->result = operandStack[0];
  647. result = JS_TRUE;
  648. goto out;
  649. }
  650. switch (*state->cp) {
  651. case '|':
  652. /* Process any stacked 'concat' operators */
  653. ++state->cp;
  654. while (operatorSP &&
  655. operatorStack[operatorSP - 1].op == REOP_CONCAT) {
  656. --operatorSP;
  657. if (!ProcessOp(state, &operatorStack[operatorSP],
  658. operandStack, operandSP)) {
  659. goto out;
  660. }
  661. --operandSP;
  662. }
  663. op = REOP_ALT;
  664. goto pushOperator;
  665. case ')':
  666. /*
  667. * If there's no stacked open parenthesis, throw syntax error.
  668. */
  669. for (i = operatorSP - 1; ; i--) {
  670. if (i < 0) {
  671. ReportRegExpError(state, JSREPORT_ERROR,
  672. JSMSG_UNMATCHED_RIGHT_PAREN);
  673. goto out;
  674. }
  675. if (operatorStack[i].op == REOP_ASSERT ||
  676. operatorStack[i].op == REOP_ASSERT_NOT ||
  677. operatorStack[i].op == REOP_LPARENNON ||
  678. operatorStack[i].op == REOP_LPAREN) {
  679. break;
  680. }
  681. }
  682. ++state->cp;
  683. /* Process everything on the stack until the open parenthesis. */
  684. for (;;) {
  685. JS_ASSERT(operatorSP);
  686. --operatorSP;
  687. switch (operatorStack[operatorSP].op) {
  688. case REOP_ASSERT:
  689. case REOP_ASSERT_NOT:
  690. case REOP_LPAREN:
  /* Wrap the group's operand in a node for the paren/assertion itself. */
  691. operand = NewRENode(state, operatorStack[operatorSP].op);
  692. if (!operand)
  693. goto out;
  694. operand->u.parenIndex =
  695. operatorStack[operatorSP].parenIndex;
  696. JS_ASSERT(operandSP);
  697. operand->kid = operandStack[operandSP - 1];
  698. operandStack[operandSP - 1] = operand;
  699. if (state->treeDepth == TREE_DEPTH_MAX) {
  700. ReportRegExpError(state, JSREPORT_ERROR,
  701. JSMSG_REGEXP_TOO_COMPLEX);
  702. goto out;
  703. }
  704. ++state->treeDepth;
  705. /* FALL THROUGH */
  706. case REOP_LPARENNON:
  /* The closed group may itself be quantified; then look for the next operator. */
  707. state->result = operandStack[operandSP - 1];
  708. if (!ParseQuantifier(state))
  709. goto out;
  710. operandStack[operandSP - 1] = state->result;
  711. goto restartOperator;
  712. default:
  713. if (!ProcessOp(state, &operatorStack[operatorSP],
  714. operandStack, operandSP))
  715. goto out;
  716. --operandSP;
  717. break;
  718. }
  719. }
  720. break;
  721. case '{':
  722. {
  723. const jschar *errp = state->cp;
  724. if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
  725. /*
  726. * This didn't even scan correctly as a quantifier, so we should
  727. * treat it as flat.
  728. */
  729. op = REOP_CONCAT;
  730. goto pushOperator;
  731. }
  732. state->cp = errp;
  733. /* FALL THROUGH */
  734. }
  735. case '+':
  736. case '*':
  737. case '?':
  738. ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
  739. state->cp);
  740. result = JS_FALSE;
  741. goto out;
  742. default:
  743. /* Anything else is the start of the next term. */
  744. op = REOP_CONCAT;
  745. pushOperator:
  /* Operator stack full: double its capacity before pushing. */
  746. if (operatorSP == operatorStackSize) {
  747. REOpData *tmp;
  748. operatorStackSize += operatorStackSize;
  749. tmp = (REOpData *)
  750. JS_realloc(state->context, operatorStack,
  751. sizeof(REOpData) * operatorStackSize);
  752. if (!tmp)
  753. goto out;
  754. operatorStack = tmp;
  755. }
  756. operatorStack[operatorSP].op = op;
  757. operatorStack[operatorSP].errPos = state->cp;
  758. operatorStack[operatorSP++].parenIndex = parenIndex;
  759. break;
  760. }
  761. }
  /* Common exit: release both stacks; result is JS_TRUE only on the success path. */
  762. out:
  763. if (operatorStack)
  764. JS_free(state->context, operatorStack);
  765. if (operandStack)
  766. JS_free(state->context, operandStack);
  767. return result;
  768. }
  769. /*
  770. * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
  771. * its being on the stack, and to propagate errors to its callers.
  * JSREG_FIND_PAREN_COUNT is set on the temporary state FindParenCount parses
  * with (a nested call seeing it returns OVERFLOW_VALUE at once), and
  * JSREG_FIND_PAREN_ERROR is set on the caller's state when that parse fails.
  772. */
  773. #define JSREG_FIND_PAREN_COUNT 0x8000
  774. #define JSREG_FIND_PAREN_ERROR 0x4000
  775. /*
  776. * Magic return value from FindParenCount and GetDecimalValue, to indicate
  777. * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
  778. * its findMax parameter is non-null.
  779. */
  780. #define OVERFLOW_VALUE ((uintN)-1)
  781. static uintN
  782. FindParenCount(CompilerState *state)
  783. {
  784. CompilerState temp;
  785. int i;
  786. if (state->flags & JSREG_FIND_PAREN_COUNT)
  787. return OVERFLOW_VALUE;
  788. /*
  789. * Copy state into temp, flag it so we never report an invalid backref,
  790. * and reset its members to parse the entire regexp. This is obviously
  791. * suboptimal, but GetDecimalValue calls us only if a backref appears to
  792. * refer to a forward parenthetical, which is rare.
  793. */
  794. temp = *state;
  795. temp.flags |= JSREG_FIND_PAREN_COUNT;
  796. temp.cp = temp.cpbegin;
  797. temp.parenCount = 0;
  798. temp.classCount = 0;
  799. temp.progLength = 0;
  800. temp.treeDepth = 0;
  801. temp.classBitmapsMem = 0;
  802. for (i = 0; i < CLASS_CACHE_SIZE; i++)
  803. temp.classCache[i].start = NULL;
  804. if (!ParseRegExp(&temp)) {
  805. state->flags |= JSREG_FIND_PAREN_ERROR;
  806. return OVERFLOW_VALUE;
  807. }
  808. return temp.parenCount;
  809. }
  810. /*
  811. * Extract and return a decimal value at state->cp. The initial character c
  812. * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
  813. * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
  814. * state->flags to discover whether an error occurred under findMax.
  815. */
  816. static uintN
  817. GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
  818. CompilerState *state)
  819. {
  820. uintN value = JS7_UNDEC(c);
  821. JSBool overflow = (value > max && (!findMax || value > findMax(state)));
  822. /* The following restriction allows simpler overflow checks. */
  823. JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
  824. while (state->cp < state->cpend) {
  825. c = *state->cp;
  826. if (!JS7_ISDEC(c))
  827. break;
  828. value = 10 * value + JS7_UNDEC(c);
  829. if (!overflow && value > max && (!findMax || value > findMax(state)))
  830. overflow = JS_TRUE;
  831. ++state->cp;
  832. }
  833. return overflow ? OVERFLOW_VALUE : value;
  834. }
  835. /*
  836. * Calculate the total size of the bitmap required for a class expression.
  *
  * Scans the class body [src, end) and stores in target->u.ucclass.bmsize the
  * largest character code the class may need to represent, and in
  * target->u.ucclass.sense JS_FALSE when the body starts with '^' and JS_TRUE
  * otherwise. With JSREG_FOLD set, the upper- and lower-case forms of every
  * character (or of every character in a range) are taken into account too.
  *
  * Returns JS_TRUE on success. Returns JS_FALSE after reporting
  * JSMSG_BAD_CLASS_RANGE when a range ends in a class escape such as \d or
  * \w, or when a range's start is greater than its end.
  837. */
  838. static JSBool
  839. CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
  840. const jschar *end)
  841. {
  842. uintN max = 0;
  843. JSBool inRange = JS_FALSE;
  844. jschar c, rangeStart = 0;
  845. uintN n, digit, nDigits, i;
  846. target->u.ucclass.bmsize = 0;
  847. target->u.ucclass.sense = JS_TRUE;
  848. if (src == end)
  849. return JS_TRUE;
  850. if (*src == '^') {
  851. ++src;
  852. target->u.ucclass.sense = JS_FALSE;
  853. }
  854. while (src != end) {
  855. JSBool canStartRange = JS_TRUE;
  856. uintN localMax = 0;
  857. switch (*src) {
  858. case '\\':
  859. ++src;
  860. c = *src++;
  861. switch (c) {
  862. case 'b':
  863. localMax = 0x8;
  864. break;
  865. case 'f':
  866. localMax = 0xC;
  867. break;
  868. case 'n':
  869. localMax = 0xA;
  870. break;
  871. case 'r':
  872. localMax = 0xD;
  873. break;
  874. case 't':
  875. localMax = 0x9;
  876. break;
  877. case 'v':
  878. localMax = 0xB;
  879. break;
  880. case 'c':
  /* \c followed by a letter is a control character; otherwise back up and treat the '\' as a literal. */
  881. if (src < end && RE_IS_LETTER(*src)) {
  882. localMax = (uintN) (*src++) & 0x1F;
  883. } else {
  884. --src;
  885. localMax = '\\';
  886. }
  887. break;
  888. case 'x':
  889. nDigits = 2;
  890. goto lexHex;
  891. case 'u':
  892. nDigits = 4;
  893. lexHex:
  894. n = 0;
  895. for (i = 0; (i < nDigits) && (src < end); i++) {
  896. c = *src++;
  897. if (!isASCIIHexDigit(c, &digit)) {
  898. /*
  899. * Back off to accepting the original
  900. *'\' as a literal.
  901. */
  902. src -= i + 1;
  903. n = '\\';
  904. break;
  905. }
  906. n = (n << 4) | digit;
  907. }
  908. localMax = n;
  909. break;
  910. case 'd':
  911. canStartRange = JS_FALSE;
  912. if (inRange) {
  913. JS_ReportErrorNumber(state->context,
  914. js_GetErrorMessage, NULL,
  915. JSMSG_BAD_CLASS_RANGE);
  916. return JS_FALSE;
  917. }
  918. localMax = '9';
  919. break;
  920. case 'D':
  921. case 's':
  922. case 'S':
  923. case 'w':
  924. case 'W':
  925. canStartRange = JS_FALSE;
  926. if (inRange) {
  927. JS_ReportErrorNumber(state->context,
  928. js_GetErrorMessage, NULL,
  929. JSMSG_BAD_CLASS_RANGE);
  930. return JS_FALSE;
  931. }
  /* These escapes force the overall maximum to 65535. */
  932. max = 65535;
  933. /*
  934. * If this is the start of a range, ensure that it's less than
  935. * the end.
  936. */
  937. localMax = 0;
  938. break;
  939. case '0':
  940. case '1':
  941. case '2':
  942. case '3':
  943. case '4':
  944. case '5':
  945. case '6':
  946. case '7':
  947. /*
  948. * This is a non-ECMA extension - decimal escapes (in this
  949. * case, octal!) are supposed to be an error inside class
  950. * ranges, but supported here for backwards compatibility.
  951. *
  952. */
  953. n = JS7_UNDEC(c);
  954. c = *src;
  955. if ('0' <= c && c <= '7') {
  956. src++;
  957. n = 8 * n + JS7_UNDEC(c);
  958. c = *src;
  959. if ('0' <= c && c <= '7') {
  960. src++;
  961. i = 8 * n + JS7_UNDEC(c);
  962. if (i <= 0377)
  963. n = i;
  964. else
  965. src--;
  966. }
  967. }
  968. localMax = n;
  969. break;
  970. default:
  971. localMax = c;
  972. break;
  973. }
  974. break;
  975. default:
  976. localMax = *src++;
  977. break;
  978. }
  /* localMax now holds the code of the item just scanned (0 for \D, \s, \S, \w, \W). */
  979. if (inRange) {
  980. /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
  981. if (rangeStart > localMax) {
  982. JS_ReportErrorNumber(state->context,
  983. js_GetErrorMessage, NULL,
  984. JSMSG_BAD_CLASS_RANGE);
  985. return JS_FALSE;
  986. }
  987. inRange = JS_FALSE;
  988. } else {
  /* A '-' that is not the last character of the body starts a range from this item. */
  989. if (canStartRange && src < end - 1) {
  990. if (*src == '-') {
  991. ++src;
  992. inRange = JS_TRUE;
  993. rangeStart = (jschar)localMax;
  994. continue;
  995. }
  996. }
  997. if (state->flags & JSREG_FOLD)
  998. rangeStart = localMax; /* one run of the uc/dc loop below */
  999. }
  /* Case-insensitive: raise localMax to cover the upper/lower-case forms of every character in [rangeStart, localMax]. */
  1000. if (state->flags & JSREG_FOLD) {
  1001. jschar maxch = localMax;
  1002. for (i = rangeStart; i <= localMax; i++) {
  1003. jschar uch, dch;
  1004. uch = upcase(i);
  1005. dch = downcase(i);
  1006. maxch = JS_MAX(maxch, uch);
  1007. maxch = JS_MAX(maxch, dch);
  1008. }
  1009. localMax = maxch;
  1010. }
  1011. if (localMax > max)
  1012. max = localMax;
  1013. }
  1014. target->u.ucclass.bmsize = max;
  1015. return JS_TRUE;
  1016. }
  1017. /*
  1018. * item: assertion An item is either an assertion or
  1019. * quantatom a quantified atom.
  1020. *
  1021. * assertion: '^' Assertions match beginning of string
  1022. * (or line if the class static property
  1023. * RegExp.multiline is true).
  1024. * '$' End of string (or line if the class
  1025. * static property RegExp.multiline is
  1026. * true).
  1027. * '\b' Word boundary (between \w and \W).
  1028. * '\B' Word non-boundary.
  1029. *
  1030. * quantatom: atom An unquantified atom.
  1031. * quantatom '{' n ',' m '}'
  1032. * Atom must occur between n and m times.
  1033. * quantatom '{' n ',' '}' Atom must occur at least n times.
  1034. * quantatom '{' n '}' Atom must occur exactly n times.
  1035. * quantatom '*' Zero or more times (same as {0,}).
  1036. * quantatom '+' One or more times (same as {1,}).
  1037. * quantatom '?' Zero or one time (same as {0,1}).
  1038. *
  1039. * any of which can be optionally followed by '?' for ungreedy
  1040. *
  1041. * atom: '(' regexp ')' A parenthesized regexp (what matched
  1042. * can be addressed using a backreference,
  1043. * see '\' n below).
  1044. * '.' Matches any char except '\n'.
  1045. * '[' classlist ']' A character class.
  1046. * '[' '^' classlist ']' A negated character class.
  1047. * '\f' Form Feed.
  1048. * '\n' Newline (Line Feed).
  1049. * '\r' Carriage Return.
  1050. * '\t' Horizontal Tab.
  1051. * '\v' Vertical Tab.
  1052. * '\d' A digit (same as [0-9]).
  1053. * '\D' A non-digit.
  1054. * '\w' A word character, [0-9a-z_A-Z].
  1055. * '\W' A non-word character.
  1056. * '\s' A whitespace character, [ \b\f\n\r\t\v].
  1057. * '\S' A non-whitespace character.
  1058. * '\' n A backreference to the nth (n decimal
  1059. * and positive) parenthesized expression.
  1060. * '\' octal An octal escape sequence (octal must be
  1061. * two or three digits long, unless it is
  1062. * 0 for the null character).
  1063. * '\x' hex A hex escape (hex must be two digits).
  1064. * '\u' unicode A unicode escape (must be four digits).
  1065. * '\c' ctrl A control character, ctrl is a letter.
  1066. * '\' literalatomchar Any character except one of the above
  1067. * that follow '\' in an atom.
  1068. * otheratomchar Any character not first among the other
  1069. * atom right-hand sides.
  1070. */
  1071. static JSBool
  1072. ParseTerm(CompilerState *state)
  1073. {
  1074. jschar c = *state->cp++;
  1075. uintN nDigits;
  1076. uintN num, tmp, n, i;
  1077. const jschar *termStart;
  1078. switch (c) {
  1079. /* assertions and atoms */
  1080. case '^':
  1081. state->result = NewRENode(state, REOP_BOL);
  1082. if (!state->result)
  1083. return JS_FALSE;
  1084. state->progLength++;
  1085. return JS_TRUE;
  1086. case '$':
  1087. state->result = NewRENode(state, REOP_EOL);
  1088. if (!state->result)
  1089. return JS_FALSE;
  1090. state->progLength++;
  1091. return JS_TRUE;
  1092. case '\\':
  1093. if (state->cp >= state->cpend) {
  1094. /* a trailing '\' is an error */
  1095. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
  1096. return JS_FALSE;
  1097. }
  1098. c = *state->cp++;
  1099. switch (c) {
  1100. /* assertion escapes */
  1101. case 'b' :
  1102. state->result = NewRENode(state, REOP_WBDRY);
  1103. if (!state->result)
  1104. return JS_FALSE;
  1105. state->progLength++;
  1106. return JS_TRUE;
  1107. case 'B':
  1108. state->result = NewRENode(state, REOP_WNONBDRY);
  1109. if (!state->result)
  1110. return JS_FALSE;
  1111. state->progLength++;
  1112. return JS_TRUE;
  1113. /* Decimal escape */
  1114. case '0':
  1115. /* Give a strict warning. See also the note below. */
  1116. if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
  1117. JSMSG_INVALID_BACKREF)) {
  1118. return JS_FALSE;
  1119. }
  1120. doOctal:
  1121. num = 0;
  1122. while (state->cp < state->cpend) {
  1123. c = *state->cp;
  1124. if (c < '0' || '7' < c)
  1125. break;
  1126. state->cp++;
  1127. tmp = 8 * num + (uintN)JS7_UNDEC(c);
  1128. if (tmp > 0377)
  1129. break;
  1130. num = tmp;
  1131. }
  1132. c = (jschar)num;
  1133. doFlat:
  1134. state->result = NewRENode(state, REOP_FLAT);
  1135. if (!state->result)
  1136. return JS_FALSE;
  1137. state->result->u.flat.chr = c;
  1138. state->result->u.flat.length = 1;
  1139. state->progLength += 3;
  1140. break;
  1141. case '1':
  1142. case '2':
  1143. case '3':
  1144. case '4':
  1145. case '5':
  1146. case '6':
  1147. case '7':
  1148. case '8':
  1149. case '9':
  1150. termStart = state->cp - 1;
  1151. num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
  1152. if (state->flags & JSREG_FIND_PAREN_ERROR)
  1153. return JS_FALSE;
  1154. if (num == OVERFLOW_VALUE) {
  1155. /* Give a strict mode warning. */
  1156. if (!ReportRegExpError(state,
  1157. JSREPORT_WARNING | JSREPORT_STRICT,
  1158. (c >= '8')
  1159. ? JSMSG_INVALID_BACKREF
  1160. : JSMSG_BAD_BACKREF)) {
  1161. return JS_FALSE;
  1162. }
  1163. /*
  1164. * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
  1165. * error here. However, for compatibility with IE, we treat the
  1166. * whole backref as flat if the first character in it is not a
  1167. * valid octal character, and as an octal escape otherwise.
  1168. */
  1169. state->cp = termStart;
  1170. if (c >= '8') {
  1171. /* Treat this as flat. termStart - 1 is the \. */
  1172. c = '\\';
  1173. goto asFlat;
  1174. }
  1175. /* Treat this as an octal escape. */
  1176. goto doOctal;
  1177. }
  1178. JS_ASSERT(1 <= num && num <= 0x10000);
  1179. state->result = NewRENode(state, REOP_BACKREF);
  1180. if (!state->result)
  1181. return JS_FALSE;
  1182. state->result->u.parenIndex = num - 1;
  1183. state->progLength
  1184. += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
  1185. break;
  1186. /* Control escape */
  1187. case 'f':
  1188. c = 0xC;
  1189. goto doFlat;
  1190. case 'n':
  1191. c = 0xA;
  1192. goto doFlat;
  1193. case 'r':
  1194. c = 0xD;
  1195. goto doFlat;
  1196. case 't':
  1197. c = 0x9;
  1198. goto doFlat;
  1199. case 'v':
  1200. c = 0xB;
  1201. goto doFlat;
  1202. /* Control letter */
  1203. case 'c':
  1204. if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
  1205. c = (jschar) (*state->cp++ & 0x1F);
  1206. } else {
  1207. /* back off to accepting the original '\' as a literal */
  1208. --state->cp;
  1209. c = '\\';
  1210. }
  1211. goto doFlat;
  1212. /* HexEscapeSequence */
  1213. case 'x':
  1214. nDigits = 2;
  1215. goto lexHex;
  1216. /* UnicodeEscapeSequence */
  1217. case 'u':
  1218. nDigits = 4;
  1219. lexHex:
  1220. n = 0;
  1221. for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
  1222. uintN digit;
  1223. c = *state->cp++;
  1224. if (!isASCIIHexDigit(c, &digit)) {
  1225. /*
  1226. * Back off to accepting the original 'u' or 'x' as a
  1227. * literal.
  1228. */
  1229. state->cp -= i + 2;
  1230. n = *state->cp++;
  1231. break;
  1232. }
  1233. n = (n << 4) | digit;
  1234. }
  1235. c = (jschar) n;
  1236. goto doFlat;
  1237. /* Character class escapes */
  1238. case 'd':
  1239. state->result = NewRENode(state, REOP_DIGIT);
  1240. doSimple:
  1241. if (!state->result)
  1242. return JS_FALSE;
  1243. state->progLength++;
  1244. break;
  1245. case 'D':
  1246. state->result = NewRENode(state, REOP_NONDIGIT);
  1247. goto doSimple;
  1248. case 's':
  1249. state->result = NewRENode(state, REOP_SPACE);
  1250. goto doSimple;
  1251. case 'S':
  1252. state->result = NewRENode(state, REOP_NONSPACE);
  1253. goto doSimple;
  1254. case 'w':
  1255. state->result = NewRENode(state, REOP_ALNUM);
  1256. goto doSimple;
  1257. case 'W':
  1258. state->result = NewRENode(state, REOP_NONALNUM);
  1259. goto doSimple;
  1260. /* IdentityEscape */
  1261. default:
  1262. state->result = NewRENode(state, REOP_FLAT);
  1263. if (!state->result)
  1264. return JS_FALSE;
  1265. state->result->u.flat.chr = c;
  1266. state->result->u.flat.length = 1;
  1267. state->result->kid = (void *) (state->cp - 1);
  1268. state->progLength += 3;
  1269. break;
  1270. }
  1271. break;
  1272. case '[':
  1273. state->result = NewRENode(state, REOP_CLASS);
  1274. if (!state->result)
  1275. return JS_FALSE;
  1276. termStart = state->cp;
  1277. state->result->u.ucclass.startIndex = termStart - state->cpbegin;
  1278. for (;;) {
  1279. if (state->cp == state->cpend) {
  1280. ReportRegExpErrorHelper(state, JSREPORT_ERROR,
  1281. JSMSG_UNTERM_CLASS, termStart);
  1282. return JS_FALSE;
  1283. }
  1284. if (*state->cp == '\\') {
  1285. state->cp++;
  1286. if (state->cp != state->cpend)
  1287. state->cp++;
  1288. continue;
  1289. }
  1290. if (*state->cp == ']') {
  1291. state->result->u.ucclass.kidlen = state->cp - termStart;
  1292. break;
  1293. }
  1294. state->cp++;
  1295. }
  1296. for (i = 0; i < CLASS_CACHE_SIZE; i++) {
  1297. if (!state->classCache[i].start) {
  1298. state->classCache[i].start = termStart;
  1299. state->classCache[i].length = state->result->u.ucclass.kidlen;
  1300. state->classCache[i].index = state->classCount;
  1301. break;
  1302. }
  1303. if (state->classCache[i].length ==
  1304. state->result->u.ucclass.kidlen) {
  1305. for (n = 0; ; n++) {
  1306. if (n == state->classCache[i].length) {
  1307. state->result->u.ucclass.index
  1308. = state->classCache[i].index;
  1309. goto claim;
  1310. }
  1311. if (state->classCache[i].start[n] != termStart[n])
  1312. break;
  1313. }
  1314. }
  1315. }
  1316. state->result->u.ucclass.index = state->classCount++;
  1317. claim:
  1318. /*
  1319. * Call CalculateBitmapSize now as we want any errors it finds
  1320. * to be reported during the parse phase, not at execution.
  1321. */
  1322. if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
  1323. return JS_FALSE;
  1324. /*
  1325. * Update classBitmapsMem with number of bytes to hold bmsize bits,
  1326. * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
  1327. * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
  1328. */
  1329. n = (state->result->u.ucclass.bmsize >> 3) + 1;
  1330. if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
  1331. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
  1332. return JS_FALSE;
  1333. }
  1334. state->classBitmapsMem += n;
  1335. /* CLASS, <index> */
  1336. state->progLength
  1337. += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
  1338. break;
  1339. case '.':
  1340. state->result = NewRENode(state, REOP_DOT);
  1341. goto doSimple;
  1342. case '{':
  1343. {
  1344. const jschar *errp = state->cp--;
  1345. intN err;
  1346. err = ParseMinMaxQuantifier(state, JS_TRUE);
  1347. state->cp = errp;
  1348. if (err < 0)
  1349. goto asFlat;
  1350. /* FALL THROUGH */
  1351. }
  1352. case '*':
  1353. case '+':
  1354. case '?':
  1355. ReportRegExpErrorHelper(state, JSREPORT_ERROR,
  1356. JSMSG_BAD_QUANTIFIER, state->cp - 1);
  1357. return JS_FALSE;
  1358. default:
  1359. asFlat:
  1360. state->result = NewRENode(state, REOP_FLAT);
  1361. if (!state->result)
  1362. return JS_FALSE;
  1363. state->result->u.flat.chr = c;
  1364. state->result->u.flat.length = 1;
  1365. state->result->kid = (void *) (state->cp - 1);
  1366. state->progLength += 3;
  1367. break;
  1368. }
  1369. return ParseQuantifier(state);
  1370. }
  1371. static JSBool
  1372. ParseQuantifier(CompilerState *s

Large files files are truncated, but you can click here to view the full file