PageRenderTime 641ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/js/lib/Socket.IO-node/support/expresso/deps/jscoverage/js/jsregexp.cpp

http://github.com/onedayitwillmake/RealtimeMultiplayerNodeJs
C++ | 1906 lines | 1433 code | 148 blank | 325 comment | 293 complexity | 3040154f5eb7ab91c45eedc02b82c675 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, MPL-2.0-no-copyleft-exception, BSD-3-Clause
  1. /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  2. * vim: set sw=4 ts=8 et tw=78:
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is Mozilla Communicator client code, released
  18. * March 31, 1998.
  19. *
  20. * The Initial Developer of the Original Code is
  21. * Netscape Communications Corporation.
  22. * Portions created by the Initial Developer are Copyright (C) 1998
  23. * the Initial Developer. All Rights Reserved.
  24. *
  25. * Contributor(s):
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the MPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the MPL, the GPL or the LGPL.
  38. *
  39. * ***** END LICENSE BLOCK ***** */
  40. /*
  41. * JS regular expressions, after Perl.
  42. */
  43. #include "jsstddef.h"
  44. #include <stdlib.h>
  45. #include <string.h>
  46. #include <stdarg.h>
  47. #include "jstypes.h"
  48. #include "jsarena.h" /* Added by JSIFY */
  49. #include "jsutil.h" /* Added by JSIFY */
  50. #include "jsapi.h"
  51. #include "jsarray.h"
  52. #include "jsatom.h"
  53. #include "jsbuiltins.h"
  54. #include "jscntxt.h"
  55. #include "jsversion.h"
  56. #include "jsfun.h"
  57. #include "jsgc.h"
  58. #include "jsinterp.h"
  59. #include "jslock.h"
  60. #include "jsnum.h"
  61. #include "jsobj.h"
  62. #include "jsopcode.h"
  63. #include "jsregexp.h"
  64. #include "jsscan.h"
  65. #include "jsscope.h"
  66. #include "jsstr.h"
  67. #ifdef JS_TRACER
  68. #include "jstracer.h"
  69. using namespace avmplus;
  70. using namespace nanojit;
  71. /*
  72. * FIXME Duplicated with jstracer.cpp, doing it this way for now
  73. * to keep it private to files that need it.
  74. */
  75. #ifdef JS_JIT_SPEW
  76. static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose");
  77. #define debug_only_v(x) if (verbose_debug) { x; }
  78. #else
  79. #define debug_only_v(x)
  80. #endif
  81. #endif
  82. typedef enum REOp {
  83. #define REOP_DEF(opcode, name) opcode,
  84. #include "jsreops.tbl"
  85. #undef REOP_DEF
  86. REOP_LIMIT /* META: no operator >= to this */
  87. } REOp;
  88. #define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS)
  89. #ifdef REGEXP_DEBUG
  90. const char *reop_names[] = {
  91. #define REOP_DEF(opcode, name) name,
  92. #include "jsreops.tbl"
  93. #undef REOP_DEF
  94. NULL
  95. };
  96. #endif
  97. #ifdef __GNUC__
  98. static int
  99. re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2)));
  100. #endif
  101. #ifdef REGEXP_DEBUG
  102. static int
  103. re_debug(const char *fmt, ...)
  104. {
  105. va_list ap;
  106. int retval;
  107. va_start(ap, fmt);
  108. retval = vprintf(fmt, ap);
  109. va_end(ap);
  110. return retval;
  111. }
  112. static void
  113. re_debug_chars(const jschar *chrs, size_t length)
  114. {
  115. int i = 0;
  116. printf(" \"");
  117. while (*chrs && i++ < length) {
  118. putchar((char)*chrs++);
  119. }
  120. printf("\"");
  121. }
  122. #else /* !REGEXP_DEBUG */
  123. /* This should be optimized to a no-op by our tier-1 compilers. */
  124. static int
  125. re_debug(const char *fmt, ...)
  126. {
  127. return 0;
  128. }
  129. static void
  130. re_debug_chars(const jschar *chrs, size_t length)
  131. {
  132. }
  133. #endif /* !REGEXP_DEBUG */
  134. struct RENode {
  135. REOp op; /* r.e. op bytecode */
  136. RENode *next; /* next in concatenation order */
  137. void *kid; /* first operand */
  138. union {
  139. void *kid2; /* second operand */
  140. jsint num; /* could be a number */
  141. size_t parenIndex; /* or a parenthesis index */
  142. struct { /* or a quantifier range */
  143. uintN min;
  144. uintN max;
  145. JSPackedBool greedy;
  146. } range;
  147. struct { /* or a character class */
  148. size_t startIndex;
  149. size_t kidlen; /* length of string at kid, in jschars */
  150. size_t index; /* index into class list */
  151. uint16 bmsize; /* bitmap size, based on max char code */
  152. JSPackedBool sense;
  153. } ucclass;
  154. struct { /* or a literal sequence */
  155. jschar chr; /* of one character */
  156. size_t length; /* or many (via the kid) */
  157. } flat;
  158. struct {
  159. RENode *kid2; /* second operand from ALT */
  160. jschar ch1; /* match char for ALTPREREQ */
  161. jschar ch2; /* ditto, or class index for ALTPREREQ2 */
  162. } altprereq;
  163. } u;
  164. };
  165. #define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \
  166. ((c >= 'a') && (c <= 'z')) )
  167. #define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \
  168. (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR))
  169. #define CLASS_CACHE_SIZE 4
  170. typedef struct CompilerState {
  171. JSContext *context;
  172. JSTokenStream *tokenStream; /* For reporting errors */
  173. const jschar *cpbegin;
  174. const jschar *cpend;
  175. const jschar *cp;
  176. size_t parenCount;
  177. size_t classCount; /* number of [] encountered */
  178. size_t treeDepth; /* maximum depth of parse tree */
  179. size_t progLength; /* estimated bytecode length */
  180. RENode *result;
  181. size_t classBitmapsMem; /* memory to hold all class bitmaps */
  182. struct {
  183. const jschar *start; /* small cache of class strings */
  184. size_t length; /* since they're often the same */
  185. size_t index;
  186. } classCache[CLASS_CACHE_SIZE];
  187. uint16 flags;
  188. } CompilerState;
  189. typedef struct EmitStateStackEntry {
  190. jsbytecode *altHead; /* start of REOP_ALT* opcode */
  191. jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */
  192. jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */
  193. jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */
  194. RENode *continueNode; /* original REOP_ALT* node being stacked */
  195. jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */
  196. JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to
  197. avoid 16-bit unsigned offset overflow */
  198. } EmitStateStackEntry;
  199. /*
  200. * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h,
  201. * the getters and setters take the pc of the offset, not of the opcode before
  202. * the offset.
  203. */
  204. #define ARG_LEN 2
  205. #define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1]))
  206. #define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \
  207. (pc)[1] = (jsbytecode) (arg))
  208. #define OFFSET_LEN ARG_LEN
  209. #define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1)
  210. #define GET_OFFSET(pc) GET_ARG(pc)
  211. /*
  212. * Maximum supported tree depth is maximum size of EmitStateStackEntry stack.
  213. * For sanity, we limit it to 2^24 bytes.
  214. */
  215. #define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry))
  216. /*
  217. * The maximum memory that can be allocated for class bitmaps.
  218. * For sanity, we limit it to 2^24 bytes.
  219. */
  220. #define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24)
  221. /*
  222. * Functions to get size and write/read bytecode that represent small indexes
  223. * compactly.
  224. * Each byte in the code represent 7-bit chunk of the index. 8th bit when set
  225. * indicates that the following byte brings more bits to the index. Otherwise
  226. * this is the last byte in the index bytecode representing highest index bits.
  227. */
  228. static size_t
  229. GetCompactIndexWidth(size_t index)
  230. {
  231. size_t width;
  232. for (width = 1; (index >>= 7) != 0; ++width) { }
  233. return width;
  234. }
  235. static JS_ALWAYS_INLINE jsbytecode *
  236. WriteCompactIndex(jsbytecode *pc, size_t index)
  237. {
  238. size_t next;
  239. while ((next = index >> 7) != 0) {
  240. *pc++ = (jsbytecode)(index | 0x80);
  241. index = next;
  242. }
  243. *pc++ = (jsbytecode)index;
  244. return pc;
  245. }
  246. static JS_ALWAYS_INLINE jsbytecode *
  247. ReadCompactIndex(jsbytecode *pc, size_t *result)
  248. {
  249. size_t nextByte;
  250. nextByte = *pc++;
  251. if ((nextByte & 0x80) == 0) {
  252. /*
  253. * Short-circuit the most common case when compact index <= 127.
  254. */
  255. *result = nextByte;
  256. } else {
  257. size_t shift = 7;
  258. *result = 0x7F & nextByte;
  259. do {
  260. nextByte = *pc++;
  261. *result |= (nextByte & 0x7F) << shift;
  262. shift += 7;
  263. } while ((nextByte & 0x80) != 0);
  264. }
  265. return pc;
  266. }
  267. typedef struct RECapture {
  268. ptrdiff_t index; /* start of contents, -1 for empty */
  269. size_t length; /* length of capture */
  270. } RECapture;
  271. typedef struct REMatchState {
  272. const jschar *cp;
  273. RECapture parens[1]; /* first of 're->parenCount' captures,
  274. allocated at end of this struct */
  275. } REMatchState;
  276. struct REBackTrackData;
  277. typedef struct REProgState {
  278. jsbytecode *continue_pc; /* current continuation data */
  279. jsbytecode continue_op;
  280. ptrdiff_t index; /* progress in text */
  281. size_t parenSoFar; /* highest indexed paren started */
  282. union {
  283. struct {
  284. uintN min; /* current quantifier limits */
  285. uintN max;
  286. } quantifier;
  287. struct {
  288. size_t top; /* backtrack stack state */
  289. size_t sz;
  290. } assertion;
  291. } u;
  292. } REProgState;
  293. typedef struct REBackTrackData {
  294. size_t sz; /* size of previous stack entry */
  295. jsbytecode *backtrack_pc; /* where to backtrack to */
  296. jsbytecode backtrack_op;
  297. const jschar *cp; /* index in text of match at backtrack */
  298. size_t parenIndex; /* start index of saved paren contents */
  299. size_t parenCount; /* # of saved paren contents */
  300. size_t saveStateStackTop; /* number of parent states */
  301. /* saved parent states follow */
  302. /* saved paren contents follow */
  303. } REBackTrackData;
  304. #define INITIAL_STATESTACK 100
  305. #define INITIAL_BACKTRACK 8000
  306. typedef struct REGlobalData {
  307. JSContext *cx;
  308. JSRegExp *regexp; /* the RE in execution */
  309. JSBool ok; /* runtime error (out_of_memory only?) */
  310. size_t start; /* offset to start at */
  311. ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
  312. const jschar *cpbegin; /* text base address */
  313. const jschar *cpend; /* text limit address */
  314. REProgState *stateStack; /* stack of state of current parents */
  315. size_t stateStackTop;
  316. size_t stateStackLimit;
  317. REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
  318. REBackTrackData *backTrackSP;
  319. size_t backTrackStackSize;
  320. size_t cursz; /* size of current stack entry */
  321. size_t backTrackCount; /* how many times we've backtracked */
  322. size_t backTrackLimit; /* upper limit on backtrack states */
  323. } REGlobalData;
  324. /*
  325. * 1. If IgnoreCase is false, return ch.
  326. * 2. Let u be ch converted to upper case as if by calling
  327. * String.prototype.toUpperCase on the one-character string ch.
  328. * 3. If u does not consist of a single character, return ch.
  329. * 4. Let cu be u's character.
  330. * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
  331. * code point value is less than decimal 128, then return ch.
  332. * 6. Return cu.
  333. */
  334. static JS_ALWAYS_INLINE uintN
  335. upcase(uintN ch)
  336. {
  337. uintN cu;
  338. JS_ASSERT((uintN) (jschar) ch == ch);
  339. if (ch < 128) {
  340. if (ch - (uintN) 'a' <= (uintN) ('z' - 'a'))
  341. ch -= (uintN) ('a' - 'A');
  342. return ch;
  343. }
  344. cu = JS_TOUPPER(ch);
  345. return (cu < 128) ? ch : cu;
  346. }
  347. static JS_ALWAYS_INLINE uintN
  348. downcase(uintN ch)
  349. {
  350. JS_ASSERT((uintN) (jschar) ch == ch);
  351. if (ch < 128) {
  352. if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A'))
  353. ch += (uintN) ('a' - 'A');
  354. return ch;
  355. }
  356. return JS_TOLOWER(ch);
  357. }
  358. /* Construct and initialize an RENode, returning NULL for out-of-memory */
  359. static RENode *
  360. NewRENode(CompilerState *state, REOp op)
  361. {
  362. JSContext *cx;
  363. RENode *ren;
  364. cx = state->context;
  365. JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren);
  366. if (!ren) {
  367. js_ReportOutOfScriptQuota(cx);
  368. return NULL;
  369. }
  370. ren->op = op;
  371. ren->next = NULL;
  372. ren->kid = NULL;
  373. return ren;
  374. }
  375. /*
  376. * Validates and converts hex ascii value.
  377. */
  378. static JSBool
  379. isASCIIHexDigit(jschar c, uintN *digit)
  380. {
  381. uintN cv = c;
  382. if (cv < '0')
  383. return JS_FALSE;
  384. if (cv <= '9') {
  385. *digit = cv - '0';
  386. return JS_TRUE;
  387. }
  388. cv |= 0x20;
  389. if (cv >= 'a' && cv <= 'f') {
  390. *digit = cv - 'a' + 10;
  391. return JS_TRUE;
  392. }
  393. return JS_FALSE;
  394. }
  395. typedef struct {
  396. REOp op;
  397. const jschar *errPos;
  398. size_t parenIndex;
  399. } REOpData;
  400. static JSBool
  401. ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber,
  402. const jschar *arg)
  403. {
  404. if (state->tokenStream) {
  405. return js_ReportCompileErrorNumber(state->context, state->tokenStream,
  406. NULL, JSREPORT_UC | flags,
  407. errorNumber, arg);
  408. }
  409. return JS_ReportErrorFlagsAndNumberUC(state->context, flags,
  410. js_GetErrorMessage, NULL,
  411. errorNumber, arg);
  412. }
  413. static JSBool
  414. ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber)
  415. {
  416. return ReportRegExpErrorHelper(state, flags, errorNumber, NULL);
  417. }
  418. /*
  419. * Process the op against the two top operands, reducing them to a single
  420. * operand in the penultimate slot. Update progLength and treeDepth.
  421. */
  422. static JSBool
  423. ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack,
  424. intN operandSP)
  425. {
  426. RENode *result;
  427. switch (opData->op) {
  428. case REOP_ALT:
  429. result = NewRENode(state, REOP_ALT);
  430. if (!result)
  431. return JS_FALSE;
  432. result->kid = operandStack[operandSP - 2];
  433. result->u.kid2 = operandStack[operandSP - 1];
  434. operandStack[operandSP - 2] = result;
  435. if (state->treeDepth == TREE_DEPTH_MAX) {
  436. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
  437. return JS_FALSE;
  438. }
  439. ++state->treeDepth;
  440. /*
  441. * Look at both alternates to see if there's a FLAT or a CLASS at
  442. * the start of each. If so, use a prerequisite match.
  443. */
  444. if (((RENode *) result->kid)->op == REOP_FLAT &&
  445. ((RENode *) result->u.kid2)->op == REOP_FLAT &&
  446. (state->flags & JSREG_FOLD) == 0) {
  447. result->op = REOP_ALTPREREQ;
  448. result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
  449. result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr;
  450. /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
  451. JUMP, <end> ... ENDALT */
  452. state->progLength += 13;
  453. }
  454. else
  455. if (((RENode *) result->kid)->op == REOP_CLASS &&
  456. ((RENode *) result->kid)->u.ucclass.index < 256 &&
  457. ((RENode *) result->u.kid2)->op == REOP_FLAT &&
  458. (state->flags & JSREG_FOLD) == 0) {
  459. result->op = REOP_ALTPREREQ2;
  460. result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr;
  461. result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index;
  462. /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
  463. JUMP, <end> ... ENDALT */
  464. state->progLength += 13;
  465. }
  466. else
  467. if (((RENode *) result->kid)->op == REOP_FLAT &&
  468. ((RENode *) result->u.kid2)->op == REOP_CLASS &&
  469. ((RENode *) result->u.kid2)->u.ucclass.index < 256 &&
  470. (state->flags & JSREG_FOLD) == 0) {
  471. result->op = REOP_ALTPREREQ2;
  472. result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr;
  473. result->u.altprereq.ch2 =
  474. ((RENode *) result->u.kid2)->u.ucclass.index;
  475. /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
  476. JUMP, <end> ... ENDALT */
  477. state->progLength += 13;
  478. }
  479. else {
  480. /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
  481. state->progLength += 7;
  482. }
  483. break;
  484. case REOP_CONCAT:
  485. result = operandStack[operandSP - 2];
  486. while (result->next)
  487. result = result->next;
  488. result->next = operandStack[operandSP - 1];
  489. break;
  490. case REOP_ASSERT:
  491. case REOP_ASSERT_NOT:
  492. case REOP_LPARENNON:
  493. case REOP_LPAREN:
  494. /* These should have been processed by a close paren. */
  495. ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN,
  496. opData->errPos);
  497. return JS_FALSE;
  498. default:;
  499. }
  500. return JS_TRUE;
  501. }
  502. /*
  503. * Parser forward declarations.
  504. */
  505. static JSBool ParseTerm(CompilerState *state);
  506. static JSBool ParseQuantifier(CompilerState *state);
  507. static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues);
  508. /*
  509. * Top-down regular expression grammar, based closely on Perl4.
  510. *
  511. * regexp: altern A regular expression is one or more
  512. * altern '|' regexp alternatives separated by vertical bar.
  513. */
  514. #define INITIAL_STACK_SIZE 128
  515. static JSBool
  516. ParseRegExp(CompilerState *state)
  517. {
  518. size_t parenIndex;
  519. RENode *operand;
  520. REOpData *operatorStack;
  521. RENode **operandStack;
  522. REOp op;
  523. intN i;
  524. JSBool result = JS_FALSE;
  525. intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
  526. intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;
  527. /* Watch out for empty regexp */
  528. if (state->cp == state->cpend) {
  529. state->result = NewRENode(state, REOP_EMPTY);
  530. return (state->result != NULL);
  531. }
  532. operatorStack = (REOpData *)
  533. JS_malloc(state->context, sizeof(REOpData) * operatorStackSize);
  534. if (!operatorStack)
  535. return JS_FALSE;
  536. operandStack = (RENode **)
  537. JS_malloc(state->context, sizeof(RENode *) * operandStackSize);
  538. if (!operandStack)
  539. goto out;
  540. for (;;) {
  541. parenIndex = state->parenCount;
  542. if (state->cp == state->cpend) {
  543. /*
  544. * If we are at the end of the regexp and we're short one or more
  545. * operands, the regexp must have the form /x|/ or some such, with
  546. * left parentheses making us short more than one operand.
  547. */
  548. if (operatorSP >= operandSP) {
  549. operand = NewRENode(state, REOP_EMPTY);
  550. if (!operand)
  551. goto out;
  552. goto pushOperand;
  553. }
  554. } else {
  555. switch (*state->cp) {
  556. case '(':
  557. ++state->cp;
  558. if (state->cp + 1 < state->cpend &&
  559. *state->cp == '?' &&
  560. (state->cp[1] == '=' ||
  561. state->cp[1] == '!' ||
  562. state->cp[1] == ':')) {
  563. switch (state->cp[1]) {
  564. case '=':
  565. op = REOP_ASSERT;
  566. /* ASSERT, <next>, ... ASSERTTEST */
  567. state->progLength += 4;
  568. break;
  569. case '!':
  570. op = REOP_ASSERT_NOT;
  571. /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
  572. state->progLength += 4;
  573. break;
  574. default:
  575. op = REOP_LPARENNON;
  576. break;
  577. }
  578. state->cp += 2;
  579. } else {
  580. op = REOP_LPAREN;
  581. /* LPAREN, <index>, ... RPAREN, <index> */
  582. state->progLength
  583. += 2 * (1 + GetCompactIndexWidth(parenIndex));
  584. state->parenCount++;
  585. if (state->parenCount == 65535) {
  586. ReportRegExpError(state, JSREPORT_ERROR,
  587. JSMSG_TOO_MANY_PARENS);
  588. goto out;
  589. }
  590. }
  591. goto pushOperator;
  592. case ')':
  593. /*
  594. * If there's no stacked open parenthesis, throw syntax error.
  595. */
  596. for (i = operatorSP - 1; ; i--) {
  597. if (i < 0) {
  598. ReportRegExpError(state, JSREPORT_ERROR,
  599. JSMSG_UNMATCHED_RIGHT_PAREN);
  600. goto out;
  601. }
  602. if (operatorStack[i].op == REOP_ASSERT ||
  603. operatorStack[i].op == REOP_ASSERT_NOT ||
  604. operatorStack[i].op == REOP_LPARENNON ||
  605. operatorStack[i].op == REOP_LPAREN) {
  606. break;
  607. }
  608. }
  609. /* FALL THROUGH */
  610. case '|':
  611. /* Expected an operand before these, so make an empty one */
  612. operand = NewRENode(state, REOP_EMPTY);
  613. if (!operand)
  614. goto out;
  615. goto pushOperand;
  616. default:
  617. if (!ParseTerm(state))
  618. goto out;
  619. operand = state->result;
  620. pushOperand:
  621. if (operandSP == operandStackSize) {
  622. RENode **tmp;
  623. operandStackSize += operandStackSize;
  624. tmp = (RENode **)
  625. JS_realloc(state->context, operandStack,
  626. sizeof(RENode *) * operandStackSize);
  627. if (!tmp)
  628. goto out;
  629. operandStack = tmp;
  630. }
  631. operandStack[operandSP++] = operand;
  632. break;
  633. }
  634. }
  635. /* At the end; process remaining operators. */
  636. restartOperator:
  637. if (state->cp == state->cpend) {
  638. while (operatorSP) {
  639. --operatorSP;
  640. if (!ProcessOp(state, &operatorStack[operatorSP],
  641. operandStack, operandSP))
  642. goto out;
  643. --operandSP;
  644. }
  645. JS_ASSERT(operandSP == 1);
  646. state->result = operandStack[0];
  647. result = JS_TRUE;
  648. goto out;
  649. }
  650. switch (*state->cp) {
  651. case '|':
  652. /* Process any stacked 'concat' operators */
  653. ++state->cp;
  654. while (operatorSP &&
  655. operatorStack[operatorSP - 1].op == REOP_CONCAT) {
  656. --operatorSP;
  657. if (!ProcessOp(state, &operatorStack[operatorSP],
  658. operandStack, operandSP)) {
  659. goto out;
  660. }
  661. --operandSP;
  662. }
  663. op = REOP_ALT;
  664. goto pushOperator;
  665. case ')':
  666. /*
  667. * If there's no stacked open parenthesis, throw syntax error.
  668. */
  669. for (i = operatorSP - 1; ; i--) {
  670. if (i < 0) {
  671. ReportRegExpError(state, JSREPORT_ERROR,
  672. JSMSG_UNMATCHED_RIGHT_PAREN);
  673. goto out;
  674. }
  675. if (operatorStack[i].op == REOP_ASSERT ||
  676. operatorStack[i].op == REOP_ASSERT_NOT ||
  677. operatorStack[i].op == REOP_LPARENNON ||
  678. operatorStack[i].op == REOP_LPAREN) {
  679. break;
  680. }
  681. }
  682. ++state->cp;
  683. /* Process everything on the stack until the open parenthesis. */
  684. for (;;) {
  685. JS_ASSERT(operatorSP);
  686. --operatorSP;
  687. switch (operatorStack[operatorSP].op) {
  688. case REOP_ASSERT:
  689. case REOP_ASSERT_NOT:
  690. case REOP_LPAREN:
  691. operand = NewRENode(state, operatorStack[operatorSP].op);
  692. if (!operand)
  693. goto out;
  694. operand->u.parenIndex =
  695. operatorStack[operatorSP].parenIndex;
  696. JS_ASSERT(operandSP);
  697. operand->kid = operandStack[operandSP - 1];
  698. operandStack[operandSP - 1] = operand;
  699. if (state->treeDepth == TREE_DEPTH_MAX) {
  700. ReportRegExpError(state, JSREPORT_ERROR,
  701. JSMSG_REGEXP_TOO_COMPLEX);
  702. goto out;
  703. }
  704. ++state->treeDepth;
  705. /* FALL THROUGH */
  706. case REOP_LPARENNON:
  707. state->result = operandStack[operandSP - 1];
  708. if (!ParseQuantifier(state))
  709. goto out;
  710. operandStack[operandSP - 1] = state->result;
  711. goto restartOperator;
  712. default:
  713. if (!ProcessOp(state, &operatorStack[operatorSP],
  714. operandStack, operandSP))
  715. goto out;
  716. --operandSP;
  717. break;
  718. }
  719. }
  720. break;
  721. case '{':
  722. {
  723. const jschar *errp = state->cp;
  724. if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) {
  725. /*
  726. * This didn't even scan correctly as a quantifier, so we should
  727. * treat it as flat.
  728. */
  729. op = REOP_CONCAT;
  730. goto pushOperator;
  731. }
  732. state->cp = errp;
  733. /* FALL THROUGH */
  734. }
  735. case '+':
  736. case '*':
  737. case '?':
  738. ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER,
  739. state->cp);
  740. result = JS_FALSE;
  741. goto out;
  742. default:
  743. /* Anything else is the start of the next term. */
  744. op = REOP_CONCAT;
  745. pushOperator:
  746. if (operatorSP == operatorStackSize) {
  747. REOpData *tmp;
  748. operatorStackSize += operatorStackSize;
  749. tmp = (REOpData *)
  750. JS_realloc(state->context, operatorStack,
  751. sizeof(REOpData) * operatorStackSize);
  752. if (!tmp)
  753. goto out;
  754. operatorStack = tmp;
  755. }
  756. operatorStack[operatorSP].op = op;
  757. operatorStack[operatorSP].errPos = state->cp;
  758. operatorStack[operatorSP++].parenIndex = parenIndex;
  759. break;
  760. }
  761. }
  762. out:
  763. if (operatorStack)
  764. JS_free(state->context, operatorStack);
  765. if (operandStack)
  766. JS_free(state->context, operandStack);
  767. return result;
  768. }
  769. /*
  770. * Hack two bits in CompilerState.flags, for use within FindParenCount to flag
  771. * its being on the stack, and to propagate errors to its callers.
  772. */
  773. #define JSREG_FIND_PAREN_COUNT 0x8000
  774. #define JSREG_FIND_PAREN_ERROR 0x4000
  775. /*
  776. * Magic return value from FindParenCount and GetDecimalValue, to indicate
  777. * overflow beyond GetDecimalValue's max parameter, or a computed maximum if
  778. * its findMax parameter is non-null.
  779. */
  780. #define OVERFLOW_VALUE ((uintN)-1)
  781. static uintN
  782. FindParenCount(CompilerState *state)
  783. {
  784. CompilerState temp;
  785. int i;
  786. if (state->flags & JSREG_FIND_PAREN_COUNT)
  787. return OVERFLOW_VALUE;
  788. /*
  789. * Copy state into temp, flag it so we never report an invalid backref,
  790. * and reset its members to parse the entire regexp. This is obviously
  791. * suboptimal, but GetDecimalValue calls us only if a backref appears to
  792. * refer to a forward parenthetical, which is rare.
  793. */
  794. temp = *state;
  795. temp.flags |= JSREG_FIND_PAREN_COUNT;
  796. temp.cp = temp.cpbegin;
  797. temp.parenCount = 0;
  798. temp.classCount = 0;
  799. temp.progLength = 0;
  800. temp.treeDepth = 0;
  801. temp.classBitmapsMem = 0;
  802. for (i = 0; i < CLASS_CACHE_SIZE; i++)
  803. temp.classCache[i].start = NULL;
  804. if (!ParseRegExp(&temp)) {
  805. state->flags |= JSREG_FIND_PAREN_ERROR;
  806. return OVERFLOW_VALUE;
  807. }
  808. return temp.parenCount;
  809. }
  810. /*
  811. * Extract and return a decimal value at state->cp. The initial character c
  812. * has already been read. Return OVERFLOW_VALUE if the result exceeds max.
  813. * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in
  814. * state->flags to discover whether an error occurred under findMax.
  815. */
  816. static uintN
  817. GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state),
  818. CompilerState *state)
  819. {
  820. uintN value = JS7_UNDEC(c);
  821. JSBool overflow = (value > max && (!findMax || value > findMax(state)));
  822. /* The following restriction allows simpler overflow checks. */
  823. JS_ASSERT(max <= ((uintN)-1 - 9) / 10);
  824. while (state->cp < state->cpend) {
  825. c = *state->cp;
  826. if (!JS7_ISDEC(c))
  827. break;
  828. value = 10 * value + JS7_UNDEC(c);
  829. if (!overflow && value > max && (!findMax || value > findMax(state)))
  830. overflow = JS_TRUE;
  831. ++state->cp;
  832. }
  833. return overflow ? OVERFLOW_VALUE : value;
  834. }
  835. /*
  836. * Calculate the total size of the bitmap required for a class expression.
  837. */
  838. static JSBool
  839. CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
  840. const jschar *end)
  841. {
  842. uintN max = 0;
  843. JSBool inRange = JS_FALSE;
  844. jschar c, rangeStart = 0;
  845. uintN n, digit, nDigits, i;
  846. target->u.ucclass.bmsize = 0;
  847. target->u.ucclass.sense = JS_TRUE;
  848. if (src == end)
  849. return JS_TRUE;
  850. if (*src == '^') {
  851. ++src;
  852. target->u.ucclass.sense = JS_FALSE;
  853. }
  854. while (src != end) {
  855. JSBool canStartRange = JS_TRUE;
  856. uintN localMax = 0;
  857. switch (*src) {
  858. case '\\':
  859. ++src;
  860. c = *src++;
  861. switch (c) {
  862. case 'b':
  863. localMax = 0x8;
  864. break;
  865. case 'f':
  866. localMax = 0xC;
  867. break;
  868. case 'n':
  869. localMax = 0xA;
  870. break;
  871. case 'r':
  872. localMax = 0xD;
  873. break;
  874. case 't':
  875. localMax = 0x9;
  876. break;
  877. case 'v':
  878. localMax = 0xB;
  879. break;
  880. case 'c':
  881. if (src < end && RE_IS_LETTER(*src)) {
  882. localMax = (uintN) (*src++) & 0x1F;
  883. } else {
  884. --src;
  885. localMax = '\\';
  886. }
  887. break;
  888. case 'x':
  889. nDigits = 2;
  890. goto lexHex;
  891. case 'u':
  892. nDigits = 4;
  893. lexHex:
  894. n = 0;
  895. for (i = 0; (i < nDigits) && (src < end); i++) {
  896. c = *src++;
  897. if (!isASCIIHexDigit(c, &digit)) {
  898. /*
  899. * Back off to accepting the original
  900. *'\' as a literal.
  901. */
  902. src -= i + 1;
  903. n = '\\';
  904. break;
  905. }
  906. n = (n << 4) | digit;
  907. }
  908. localMax = n;
  909. break;
  910. case 'd':
  911. canStartRange = JS_FALSE;
  912. if (inRange) {
  913. JS_ReportErrorNumber(state->context,
  914. js_GetErrorMessage, NULL,
  915. JSMSG_BAD_CLASS_RANGE);
  916. return JS_FALSE;
  917. }
  918. localMax = '9';
  919. break;
  920. case 'D':
  921. case 's':
  922. case 'S':
  923. case 'w':
  924. case 'W':
  925. canStartRange = JS_FALSE;
  926. if (inRange) {
  927. JS_ReportErrorNumber(state->context,
  928. js_GetErrorMessage, NULL,
  929. JSMSG_BAD_CLASS_RANGE);
  930. return JS_FALSE;
  931. }
  932. max = 65535;
  933. /*
  934. * If this is the start of a range, ensure that it's less than
  935. * the end.
  936. */
  937. localMax = 0;
  938. break;
  939. case '0':
  940. case '1':
  941. case '2':
  942. case '3':
  943. case '4':
  944. case '5':
  945. case '6':
  946. case '7':
  947. /*
  948. * This is a non-ECMA extension - decimal escapes (in this
  949. * case, octal!) are supposed to be an error inside class
  950. * ranges, but supported here for backwards compatibility.
  951. *
  952. */
  953. n = JS7_UNDEC(c);
  954. c = *src;
  955. if ('0' <= c && c <= '7') {
  956. src++;
  957. n = 8 * n + JS7_UNDEC(c);
  958. c = *src;
  959. if ('0' <= c && c <= '7') {
  960. src++;
  961. i = 8 * n + JS7_UNDEC(c);
  962. if (i <= 0377)
  963. n = i;
  964. else
  965. src--;
  966. }
  967. }
  968. localMax = n;
  969. break;
  970. default:
  971. localMax = c;
  972. break;
  973. }
  974. break;
  975. default:
  976. localMax = *src++;
  977. break;
  978. }
  979. if (inRange) {
  980. /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */
  981. if (rangeStart > localMax) {
  982. JS_ReportErrorNumber(state->context,
  983. js_GetErrorMessage, NULL,
  984. JSMSG_BAD_CLASS_RANGE);
  985. return JS_FALSE;
  986. }
  987. inRange = JS_FALSE;
  988. } else {
  989. if (canStartRange && src < end - 1) {
  990. if (*src == '-') {
  991. ++src;
  992. inRange = JS_TRUE;
  993. rangeStart = (jschar)localMax;
  994. continue;
  995. }
  996. }
  997. if (state->flags & JSREG_FOLD)
  998. rangeStart = localMax; /* one run of the uc/dc loop below */
  999. }
  1000. if (state->flags & JSREG_FOLD) {
  1001. jschar maxch = localMax;
  1002. for (i = rangeStart; i <= localMax; i++) {
  1003. jschar uch, dch;
  1004. uch = upcase(i);
  1005. dch = downcase(i);
  1006. maxch = JS_MAX(maxch, uch);
  1007. maxch = JS_MAX(maxch, dch);
  1008. }
  1009. localMax = maxch;
  1010. }
  1011. if (localMax > max)
  1012. max = localMax;
  1013. }
  1014. target->u.ucclass.bmsize = max;
  1015. return JS_TRUE;
  1016. }
  1017. /*
  1018. * item: assertion An item is either an assertion or
  1019. * quantatom a quantified atom.
  1020. *
  1021. * assertion: '^' Assertions match beginning of string
  1022. * (or line if the class static property
  1023. * RegExp.multiline is true).
  1024. * '$' End of string (or line if the class
  1025. * static property RegExp.multiline is
  1026. * true).
  1027. * '\b' Word boundary (between \w and \W).
  1028. * '\B' Word non-boundary.
  1029. *
  1030. * quantatom: atom An unquantified atom.
  1031. * quantatom '{' n ',' m '}'
  1032. * Atom must occur between n and m times.
  1033. * quantatom '{' n ',' '}' Atom must occur at least n times.
  1034. * quantatom '{' n '}' Atom must occur exactly n times.
  1035. * quantatom '*' Zero or more times (same as {0,}).
  1036. * quantatom '+' One or more times (same as {1,}).
  1037. * quantatom '?' Zero or one time (same as {0,1}).
  1038. *
  1039. * any of which can be optionally followed by '?' for ungreedy
  1040. *
  1041. * atom: '(' regexp ')' A parenthesized regexp (what matched
  1042. * can be addressed using a backreference,
  1043. * see '\' n below).
  1044. * '.' Matches any char except '\n'.
  1045. * '[' classlist ']' A character class.
  1046. * '[' '^' classlist ']' A negated character class.
  1047. * '\f' Form Feed.
  1048. * '\n' Newline (Line Feed).
  1049. * '\r' Carriage Return.
  1050. * '\t' Horizontal Tab.
  1051. * '\v' Vertical Tab.
  1052. * '\d' A digit (same as [0-9]).
  1053. * '\D' A non-digit.
  1054. * '\w' A word character, [0-9a-z_A-Z].
  1055. * '\W' A non-word character.
  1056. * '\s' A whitespace character, [ \b\f\n\r\t\v].
  1057. * '\S' A non-whitespace character.
  1058. * '\' n A backreference to the nth (n decimal
  1059. * and positive) parenthesized expression.
  1060. * '\' octal An octal escape sequence (octal must be
  1061. * two or three digits long, unless it is
  1062. * 0 for the null character).
  1063. * '\x' hex A hex escape (hex must be two digits).
  1064. * '\u' unicode A unicode escape (must be four digits).
  1065. * '\c' ctrl A control character, ctrl is a letter.
  1066. * '\' literalatomchar Any character except one of the above
  1067. * that follow '\' in an atom.
  1068. * otheratomchar Any character not first among the other
  1069. * atom right-hand sides.
  1070. */
  1071. static JSBool
  1072. ParseTerm(CompilerState *state)
  1073. {
  1074. jschar c = *state->cp++;
  1075. uintN nDigits;
  1076. uintN num, tmp, n, i;
  1077. const jschar *termStart;
  1078. switch (c) {
  1079. /* assertions and atoms */
  1080. case '^':
  1081. state->result = NewRENode(state, REOP_BOL);
  1082. if (!state->result)
  1083. return JS_FALSE;
  1084. state->progLength++;
  1085. return JS_TRUE;
  1086. case '$':
  1087. state->result = NewRENode(state, REOP_EOL);
  1088. if (!state->result)
  1089. return JS_FALSE;
  1090. state->progLength++;
  1091. return JS_TRUE;
  1092. case '\\':
  1093. if (state->cp >= state->cpend) {
  1094. /* a trailing '\' is an error */
  1095. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH);
  1096. return JS_FALSE;
  1097. }
  1098. c = *state->cp++;
  1099. switch (c) {
  1100. /* assertion escapes */
  1101. case 'b' :
  1102. state->result = NewRENode(state, REOP_WBDRY);
  1103. if (!state->result)
  1104. return JS_FALSE;
  1105. state->progLength++;
  1106. return JS_TRUE;
  1107. case 'B':
  1108. state->result = NewRENode(state, REOP_WNONBDRY);
  1109. if (!state->result)
  1110. return JS_FALSE;
  1111. state->progLength++;
  1112. return JS_TRUE;
  1113. /* Decimal escape */
  1114. case '0':
  1115. /* Give a strict warning. See also the note below. */
  1116. if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT,
  1117. JSMSG_INVALID_BACKREF)) {
  1118. return JS_FALSE;
  1119. }
  1120. doOctal:
  1121. num = 0;
  1122. while (state->cp < state->cpend) {
  1123. c = *state->cp;
  1124. if (c < '0' || '7' < c)
  1125. break;
  1126. state->cp++;
  1127. tmp = 8 * num + (uintN)JS7_UNDEC(c);
  1128. if (tmp > 0377)
  1129. break;
  1130. num = tmp;
  1131. }
  1132. c = (jschar)num;
  1133. doFlat:
  1134. state->result = NewRENode(state, REOP_FLAT);
  1135. if (!state->result)
  1136. return JS_FALSE;
  1137. state->result->u.flat.chr = c;
  1138. state->result->u.flat.length = 1;
  1139. state->progLength += 3;
  1140. break;
  1141. case '1':
  1142. case '2':
  1143. case '3':
  1144. case '4':
  1145. case '5':
  1146. case '6':
  1147. case '7':
  1148. case '8':
  1149. case '9':
  1150. termStart = state->cp - 1;
  1151. num = GetDecimalValue(c, state->parenCount, FindParenCount, state);
  1152. if (state->flags & JSREG_FIND_PAREN_ERROR)
  1153. return JS_FALSE;
  1154. if (num == OVERFLOW_VALUE) {
  1155. /* Give a strict mode warning. */
  1156. if (!ReportRegExpError(state,
  1157. JSREPORT_WARNING | JSREPORT_STRICT,
  1158. (c >= '8')
  1159. ? JSMSG_INVALID_BACKREF
  1160. : JSMSG_BAD_BACKREF)) {
  1161. return JS_FALSE;
  1162. }
  1163. /*
  1164. * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax
  1165. * error here. However, for compatibility with IE, we treat the
  1166. * whole backref as flat if the first character in it is not a
  1167. * valid octal character, and as an octal escape otherwise.
  1168. */
  1169. state->cp = termStart;
  1170. if (c >= '8') {
  1171. /* Treat this as flat. termStart - 1 is the \. */
  1172. c = '\\';
  1173. goto asFlat;
  1174. }
  1175. /* Treat this as an octal escape. */
  1176. goto doOctal;
  1177. }
  1178. JS_ASSERT(1 <= num && num <= 0x10000);
  1179. state->result = NewRENode(state, REOP_BACKREF);
  1180. if (!state->result)
  1181. return JS_FALSE;
  1182. state->result->u.parenIndex = num - 1;
  1183. state->progLength
  1184. += 1 + GetCompactIndexWidth(state->result->u.parenIndex);
  1185. break;
  1186. /* Control escape */
  1187. case 'f':
  1188. c = 0xC;
  1189. goto doFlat;
  1190. case 'n':
  1191. c = 0xA;
  1192. goto doFlat;
  1193. case 'r':
  1194. c = 0xD;
  1195. goto doFlat;
  1196. case 't':
  1197. c = 0x9;
  1198. goto doFlat;
  1199. case 'v':
  1200. c = 0xB;
  1201. goto doFlat;
  1202. /* Control letter */
  1203. case 'c':
  1204. if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) {
  1205. c = (jschar) (*state->cp++ & 0x1F);
  1206. } else {
  1207. /* back off to accepting the original '\' as a literal */
  1208. --state->cp;
  1209. c = '\\';
  1210. }
  1211. goto doFlat;
  1212. /* HexEscapeSequence */
  1213. case 'x':
  1214. nDigits = 2;
  1215. goto lexHex;
  1216. /* UnicodeEscapeSequence */
  1217. case 'u':
  1218. nDigits = 4;
  1219. lexHex:
  1220. n = 0;
  1221. for (i = 0; i < nDigits && state->cp < state->cpend; i++) {
  1222. uintN digit;
  1223. c = *state->cp++;
  1224. if (!isASCIIHexDigit(c, &digit)) {
  1225. /*
  1226. * Back off to accepting the original 'u' or 'x' as a
  1227. * literal.
  1228. */
  1229. state->cp -= i + 2;
  1230. n = *state->cp++;
  1231. break;
  1232. }
  1233. n = (n << 4) | digit;
  1234. }
  1235. c = (jschar) n;
  1236. goto doFlat;
  1237. /* Character class escapes */
  1238. case 'd':
  1239. state->result = NewRENode(state, REOP_DIGIT);
  1240. doSimple:
  1241. if (!state->result)
  1242. return JS_FALSE;
  1243. state->progLength++;
  1244. break;
  1245. case 'D':
  1246. state->result = NewRENode(state, REOP_NONDIGIT);
  1247. goto doSimple;
  1248. case 's':
  1249. state->result = NewRENode(state, REOP_SPACE);
  1250. goto doSimple;
  1251. case 'S':
  1252. state->result = NewRENode(state, REOP_NONSPACE);
  1253. goto doSimple;
  1254. case 'w':
  1255. state->result = NewRENode(state, REOP_ALNUM);
  1256. goto doSimple;
  1257. case 'W':
  1258. state->result = NewRENode(state, REOP_NONALNUM);
  1259. goto doSimple;
  1260. /* IdentityEscape */
  1261. default:
  1262. state->result = NewRENode(state, REOP_FLAT);
  1263. if (!state->result)
  1264. return JS_FALSE;
  1265. state->result->u.flat.chr = c;
  1266. state->result->u.flat.length = 1;
  1267. state->result->kid = (void *) (state->cp - 1);
  1268. state->progLength += 3;
  1269. break;
  1270. }
  1271. break;
  1272. case '[':
  1273. state->result = NewRENode(state, REOP_CLASS);
  1274. if (!state->result)
  1275. return JS_FALSE;
  1276. termStart = state->cp;
  1277. state->result->u.ucclass.startIndex = termStart - state->cpbegin;
  1278. for (;;) {
  1279. if (state->cp == state->cpend) {
  1280. ReportRegExpErrorHelper(state, JSREPORT_ERROR,
  1281. JSMSG_UNTERM_CLASS, termStart);
  1282. return JS_FALSE;
  1283. }
  1284. if (*state->cp == '\\') {
  1285. state->cp++;
  1286. if (state->cp != state->cpend)
  1287. state->cp++;
  1288. continue;
  1289. }
  1290. if (*state->cp == ']') {
  1291. state->result->u.ucclass.kidlen = state->cp - termStart;
  1292. break;
  1293. }
  1294. state->cp++;
  1295. }
  1296. for (i = 0; i < CLASS_CACHE_SIZE; i++) {
  1297. if (!state->classCache[i].start) {
  1298. state->classCache[i].start = termStart;
  1299. state->classCache[i].length = state->result->u.ucclass.kidlen;
  1300. state->classCache[i].index = state->classCount;
  1301. break;
  1302. }
  1303. if (state->classCache[i].length ==
  1304. state->result->u.ucclass.kidlen) {
  1305. for (n = 0; ; n++) {
  1306. if (n == state->classCache[i].length) {
  1307. state->result->u.ucclass.index
  1308. = state->classCache[i].index;
  1309. goto claim;
  1310. }
  1311. if (state->classCache[i].start[n] != termStart[n])
  1312. break;
  1313. }
  1314. }
  1315. }
  1316. state->result->u.ucclass.index = state->classCount++;
  1317. claim:
  1318. /*
  1319. * Call CalculateBitmapSize now as we want any errors it finds
  1320. * to be reported during the parse phase, not at execution.
  1321. */
  1322. if (!CalculateBitmapSize(state, state->result, termStart, state->cp++))
  1323. return JS_FALSE;
  1324. /*
  1325. * Update classBitmapsMem with number of bytes to hold bmsize bits,
  1326. * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8
  1327. * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize.
  1328. */
  1329. n = (state->result->u.ucclass.bmsize >> 3) + 1;
  1330. if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) {
  1331. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
  1332. return JS_FALSE;
  1333. }
  1334. state->classBitmapsMem += n;
  1335. /* CLASS, <index> */
  1336. state->progLength
  1337. += 1 + GetCompactIndexWidth(state->result->u.ucclass.index);
  1338. break;
  1339. case '.':
  1340. state->result = NewRENode(state, REOP_DOT);
  1341. goto doSimple;
  1342. case '{':
  1343. {
  1344. const jschar *errp = state->cp--;
  1345. intN err;
  1346. err = ParseMinMaxQuantifier(state, JS_TRUE);
  1347. state->cp = errp;
  1348. if (err < 0)
  1349. goto asFlat;
  1350. /* FALL THROUGH */
  1351. }
  1352. case '*':
  1353. case '+':
  1354. case '?':
  1355. ReportRegExpErrorHelper(state, JSREPORT_ERROR,
  1356. JSMSG_BAD_QUANTIFIER, state->cp - 1);
  1357. return JS_FALSE;
  1358. default:
  1359. asFlat:
  1360. state->result = NewRENode(state, REOP_FLAT);
  1361. if (!state->result)
  1362. return JS_FALSE;
  1363. state->result->u.flat.chr = c;
  1364. state->result->u.flat.length = 1;
  1365. state->result->kid = (void *) (state->cp - 1);
  1366. state->progLength += 3;
  1367. break;
  1368. }
  1369. return ParseQuantifier(state);
  1370. }
  1371. static JSBool
  1372. ParseQuantifier(CompilerState *state)
  1373. {
  1374. RENode *term;
  1375. term = state->result;
  1376. if (state->cp < state->cpend) {
  1377. switch (*state->cp) {
  1378. case '+':
  1379. state->result = NewRENode(state, REOP_QUANT);
  1380. if (!state->result)
  1381. return JS_FALSE;
  1382. state->result->u.range.min = 1;
  1383. state->result->u.range.max = (uintN)-1;
  1384. /* <PLUS>, <next> ... <ENDCHILD> */
  1385. state->progLength += 4;
  1386. goto quantifier;
  1387. case '*':
  1388. state->result = NewRENode(state, REOP_QUANT);
  1389. if (!state->result)
  1390. return JS_FALSE;
  1391. state->result->u.range.min = 0;
  1392. state->result->u.range.max = (uintN)-1;
  1393. /* <STAR>, <next> ... <ENDCHILD> */
  1394. state->progLength += 4;
  1395. goto quantifier;
  1396. case '?':
  1397. state->result = NewRENode(state, REOP_QUANT);
  1398. if (!state->result)
  1399. return JS_FALSE;
  1400. state->result->u.range.min = 0;
  1401. state->result->u.range.max = 1;
  1402. /* <OPT>, <next> ... <ENDCHILD> */
  1403. state->progLength += 4;
  1404. goto quantifier;
  1405. case '{': /* balance '}' */
  1406. {
  1407. intN err;
  1408. const jschar *errp = state->cp;
  1409. err = ParseMinMaxQuantifier(state, JS_FALSE);
  1410. if (err == 0)
  1411. goto quantifier;
  1412. if (err == -1)
  1413. return JS_TRUE;
  1414. ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp);
  1415. return JS_FALSE;
  1416. }
  1417. default:;
  1418. }
  1419. }
  1420. return JS_TRUE;
  1421. quantifier:
  1422. if (state->treeDepth == TREE_DEPTH_MAX) {
  1423. ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX);
  1424. return JS_FALSE;
  1425. }
  1426. ++state->treeDepth;
  1427. ++state->cp;
  1428. state->result->kid = term;
  1429. if (state->cp < state->cpend && *state->cp == '?') {
  1430. ++state->cp;
  1431. state->result->u.range.greedy = JS_FALSE;
  1432. } else {
  1433. state->result->u.range.greedy = JS_TRUE;
  1434. }
  1435. return JS_TRUE;
  1436. }
  1437. static intN
  1438. ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues)
  1439. {
  1440. uintN min, max;
  1441. jschar c;
  1442. const jschar *errp = state->cp++;
  1443. c = *state->cp;
  1444. if (JS7_ISDEC(c)) {
  1445. ++state->cp;
  1446. min = GetDecimalValue(c, 0xFFFF, NULL, state);
  1447. c = *state->cp;
  1448. if (!ignoreValues && min == OVERFLOW_VALUE)
  1449. return JSMSG_MIN_TOO_BIG;
  1450. if (c == ',') {
  1451. c = *++state->cp;
  1452. if (JS7_ISDEC(c)) {
  1453. ++state->cp;
  1454. max = GetDecimalValue(c, 0xFFFF, NULL, state);
  1455. c = *state->cp;
  1456. if (!ignoreValues && max == OVERFLOW_VALUE)
  1457. return JSMSG_MAX_TOO_BIG;
  1458. if (!ignoreValues && min > max)
  1459. return JSMSG_OUT_OF_ORDER;
  1460. } else {
  1461. max = (uintN)-1;
  1462. }
  1463. } else {
  1464. max = min;
  1465. }
  1466. if (c == '}') {
  1467. state->result = NewRENode(state, REOP_QUANT);
  1468. if (!state->result)
  1469. return JSMSG_OUT_OF_MEMORY;
  1470. state->result->u.range.min = min;
  1471. state->result->u.range.max = max;
  1472. /*
  1473. * QUANT, <min>, <max>, <next> ... <ENDCHILD>
  1474. * where <max> is written as compact(max+1) to make
  1475. * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1.
  1476. */
  1477. state->progLength += (1 + GetCompactIndexWidth(min)
  1478. + GetCompactIndexWidth(max + 1)
  1479. +3);
  1480. return 0;
  1481. }
  1482. }
  1483. state->cp = errp;
  1484. return -1;
  1485. }
  1486. static JSBool
  1487. SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target)
  1488. {
  1489. ptrdiff_t offset = target - jump;
  1490. /* Check that target really points forward. */
  1491. JS_ASSERT(offset >= 2);
  1492. if ((size_t)offset > OFFSET_MAX)
  1493. return JS_FALSE;
  1494. jump[0] = JUMP_OFFSET_HI(offset);
  1495. jump[1] = JUMP_OFFSET_LO(offset);
  1496. return JS_TRUE;
  1497. }
  1498. /* Copy the charset data from a character class node to the charset list
  1499. * in the regexp object. */
  1500. static JS_ALWAYS_INLINE RECharSet *
  1501. InitNodeCharSet(JSRegExp *re, RENode *node)
  1502. {
  1503. RECharSet *charSet = &re->classList[node->u.ucclass.index];
  1504. charSet->converted = JS_FALSE;
  1505. charSet->length = node->u.ucclass.bmsize;
  1506. charSet->u.src.startIndex = node->u.ucclass.startIndex;
  1507. charSet->u.src.length = node->u.ucclass.kidlen;
  1508. charSet->sense = node->u.ucclass.sense;
  1509. return charSet;
  1510. }
  1511. /*
  1512. * Generate bytecode for the tree rooted at t using an explicit stack instead
  1513. * of recursion.
  1514. */
  1515. static jsbytecode *
  1516. EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth,
  1517. jsbytecode *pc, RENode *t)
  1518. {
  1519. EmitStateStackEntry *emitStateSP, *emitStateStack;
  1520. REOp op;
  1521. if (treeDepth == 0) {
  1522. emitStateStack = NULL;
  1523. } else {
  1524. emitStateStack =
  1525. (EmitStateStackEntry *)JS_malloc(state->context,
  1526. sizeof(EmitStateStackEntry) *
  1527. treeDepth);
  1528. if (!emitStateStack)
  1529. return NULL;
  1530. }
  1531. emitStateSP = emitStateStack;
  1532. op = t->op;
  1533. JS_ASSERT(op < REOP_LIMIT);
  1534. for (;;) {
  1535. *pc++ = op;
  1536. switch (op) {
  1537. case REOP_EMPTY:
  1538. --pc;
  1539. break;
  1540. case REOP_ALTPREREQ2:
  1541. case REOP_ALTPREREQ:
  1542. JS_ASSERT(emitStateSP);
  1543. emitStateSP->altHead = pc - 1;
  1544. emitStateSP->endTermFixup = pc;
  1545. pc += OFFSET_LEN;
  1546. SET_ARG(pc, t->u.altprereq.ch1);
  1547. pc += ARG_LEN;
  1548. SET_ARG(pc, t->u.altprereq.ch2);
  1549. pc += ARG_LEN;
  1550. emitStateSP->nextAltFixup = pc; /* offset to next alternate */
  1551. pc += OFFSET_LEN;
  1552. emitStateSP->continueNode = t;
  1553. emitStateSP->continueOp = REOP_JUMP;
  1554. emitStateSP->jumpToJumpFlag = JS_FALSE;
  1555. ++emitStateSP;
  1556. JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
  1557. t = (RENode *) t->kid;
  1558. op = t->op;
  1559. JS_ASSERT(op < REOP_LIMIT);
  1560. continue;
  1561. case REOP_JUMP:
  1562. emitStateSP->nextTermFixup = pc; /* offset to following term */
  1563. pc += OFFSET_LEN;
  1564. if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc))
  1565. goto jump_too_big;
  1566. emitStateSP->continueOp = REOP_ENDALT;
  1567. ++emitStateSP;
  1568. JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
  1569. t = (RENode *) t->u.kid2;
  1570. op = t->op;
  1571. JS_ASSERT(op < REOP_LIMIT);
  1572. continue;
  1573. case REOP_ENDALT:
  1574. /*
  1575. * If we already patched emitStateSP->nextTermFixup to jump to
  1576. * a nearer jump, to avoid 16-bit immediate offset overflow, we
  1577. * are done here.
  1578. */
  1579. if (emitStateSP->jumpToJumpFlag)
  1580. break;
  1581. /*
  1582. * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT.
  1583. * REOP_ENDALT is executed only on successful match of the last
  1584. * alternate in a group.
  1585. */
  1586. if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
  1587. goto jump_too_big;
  1588. if (t->op != REOP_ALT) {
  1589. if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc))
  1590. goto jump_too_big;
  1591. }
  1592. /*
  1593. * If the program is bigger than the REOP_JUMP offset range, then
  1594. * we must check for alternates before this one that are part of
  1595. * the same group, and fix up their jump offsets to target jumps
  1596. * close enough to fit in a 16-bit unsigned offset immediate.
  1597. */
  1598. if ((size_t)(pc - re->program) > OFFSET_MAX &&
  1599. emitStateSP > emitStateStack) {
  1600. EmitStateStackEntry *esp, *esp2;
  1601. jsbytecode *alt, *jump;
  1602. ptrdiff_t span, header;
  1603. esp2 = emitStateSP;
  1604. alt = esp2->altHead;
  1605. for (esp = esp2 - 1; esp >= emitStateStack; --esp) {
  1606. if (esp->continueOp == REOP_ENDALT &&
  1607. !esp->jumpToJumpFlag &&
  1608. esp->nextTermFixup + OFFSET_LEN == alt &&
  1609. (size_t)(pc - ((esp->continueNode->op != REOP_ALT)
  1610. ? esp->endTermFixup
  1611. : esp->nextTermFixup)) > OFFSET_MAX) {
  1612. alt = esp->altHead;
  1613. jump = esp->nextTermFixup;
  1614. /*
  1615. * The span must be 1 less than the distance from
  1616. * jump offset to jump offset, so we actually jump
  1617. * to a REOP_JUMP bytecode, not to its offset!
  1618. */
  1619. for (;;) {
  1620. JS_ASSERT(jump < esp2->nextTermFixup);
  1621. span = esp2->nextTermFixup - jump - 1;
  1622. if ((size_t)span <= OFFSET_MAX)
  1623. break;
  1624. do {
  1625. if (--esp2 == esp)
  1626. goto jump_too_big;
  1627. } while (esp2->continueOp != REOP_ENDALT);
  1628. }
  1629. jump[0] = JUMP_OFFSET_HI(span);
  1630. jump[1] = JUMP_OFFSET_LO(span);
  1631. if (esp->continueNode->op != REOP_ALT) {
  1632. /*
  1633. * We must patch the offset at esp->endTermFixup
  1634. * as well, for the REOP_ALTPREREQ{,2} opcodes.
  1635. * If we're unlucky and endTermFixup is more than
  1636. * OFFSET_MAX bytes from its target, we cheat by
  1637. * jumping 6 bytes to the jump whose offset is at
  1638. * esp->nextTermFixup, which has the same target.
  1639. */
  1640. jump = esp->endTermFixup;
  1641. header = esp->nextTermFixup - jump;
  1642. span += header;
  1643. if ((size_t)span > OFFSET_MAX)
  1644. span = header;
  1645. jump[0] = JUMP_OFFSET_HI(span);
  1646. jump[1] = JUMP_OFFSET_LO(span);
  1647. }
  1648. esp->jumpToJumpFlag = JS_TRUE;
  1649. }
  1650. }
  1651. }
  1652. break;
  1653. case REOP_ALT:
  1654. JS_ASSERT(emitStateSP);
  1655. emitStateSP->altHead = pc - 1;
  1656. emitStateSP->nextAltFixup = pc; /* offset to next alternate */
  1657. pc += OFFSET_LEN;
  1658. emitStateSP->continueNode = t;
  1659. emitStateSP->continueOp = REOP_JUMP;
  1660. emitStateSP->jumpToJumpFlag = JS_FALSE;
  1661. ++emitStateSP;
  1662. JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
  1663. t = (RENode *) t->kid;
  1664. op = t->op;
  1665. JS_ASSERT(op < REOP_LIMIT);
  1666. continue;
  1667. case REOP_FLAT:
  1668. /*
  1669. * Coalesce FLATs if possible and if it would not increase bytecode
  1670. * beyond preallocated limit. The latter happens only when bytecode
  1671. * size for coalesced string with offset p and length 2 exceeds 6
  1672. * bytes preallocated for 2 single char nodes, i.e. when
  1673. * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or
  1674. * GetCompactIndexWidth(p) > 4.
  1675. * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more
  1676. * nodes strictly decreases bytecode size, the check has to be
  1677. * done only for the first coalescing.
  1678. */
  1679. if (t->kid &&
  1680. GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4)
  1681. {
  1682. while (t->next &&
  1683. t->next->op == REOP_FLAT &&
  1684. (jschar*)t->kid + t->u.flat.length ==
  1685. (jschar*)t->next->kid) {
  1686. t->u.flat.length += t->next->u.flat.length;
  1687. t->next = t->next->next;
  1688. }
  1689. }
  1690. if (t->kid && t->u.flat.length > 1) {
  1691. pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT;
  1692. pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin);
  1693. pc = WriteCompactIndex(pc, t->u.flat.length);
  1694. } else if (t->u.flat.chr < 256) {
  1695. pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1;
  1696. *pc++ = (jsbytecode) t->u.flat.chr;
  1697. } else {
  1698. pc[-1] = (state->flags & JSREG_FOLD)
  1699. ? REOP_UCFLAT1i
  1700. : REOP_UCFLAT1;
  1701. SET_ARG(pc, t->u.flat.chr);
  1702. pc += ARG_LEN;
  1703. }
  1704. break;
  1705. case REOP_LPAREN:
  1706. JS_ASSERT(emitStateSP);
  1707. pc = WriteCompactIndex(pc, t->u.parenIndex);
  1708. emitStateSP->continueNode = t;
  1709. emitStateSP->continueOp = REOP_RPAREN;
  1710. ++emitStateSP;
  1711. JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
  1712. t = (RENode *) t->kid;
  1713. op = t->op;
  1714. continue;
  1715. case REOP_RPAREN:
  1716. pc = WriteCompactIndex(pc, t->u.parenIndex);
  1717. break;
  1718. case REOP_BACKREF:
  1719. pc = WriteCompactIndex(pc, t->u.parenIndex);
  1720. break;
  1721. case REOP_ASSERT:
  1722. JS_ASSERT(emitStateSP);
  1723. emitStateSP->nextTermFixup = pc;
  1724. pc += OFFSET_LEN;
  1725. emitStateSP->continueNode = t;
  1726. emitStateSP->continueOp = REOP_ASSERTTEST;
  1727. ++emitStateSP;
  1728. JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
  1729. t = (RENode *) t->kid;
  1730. op = t->op;
  1731. continue;
  1732. case REOP_ASSERTTEST:
  1733. case REOP_ASSERTNOTTEST:
  1734. if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc))
  1735. goto jump_too_big;
  1736. break;
  1737. case REOP_ASSERT_NOT:
  1738. JS_ASSERT(emitStateSP);
  1739. emitStateSP->nextTermFixup = pc;
  1740. pc += OFFSET_LEN;
  1741. emitStateSP->continueNode = t;
  1742. emitStateSP->continueOp = REOP_ASSERTNOTTEST;
  1743. ++emitStateSP;
  1744. JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth);
  1745. t = (RENode *) t->kid;
  1746. op = t->op;
  1747. continue;
  1748. case REOP_QUANT:
  1749. JS_ASSERT(emitStateSP);
  1750. if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) {
  1751. pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
  1752. } else if (t->u.range.min == 0 && t->u.range.max == 1) {
  1753. pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
  1754. } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) {
  1755. pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
  1756. } else {
  1757. if (!t->u.range.greedy)
  1758. pc[-1] = REOP_M