PageRenderTime 60ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/js/src/frontend/TokenStream.cpp

http://github.com/zpao/v8monkey
C++ | 2284 lines | 2005 code | 109 blank | 170 comment | 346 complexity | d59dae5f9466e64230357dc615a50a25 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-3.0, AGPL-1.0, LGPL-2.1, BSD-3-Clause, GPL-2.0, JSON, Apache-2.0, 0BSD
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  2. * vim: set ts=8 sw=4 et tw=99:
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is Mozilla Communicator client code, released
  18. * March 31, 1998.
  19. *
  20. * The Initial Developer of the Original Code is
  21. * Netscape Communications Corporation.
  22. * Portions created by the Initial Developer are Copyright (C) 1998
  23. * the Initial Developer. All Rights Reserved.
  24. *
  25. * Contributor(s):
  26. * Nick Fitzgerald <nfitzgerald@mozilla.com>
  27. *
  28. * Alternatively, the contents of this file may be used under the terms of
  29. * either of the GNU General Public License Version 2 or later (the "GPL"),
  30. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  31. * in which case the provisions of the GPL or the LGPL are applicable instead
  32. * of those above. If you wish to allow use of your version of this file only
  33. * under the terms of either the GPL or the LGPL, and not to allow others to
  34. * use your version of this file under the terms of the MPL, indicate your
  35. * decision by deleting the provisions above and replace them with the notice
  36. * and other provisions required by the GPL or the LGPL. If you do not delete
  37. * the provisions above, a recipient may use your version of this file under
  38. * the terms of any one of the MPL, the GPL or the LGPL.
  39. *
  40. * ***** END LICENSE BLOCK ***** */
  41. /*
  42. * JS lexical scanner.
  43. */
  44. #include <stdio.h> /* first to avoid trouble on some systems */
  45. #include <errno.h>
  46. #include <limits.h>
  47. #include <math.h>
  48. #ifdef HAVE_MEMORY_H
  49. #include <memory.h>
  50. #endif
  51. #include <stdarg.h>
  52. #include <stdlib.h>
  53. #include <string.h>
  54. #include "jstypes.h"
  55. #include "jsutil.h"
  56. #include "jsprf.h"
  57. #include "jsapi.h"
  58. #include "jsatom.h"
  59. #include "jscntxt.h"
  60. #include "jsversion.h"
  61. #include "jsexn.h"
  62. #include "jsnum.h"
  63. #include "jsopcode.h"
  64. #include "jsscript.h"
  65. #include "frontend/BytecodeEmitter.h"
  66. #include "frontend/Parser.h"
  67. #include "frontend/TokenStream.h"
  68. #include "vm/RegExpObject.h"
  69. #include "jsscriptinlines.h"
  70. #if JS_HAS_XML_SUPPORT
  71. #include "jsxml.h"
  72. #endif
  73. using namespace js;
  74. using namespace js::unicode;
  75. #define JS_KEYWORD(keyword, type, op, version) \
  76. const char js_##keyword##_str[] = #keyword;
  77. #include "jskeyword.tbl"
  78. #undef JS_KEYWORD
  79. static const KeywordInfo keywords[] = {
  80. #define JS_KEYWORD(keyword, type, op, version) \
  81. {js_##keyword##_str, type, op, version},
  82. #include "jskeyword.tbl"
  83. #undef JS_KEYWORD
  84. };
  85. const KeywordInfo *
  86. js::FindKeyword(const jschar *s, size_t length)
  87. {
  88. JS_ASSERT(length != 0);
  89. register size_t i;
  90. const struct KeywordInfo *kw;
  91. const char *chars;
  92. #define JSKW_LENGTH() length
  93. #define JSKW_AT(column) s[column]
  94. #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
  95. #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
  96. #define JSKW_NO_MATCH() goto no_match;
  97. #include "jsautokw.h"
  98. #undef JSKW_NO_MATCH
  99. #undef JSKW_TEST_GUESS
  100. #undef JSKW_GOT_MATCH
  101. #undef JSKW_AT
  102. #undef JSKW_LENGTH
  103. got_match:
  104. return &keywords[i];
  105. test_guess:
  106. kw = &keywords[i];
  107. chars = kw->chars;
  108. do {
  109. if (*s++ != (unsigned char)(*chars++))
  110. goto no_match;
  111. } while (--length != 0);
  112. return kw;
  113. no_match:
  114. return NULL;
  115. }
  116. JSBool
  117. js::IsIdentifier(JSLinearString *str)
  118. {
  119. const jschar *chars = str->chars();
  120. size_t length = str->length();
  121. if (length == 0)
  122. return JS_FALSE;
  123. jschar c = *chars;
  124. if (!IsIdentifierStart(c))
  125. return JS_FALSE;
  126. const jschar *end = chars + length;
  127. while (++chars != end) {
  128. c = *chars;
  129. if (!IsIdentifierPart(c))
  130. return JS_FALSE;
  131. }
  132. return JS_TRUE;
  133. }
  134. #ifdef _MSC_VER
  135. #pragma warning(push)
  136. #pragma warning(disable:4351)
  137. #endif
  138. /* Initialize members that aren't initialized in |init|. */
  139. TokenStream::TokenStream(JSContext *cx, JSPrincipals *prin, JSPrincipals *originPrin)
  140. : tokens(), cursor(), lookahead(), flags(), listenerTSData(), tokenbuf(cx),
  141. cx(cx), originPrincipals(originPrin ? originPrin : prin)
  142. {
  143. if (originPrincipals)
  144. JSPRINCIPALS_HOLD(cx, originPrincipals);
  145. }
  146. #ifdef _MSC_VER
  147. #pragma warning(pop)
  148. #endif
  149. bool
  150. TokenStream::init(const jschar *base, size_t length, const char *fn, uintN ln, JSVersion v)
  151. {
  152. filename = fn;
  153. lineno = ln;
  154. version = v;
  155. xml = VersionHasXML(v);
  156. userbuf.init(base, length);
  157. linebase = base;
  158. prevLinebase = NULL;
  159. sourceMap = NULL;
  160. JSSourceHandler listener = cx->debugHooks->sourceHandler;
  161. void *listenerData = cx->debugHooks->sourceHandlerData;
  162. if (listener)
  163. listener(fn, ln, base, length, &listenerTSData, listenerData);
  164. /*
  165. * This table holds all the token kinds that satisfy these properties:
  166. * - A single char long.
  167. * - Cannot be a prefix of any longer token (eg. '+' is excluded because
  168. * '+=' is a valid token).
  169. * - Doesn't need tp->t_op set (eg. this excludes '~').
  170. *
  171. * The few token kinds satisfying these properties cover roughly 35--45%
  172. * of the tokens seen in practice.
  173. *
  174. * Nb: oneCharTokens, maybeEOL and maybeStrSpecial could be static, but
  175. * initializing them this way is a bit easier. Don't worry, the time to
  176. * initialize them for each TokenStream is trivial. See bug 639420.
  177. */
  178. memset(oneCharTokens, 0, sizeof(oneCharTokens));
  179. oneCharTokens[unsigned(';')] = TOK_SEMI;
  180. oneCharTokens[unsigned(',')] = TOK_COMMA;
  181. oneCharTokens[unsigned('?')] = TOK_HOOK;
  182. oneCharTokens[unsigned('[')] = TOK_LB;
  183. oneCharTokens[unsigned(']')] = TOK_RB;
  184. oneCharTokens[unsigned('{')] = TOK_LC;
  185. oneCharTokens[unsigned('}')] = TOK_RC;
  186. oneCharTokens[unsigned('(')] = TOK_LP;
  187. oneCharTokens[unsigned(')')] = TOK_RP;
  188. /* See getChar() for an explanation of maybeEOL[]. */
  189. memset(maybeEOL, 0, sizeof(maybeEOL));
  190. maybeEOL[unsigned('\n')] = true;
  191. maybeEOL[unsigned('\r')] = true;
  192. maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
  193. maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
  194. /* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
  195. memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
  196. maybeStrSpecial[unsigned('"')] = true;
  197. maybeStrSpecial[unsigned('\'')] = true;
  198. maybeStrSpecial[unsigned('\\')] = true;
  199. maybeStrSpecial[unsigned('\n')] = true;
  200. maybeStrSpecial[unsigned('\r')] = true;
  201. maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
  202. maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
  203. maybeStrSpecial[unsigned(EOF & 0xff)] = true;
  204. /*
  205. * Set |ln| as the beginning line number of the ungot "current token", so
  206. * that js::Parser::statements (and potentially other such methods, in the
  207. * future) can create parse nodes with good source coordinates before they
  208. * explicitly get any tokens.
  209. *
  210. * Switching the parser/lexer so we always get the next token ahead of the
  211. * parser needing it (the so-called "pump-priming" model) might be a better
  212. * way to address the dependency from statements on the current token.
  213. */
  214. tokens[0].pos.begin.lineno = tokens[0].pos.end.lineno = ln;
  215. return true;
  216. }
  217. TokenStream::~TokenStream()
  218. {
  219. if (flags & TSF_OWNFILENAME)
  220. cx->free_((void *) filename);
  221. if (sourceMap)
  222. cx->free_(sourceMap);
  223. if (originPrincipals)
  224. JSPRINCIPALS_DROP(cx, originPrincipals);
  225. }
  /*
   * fast_getc names the cheapest getc variant the build configuration offers:
   * getc_unlocked, then _getc_nolock, with plain getc as the fallback.
   */
  #ifdef HAVE_GETC_UNLOCKED
  # define fast_getc getc_unlocked
  #elif defined(HAVE__GETC_NOLOCK)
  # define fast_getc _getc_nolock
  #else
  # define fast_getc getc
  #endif
  234. JS_ALWAYS_INLINE void
  235. TokenStream::updateLineInfoForEOL()
  236. {
  237. prevLinebase = linebase;
  238. linebase = userbuf.addressOfNextRawChar();
  239. lineno++;
  240. }
  241. JS_ALWAYS_INLINE void
  242. TokenStream::updateFlagsForEOL()
  243. {
  244. flags &= ~TSF_DIRTYLINE;
  245. flags |= TSF_EOL;
  246. }
  247. /* This gets the next char, normalizing all EOL sequences to '\n' as it goes. */
  248. int32_t
  249. TokenStream::getChar()
  250. {
  251. int32_t c;
  252. if (JS_LIKELY(userbuf.hasRawChars())) {
  253. c = userbuf.getRawChar();
  254. /*
  255. * Normalize the jschar if it was a newline. We need to detect any of
  256. * these four characters: '\n' (0x000a), '\r' (0x000d),
  257. * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each
  258. * one in turn is slow, so we use a single probabilistic check, and if
  259. * that succeeds, test for them individually.
  260. *
  261. * We use the bottom 8 bits to index into a lookup table, succeeding
  262. * when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which
  263. * are by the far the most common) this gives false positives for '('
  264. * (0x0028) and ')' (0x0029). We could avoid those by incorporating
  265. * the 13th bit of d into the lookup, but that requires extra shifting
  266. * and masking and isn't worthwhile. See TokenStream::init() for the
  267. * initialization of the relevant entries in the table.
  268. */
  269. if (JS_UNLIKELY(maybeEOL[c & 0xff])) {
  270. if (c == '\n')
  271. goto eol;
  272. if (c == '\r') {
  273. /* if it's a \r\n sequence: treat as a single EOL, skip over the \n */
  274. if (userbuf.hasRawChars())
  275. userbuf.matchRawChar('\n');
  276. goto eol;
  277. }
  278. if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
  279. goto eol;
  280. }
  281. return c;
  282. }
  283. flags |= TSF_EOF;
  284. return EOF;
  285. eol:
  286. updateLineInfoForEOL();
  287. return '\n';
  288. }
  289. /*
  290. * This gets the next char. It does nothing special with EOL sequences, not
  291. * even updating the line counters. It can be used safely if (a) the
  292. * resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
  293. * it's an EOL, and (b) the line-related state (lineno, linebase) is not used
  294. * before it's ungotten.
  295. */
  296. int32_t
  297. TokenStream::getCharIgnoreEOL()
  298. {
  299. if (JS_LIKELY(userbuf.hasRawChars()))
  300. return userbuf.getRawChar();
  301. flags |= TSF_EOF;
  302. return EOF;
  303. }
  304. void
  305. TokenStream::ungetChar(int32_t c)
  306. {
  307. if (c == EOF)
  308. return;
  309. JS_ASSERT(!userbuf.atStart());
  310. userbuf.ungetRawChar();
  311. if (c == '\n') {
  312. #ifdef DEBUG
  313. int32_t c2 = userbuf.peekRawChar();
  314. JS_ASSERT(TokenBuf::isRawEOLChar(c2));
  315. #endif
  316. /* if it's a \r\n sequence, also unget the \r */
  317. if (!userbuf.atStart())
  318. userbuf.matchRawCharBackwards('\r');
  319. JS_ASSERT(prevLinebase); /* we should never get more than one EOL char */
  320. linebase = prevLinebase;
  321. prevLinebase = NULL;
  322. lineno--;
  323. } else {
  324. JS_ASSERT(userbuf.peekRawChar() == c);
  325. }
  326. }
  327. void
  328. TokenStream::ungetCharIgnoreEOL(int32_t c)
  329. {
  330. if (c == EOF)
  331. return;
  332. JS_ASSERT(!userbuf.atStart());
  333. userbuf.ungetRawChar();
  334. }
  335. /*
  336. * Return true iff |n| raw characters can be read from this without reading past
  337. * EOF or a newline, and copy those characters into |cp| if so. The characters
  338. * are not consumed: use skipChars(n) to do so after checking that the consumed
  339. * characters had appropriate values.
  340. */
  341. bool
  342. TokenStream::peekChars(intN n, jschar *cp)
  343. {
  344. intN i, j;
  345. int32_t c;
  346. for (i = 0; i < n; i++) {
  347. c = getCharIgnoreEOL();
  348. if (c == EOF)
  349. break;
  350. if (c == '\n') {
  351. ungetCharIgnoreEOL(c);
  352. break;
  353. }
  354. cp[i] = (jschar)c;
  355. }
  356. for (j = i - 1; j >= 0; j--)
  357. ungetCharIgnoreEOL(cp[j]);
  358. return i == n;
  359. }
  360. const jschar *
  361. TokenStream::TokenBuf::findEOL()
  362. {
  363. const jschar *tmp = ptr;
  364. #ifdef DEBUG
  365. /*
  366. * This is the one exception to the "TokenBuf isn't accessed after
  367. * poisoning" rule -- we may end up calling findEOL() in order to set up
  368. * an error.
  369. */
  370. if (!tmp)
  371. tmp = ptrWhenPoisoned;
  372. #endif
  373. while (true) {
  374. if (tmp >= limit)
  375. break;
  376. if (TokenBuf::isRawEOLChar(*tmp++))
  377. break;
  378. }
  379. return tmp;
  380. }
  381. bool
  382. TokenStream::reportCompileErrorNumberVA(ParseNode *pn, uintN flags, uintN errorNumber, va_list ap)
  383. {
  384. JSErrorReport report;
  385. char *message;
  386. jschar *linechars;
  387. char *linebytes;
  388. bool warning;
  389. JSBool ok;
  390. const TokenPos *tp;
  391. uintN i;
  392. if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
  393. return true;
  394. warning = JSREPORT_IS_WARNING(flags);
  395. if (warning && cx->hasWErrorOption()) {
  396. flags &= ~JSREPORT_WARNING;
  397. warning = false;
  398. }
  399. PodZero(&report);
  400. report.flags = flags;
  401. report.errorNumber = errorNumber;
  402. message = NULL;
  403. linechars = NULL;
  404. linebytes = NULL;
  405. MUST_FLOW_THROUGH("out");
  406. ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
  407. errorNumber, &message, &report,
  408. !(flags & JSREPORT_UC), ap);
  409. if (!ok) {
  410. warning = false;
  411. goto out;
  412. }
  413. report.filename = filename;
  414. report.originPrincipals = originPrincipals;
  415. tp = pn ? &pn->pn_pos : &currentToken().pos;
  416. report.lineno = tp->begin.lineno;
  417. /*
  418. * Given a token, T, that we want to complain about: if T's (starting)
  419. * lineno doesn't match TokenStream's lineno, that means we've scanned past
  420. * the line that T starts on, which makes it hard to print some or all of
  421. * T's (starting) line for context.
  422. *
  423. * So we don't even try, leaving report.linebuf and friends zeroed. This
  424. * means that any error involving a multi-line token (eg. an unterminated
  425. * multi-line string literal) won't have a context printed.
  426. */
  427. if (report.lineno == lineno) {
  428. size_t linelength = userbuf.findEOL() - linebase;
  429. linechars = (jschar *)cx->malloc_((linelength + 1) * sizeof(jschar));
  430. if (!linechars) {
  431. warning = false;
  432. goto out;
  433. }
  434. PodCopy(linechars, linebase, linelength);
  435. linechars[linelength] = 0;
  436. linebytes = DeflateString(cx, linechars, linelength);
  437. if (!linebytes) {
  438. warning = false;
  439. goto out;
  440. }
  441. /* Unicode and char versions of the offending source line, without final \n */
  442. report.linebuf = linebytes;
  443. report.uclinebuf = linechars;
  444. /* The lineno check above means we should only see single-line tokens here. */
  445. JS_ASSERT(tp->begin.lineno == tp->end.lineno);
  446. report.tokenptr = report.linebuf + tp->begin.index;
  447. report.uctokenptr = report.uclinebuf + tp->begin.index;
  448. }
  449. /*
  450. * If there's a runtime exception type associated with this error
  451. * number, set that as the pending exception. For errors occuring at
  452. * compile time, this is very likely to be a JSEXN_SYNTAXERR.
  453. *
  454. * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
  455. * flag will be set in report.flags. Proper behavior for an error
  456. * reporter is to ignore a report with this flag for all but top-level
  457. * compilation errors. The exception will remain pending, and so long
  458. * as the non-top-level "load", "eval", or "compile" native function
  459. * returns false, the top-level reporter will eventually receive the
  460. * uncaught exception report.
  461. */
  462. if (!js_ErrorToException(cx, message, &report, NULL, NULL)) {
  463. /*
  464. * If debugErrorHook is present then we give it a chance to veto
  465. * sending the error on to the regular error reporter.
  466. */
  467. bool reportError = true;
  468. if (JSDebugErrorHook hook = cx->debugHooks->debugErrorHook)
  469. reportError = hook(cx, message, &report, cx->debugHooks->debugErrorHookData);
  470. /* Report the error */
  471. if (reportError && cx->errorReporter)
  472. cx->errorReporter(cx, message, &report);
  473. }
  474. out:
  475. if (linebytes)
  476. cx->free_(linebytes);
  477. if (linechars)
  478. cx->free_(linechars);
  479. if (message)
  480. cx->free_(message);
  481. if (report.ucmessage)
  482. cx->free_((void *)report.ucmessage);
  483. if (report.messageArgs) {
  484. if (!(flags & JSREPORT_UC)) {
  485. i = 0;
  486. while (report.messageArgs[i])
  487. cx->free_((void *)report.messageArgs[i++]);
  488. }
  489. cx->free_((void *)report.messageArgs);
  490. }
  491. return warning;
  492. }
  493. bool
  494. js::ReportStrictModeError(JSContext *cx, TokenStream *ts, TreeContext *tc, ParseNode *pn,
  495. uintN errorNumber, ...)
  496. {
  497. JS_ASSERT(ts || tc);
  498. JS_ASSERT(cx == ts->getContext());
  499. /* In strict mode code, this is an error, not merely a warning. */
  500. uintN flags;
  501. if ((ts && ts->isStrictMode()) || (tc && (tc->flags & TCF_STRICT_MODE_CODE))) {
  502. flags = JSREPORT_ERROR;
  503. } else {
  504. if (!cx->hasStrictOption())
  505. return true;
  506. flags = JSREPORT_WARNING;
  507. }
  508. va_list ap;
  509. va_start(ap, errorNumber);
  510. bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
  511. va_end(ap);
  512. return result;
  513. }
  514. bool
  515. js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, ParseNode *pn, uintN flags,
  516. uintN errorNumber, ...)
  517. {
  518. va_list ap;
  519. /*
  520. * We don't accept a TreeContext argument, so we can't implement
  521. * JSREPORT_STRICT_MODE_ERROR here. Use ReportStrictModeError instead,
  522. * or do the checks in the caller and pass plain old JSREPORT_ERROR.
  523. */
  524. JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
  525. va_start(ap, errorNumber);
  526. JS_ASSERT(cx == ts->getContext());
  527. bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
  528. va_end(ap);
  529. return result;
  530. }
  531. #if JS_HAS_XML_SUPPORT
  532. bool
  533. TokenStream::getXMLEntity()
  534. {
  535. ptrdiff_t offset, length, i;
  536. int c, d;
  537. JSBool ispair;
  538. jschar *bp, digit;
  539. char *bytes;
  540. JSErrNum msg;
  541. CharBuffer &tb = tokenbuf;
  542. /* Put the entity, including the '&' already scanned, in tokenbuf. */
  543. offset = tb.length();
  544. if (!tb.append('&'))
  545. return false;
  546. while ((c = getChar()) != ';') {
  547. if (c == EOF || c == '\n') {
  548. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
  549. return false;
  550. }
  551. if (!tb.append(c))
  552. return false;
  553. }
  554. /* Let length be the number of jschars after the '&', including the ';'. */
  555. length = tb.length() - offset;
  556. bp = tb.begin() + offset;
  557. c = d = 0;
  558. ispair = false;
  559. if (length > 2 && bp[1] == '#') {
  560. /* Match a well-formed XML Character Reference. */
  561. i = 2;
  562. if (length > 3 && (bp[i] == 'x' || bp[i] == 'X')) {
  563. if (length > 9) /* at most 6 hex digits allowed */
  564. goto badncr;
  565. while (++i < length) {
  566. digit = bp[i];
  567. if (!JS7_ISHEX(digit))
  568. goto badncr;
  569. c = (c << 4) + JS7_UNHEX(digit);
  570. }
  571. } else {
  572. while (i < length) {
  573. digit = bp[i++];
  574. if (!JS7_ISDEC(digit))
  575. goto badncr;
  576. c = (c * 10) + JS7_UNDEC(digit);
  577. if (c < 0)
  578. goto badncr;
  579. }
  580. }
  581. if (0x10000 <= c && c <= 0x10FFFF) {
  582. /* Form a surrogate pair (c, d) -- c is the high surrogate. */
  583. d = 0xDC00 + (c & 0x3FF);
  584. c = 0xD7C0 + (c >> 10);
  585. ispair = true;
  586. } else {
  587. /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
  588. if (c != 0x9 && c != 0xA && c != 0xD &&
  589. !(0x20 <= c && c <= 0xD7FF) &&
  590. !(0xE000 <= c && c <= 0xFFFD)) {
  591. goto badncr;
  592. }
  593. }
  594. } else {
  595. /* Try to match one of the five XML 1.0 predefined entities. */
  596. switch (length) {
  597. case 3:
  598. if (bp[2] == 't') {
  599. if (bp[1] == 'l')
  600. c = '<';
  601. else if (bp[1] == 'g')
  602. c = '>';
  603. }
  604. break;
  605. case 4:
  606. if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
  607. c = '&';
  608. break;
  609. case 5:
  610. if (bp[3] == 'o') {
  611. if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
  612. c = '\'';
  613. else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
  614. c = '"';
  615. }
  616. break;
  617. }
  618. if (c == 0) {
  619. msg = JSMSG_UNKNOWN_XML_ENTITY;
  620. goto bad;
  621. }
  622. }
  623. /* If we matched, retract tokenbuf and store the entity's value. */
  624. *bp++ = (jschar) c;
  625. if (ispair)
  626. *bp++ = (jschar) d;
  627. tb.shrinkBy(tb.end() - bp);
  628. return true;
  629. badncr:
  630. msg = JSMSG_BAD_XML_NCR;
  631. bad:
  632. /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
  633. JS_ASSERT((tb.end() - bp) >= 1);
  634. bytes = DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
  635. if (bytes) {
  636. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
  637. cx->free_(bytes);
  638. }
  639. return false;
  640. }
  641. bool
  642. TokenStream::getXMLTextOrTag(TokenKind *ttp, Token **tpp)
  643. {
  644. TokenKind tt;
  645. int c, qc;
  646. Token *tp;
  647. JSAtom *atom;
  648. /*
  649. * Look for XML text.
  650. */
  651. if (flags & TSF_XMLTEXTMODE) {
  652. tt = TOK_XMLSPACE; /* veto if non-space, return TOK_XMLTEXT */
  653. tp = newToken(0);
  654. tokenbuf.clear();
  655. qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
  656. while ((c = getChar()) != qc && c != '<' && c != EOF) {
  657. if (c == '&' && qc == '<') {
  658. if (!getXMLEntity())
  659. goto error;
  660. tt = TOK_XMLTEXT;
  661. continue;
  662. }
  663. if (!IsXMLSpace(c))
  664. tt = TOK_XMLTEXT;
  665. if (!tokenbuf.append(c))
  666. goto error;
  667. }
  668. ungetChar(c);
  669. if (tokenbuf.empty()) {
  670. atom = NULL;
  671. } else {
  672. atom = atomize(cx, tokenbuf);
  673. if (!atom)
  674. goto error;
  675. }
  676. tp->pos.end.lineno = lineno;
  677. tp->setAtom(JSOP_STRING, atom);
  678. goto out;
  679. }
  680. /*
  681. * XML tags.
  682. */
  683. else {
  684. JS_ASSERT(flags & TSF_XMLTAGMODE);
  685. tp = newToken(0);
  686. c = getChar();
  687. if (c != EOF && IsXMLSpace(c)) {
  688. do {
  689. c = getChar();
  690. if (c == EOF)
  691. break;
  692. } while (IsXMLSpace(c));
  693. ungetChar(c);
  694. tp->pos.end.lineno = lineno;
  695. tt = TOK_XMLSPACE;
  696. goto out;
  697. }
  698. if (c == EOF) {
  699. tt = TOK_EOF;
  700. goto out;
  701. }
  702. tokenbuf.clear();
  703. if (IsXMLNamespaceStart(c)) {
  704. JSBool sawColon = JS_FALSE;
  705. if (!tokenbuf.append(c))
  706. goto error;
  707. while ((c = getChar()) != EOF && IsXMLNamePart(c)) {
  708. if (c == ':') {
  709. int nextc;
  710. if (sawColon ||
  711. (nextc = peekChar(),
  712. ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
  713. !IsXMLNamePart(nextc))) {
  714. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  715. JSMSG_BAD_XML_QNAME);
  716. goto error;
  717. }
  718. sawColon = JS_TRUE;
  719. }
  720. if (!tokenbuf.append(c))
  721. goto error;
  722. }
  723. ungetChar(c);
  724. atom = atomize(cx, tokenbuf);
  725. if (!atom)
  726. goto error;
  727. tp->setAtom(JSOP_STRING, atom);
  728. tt = TOK_XMLNAME;
  729. goto out;
  730. }
  731. switch (c) {
  732. case '{':
  733. if (flags & TSF_XMLONLYMODE)
  734. goto bad_xml_char;
  735. tt = TOK_LC;
  736. goto out;
  737. case '=':
  738. tt = TOK_ASSIGN;
  739. goto out;
  740. case '"':
  741. case '\'':
  742. qc = c;
  743. while ((c = getChar()) != qc) {
  744. if (c == EOF) {
  745. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  746. JSMSG_UNTERMINATED_STRING);
  747. goto error;
  748. }
  749. /*
  750. * XML attribute values are double-quoted when pretty-printed,
  751. * so escape " if it is expressed directly in a single-quoted
  752. * attribute value.
  753. */
  754. if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
  755. JS_ASSERT(qc == '\'');
  756. if (!tokenbuf.append(js_quot_entity_str,
  757. strlen(js_quot_entity_str)))
  758. goto error;
  759. continue;
  760. }
  761. if (c == '&' && (flags & TSF_XMLONLYMODE)) {
  762. if (!getXMLEntity())
  763. goto error;
  764. continue;
  765. }
  766. if (!tokenbuf.append(c))
  767. goto error;
  768. }
  769. atom = atomize(cx, tokenbuf);
  770. if (!atom)
  771. goto error;
  772. tp->pos.end.lineno = lineno;
  773. tp->setAtom(JSOP_STRING, atom);
  774. tt = TOK_XMLATTR;
  775. goto out;
  776. case '>':
  777. tt = TOK_XMLTAGC;
  778. goto out;
  779. case '/':
  780. if (matchChar('>')) {
  781. tt = TOK_XMLPTAGC;
  782. goto out;
  783. }
  784. /* FALL THROUGH */
  785. bad_xml_char:
  786. default:
  787. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
  788. goto error;
  789. }
  790. JS_NOT_REACHED("getXMLTextOrTag 1");
  791. }
  792. JS_NOT_REACHED("getXMLTextOrTag 2");
  793. out:
  794. *ttp = tt;
  795. *tpp = tp;
  796. return true;
  797. error:
  798. *ttp = TOK_ERROR;
  799. *tpp = tp;
  800. return false;
  801. }
  802. /*
  803. * After much testing, it's clear that Postel's advice to protocol designers
  804. * ("be liberal in what you accept, and conservative in what you send") invites
  805. * a natural-law repercussion for JS as "protocol":
  806. *
  807. * "If you are liberal in what you accept, others will utterly fail to be
  808. * conservative in what they send."
  809. *
  810. * Which means you will get <!-- comments to end of line in the middle of .js
  811. * files, and after if conditions whose then statements are on the next line,
  812. * and other wonders. See at least the following bugs:
  813. * - https://bugzilla.mozilla.org/show_bug.cgi?id=309242
  814. * - https://bugzilla.mozilla.org/show_bug.cgi?id=309712
  815. * - https://bugzilla.mozilla.org/show_bug.cgi?id=310993
  816. *
  817. * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan an XML
  818. * comment or CDATA literal. Instead, we always scan <! as the start of an
  819. * HTML comment hack to end of line, used since Netscape 2 to hide script tag
  820. * content from script-unaware browsers.
  821. *
  822. * But this still leaves XML resources with certain internal structure
  823. * vulnerable to being loaded as script cross-origin, and some internal data
  824. * stolen, so for Firefox 3.5 and beyond, we reject programs whose source
  825. * consists only of XML literals. See:
  826. *
  827. * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
  828. *
  829. * The check for this is in js::frontend::CompileScript.
  830. */
  831. bool
  832. TokenStream::getXMLMarkup(TokenKind *ttp, Token **tpp)
  833. {
  834. TokenKind tt;
  835. int c;
  836. Token *tp = *tpp;
  837. /* Check for XML comment or CDATA section. */
  838. if (matchChar('!')) {
  839. tokenbuf.clear();
  840. /* Scan XML comment. */
  841. if (matchChar('-')) {
  842. if (!matchChar('-'))
  843. goto bad_xml_markup;
  844. while ((c = getChar()) != '-' || !matchChar('-')) {
  845. if (c == EOF)
  846. goto bad_xml_markup;
  847. if (!tokenbuf.append(c))
  848. goto error;
  849. }
  850. if (!matchChar('>'))
  851. goto bad_xml_markup;
  852. JSAtom *commentText = atomize(cx, tokenbuf);
  853. if (!commentText)
  854. goto error;
  855. tp->setAtom(JSOP_XMLCOMMENT, commentText);
  856. tp->pos.end.lineno = lineno;
  857. tt = TOK_XMLCOMMENT;
  858. goto out;
  859. }
  860. /* Scan CDATA section. */
  861. if (matchChar('[')) {
  862. jschar cp[6];
  863. if (peekChars(6, cp) &&
  864. cp[0] == 'C' &&
  865. cp[1] == 'D' &&
  866. cp[2] == 'A' &&
  867. cp[3] == 'T' &&
  868. cp[4] == 'A' &&
  869. cp[5] == '[') {
  870. skipChars(6);
  871. while ((c = getChar()) != ']' ||
  872. !peekChars(2, cp) ||
  873. cp[0] != ']' ||
  874. cp[1] != '>') {
  875. if (c == EOF)
  876. goto bad_xml_markup;
  877. if (!tokenbuf.append(c))
  878. goto error;
  879. }
  880. consumeKnownChar(']');
  881. consumeKnownChar('>');
  882. JSAtom *cdataContent = atomize(cx, tokenbuf);
  883. if (!cdataContent)
  884. goto error;
  885. tp->setAtom(JSOP_XMLCDATA, cdataContent);
  886. tp->pos.end.lineno = lineno;
  887. tt = TOK_XMLCDATA;
  888. goto out;
  889. }
  890. goto bad_xml_markup;
  891. }
  892. }
  893. /* Check for processing instruction. */
  894. if (matchChar('?')) {
  895. bool inTarget = true;
  896. size_t targetLength = 0;
  897. ptrdiff_t contentIndex = -1;
  898. tokenbuf.clear();
  899. while ((c = getChar()) != '?' || peekChar() != '>') {
  900. if (c == EOF)
  901. goto bad_xml_markup;
  902. if (inTarget) {
  903. if (IsXMLSpace(c)) {
  904. if (tokenbuf.empty())
  905. goto bad_xml_markup;
  906. inTarget = false;
  907. } else {
  908. if (!(tokenbuf.empty()
  909. ? IsXMLNamespaceStart(c)
  910. : IsXMLNamespacePart(c))) {
  911. goto bad_xml_markup;
  912. }
  913. ++targetLength;
  914. }
  915. } else {
  916. if (contentIndex < 0 && !IsXMLSpace(c))
  917. contentIndex = tokenbuf.length();
  918. }
  919. if (!tokenbuf.append(c))
  920. goto error;
  921. }
  922. if (targetLength == 0)
  923. goto bad_xml_markup;
  924. JSAtom *data;
  925. if (contentIndex < 0) {
  926. data = cx->runtime->atomState.emptyAtom;
  927. } else {
  928. data = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
  929. tokenbuf.length() - contentIndex);
  930. if (!data)
  931. goto error;
  932. }
  933. tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
  934. consumeKnownChar('>');
  935. JSAtom *target = atomize(cx, tokenbuf);
  936. if (!target)
  937. goto error;
  938. tp->setProcessingInstruction(target->asPropertyName(), data);
  939. tp->pos.end.lineno = lineno;
  940. tt = TOK_XMLPI;
  941. goto out;
  942. }
  943. /* An XML start-of-tag character. */
  944. tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
  945. out:
  946. *ttp = tt;
  947. *tpp = tp;
  948. return true;
  949. bad_xml_markup:
  950. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
  951. error:
  952. *ttp = TOK_ERROR;
  953. *tpp = tp;
  954. return false;
  955. }
  956. #endif /* JS_HAS_XML_SUPPORT */
  957. /*
  958. * We have encountered a '\': check for a Unicode escape sequence after it.
  959. * Return 'true' and the character code value (by value) if we found a
  960. * Unicode escape sequence. Otherwise, return 'false'. In both cases, do not
  961. * advance along the buffer.
  962. */
  963. bool
  964. TokenStream::peekUnicodeEscape(int *result)
  965. {
  966. jschar cp[5];
  967. if (peekChars(5, cp) && cp[0] == 'u' &&
  968. JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
  969. JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
  970. {
  971. *result = (((((JS7_UNHEX(cp[1]) << 4)
  972. + JS7_UNHEX(cp[2])) << 4)
  973. + JS7_UNHEX(cp[3])) << 4)
  974. + JS7_UNHEX(cp[4]);
  975. return true;
  976. }
  977. return false;
  978. }
  979. bool
  980. TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
  981. {
  982. if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
  983. skipChars(5);
  984. return true;
  985. }
  986. return false;
  987. }
  988. bool
  989. TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
  990. {
  991. if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
  992. skipChars(5);
  993. return true;
  994. }
  995. return false;
  996. }
  997. /*
  998. * Helper function which returns true if the first length(q) characters in p are
  999. * the same as the characters in q.
  1000. */
  1001. static bool
  1002. CharsMatch(const jschar *p, const char *q) {
  1003. while (*q) {
  1004. if (*p++ != *q++)
  1005. return false;
  1006. }
  1007. return true;
  1008. }
  1009. bool
  1010. TokenStream::getAtLine()
  1011. {
  1012. int c;
  1013. jschar cp[5];
  1014. uintN i, line, temp;
  1015. char filenameBuf[1024];
  1016. /*
  1017. * Hack for source filters such as the Mozilla XUL preprocessor:
  1018. * "//@line 123\n" sets the number of the *next* line after the
  1019. * comment to 123. If we reach here, we've already seen "//".
  1020. */
  1021. if (peekChars(5, cp) && CharsMatch(cp, "@line")) {
  1022. skipChars(5);
  1023. while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
  1024. continue;
  1025. if (JS7_ISDEC(c)) {
  1026. line = JS7_UNDEC(c);
  1027. while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
  1028. temp = 10 * line + JS7_UNDEC(c);
  1029. if (temp < line) {
  1030. /* Ignore overlarge line numbers. */
  1031. return true;
  1032. }
  1033. line = temp;
  1034. }
  1035. while (c != '\n' && c != EOF && IsSpaceOrBOM2(c))
  1036. c = getChar();
  1037. i = 0;
  1038. if (c == '"') {
  1039. while ((c = getChar()) != EOF && c != '"') {
  1040. if (c == '\n') {
  1041. ungetChar(c);
  1042. return true;
  1043. }
  1044. if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
  1045. return true;
  1046. filenameBuf[i++] = (char) c;
  1047. }
  1048. if (c == '"') {
  1049. while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
  1050. continue;
  1051. }
  1052. }
  1053. filenameBuf[i] = '\0';
  1054. if (c == EOF || c == '\n') {
  1055. if (i > 0) {
  1056. if (flags & TSF_OWNFILENAME)
  1057. cx->free_((void *) filename);
  1058. filename = JS_strdup(cx, filenameBuf);
  1059. if (!filename)
  1060. return false;
  1061. flags |= TSF_OWNFILENAME;
  1062. }
  1063. lineno = line;
  1064. }
  1065. }
  1066. ungetChar(c);
  1067. }
  1068. return true;
  1069. }
  1070. bool
  1071. TokenStream::getAtSourceMappingURL()
  1072. {
  1073. jschar peeked[18];
  1074. /* Match comments of the form @sourceMappingURL=<url> */
  1075. if (peekChars(18, peeked) && CharsMatch(peeked, "@sourceMappingURL=")) {
  1076. skipChars(18);
  1077. tokenbuf.clear();
  1078. jschar c;
  1079. while (!IsSpaceOrBOM2((c = getChar())) &&
  1080. c && c != jschar(EOF))
  1081. tokenbuf.append(c);
  1082. if (tokenbuf.empty())
  1083. /* The source map's URL was missing, but not quite an exception that
  1084. * we should stop and drop everything for, though. */
  1085. return true;
  1086. int len = tokenbuf.length();
  1087. if (sourceMap)
  1088. cx->free_(sourceMap);
  1089. sourceMap = (jschar *) cx->malloc_(sizeof(jschar) * (len + 1));
  1090. if (!sourceMap)
  1091. return false;
  1092. for (int i = 0; i < len; i++)
  1093. sourceMap[i] = tokenbuf[i];
  1094. sourceMap[len] = '\0';
  1095. }
  1096. return true;
  1097. }
  1098. Token *
  1099. TokenStream::newToken(ptrdiff_t adjust)
  1100. {
  1101. cursor = (cursor + 1) & ntokensMask;
  1102. Token *tp = &tokens[cursor];
  1103. tp->ptr = userbuf.addressOfNextRawChar() + adjust;
  1104. tp->pos.begin.index = tp->ptr - linebase;
  1105. tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
  1106. return tp;
  1107. }
  1108. JS_ALWAYS_INLINE JSAtom *
  1109. TokenStream::atomize(JSContext *cx, CharBuffer &cb)
  1110. {
  1111. return js_AtomizeChars(cx, cb.begin(), cb.length());
  1112. }
  #ifdef DEBUG
  /*
   * Debug-only consistency check for a token: its type must be a real token
   * kind, and its begin/end positions must be coherent.
   */
  bool
  IsTokenSane(Token *tp)
  {
      /*
       * Nb: TOK_EOL should never be used in an actual Token; it should only be
       * returned as a TokenKind from peekTokenSameLine().
       */
      if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
          return false;

      if (tp->pos.begin.lineno != tp->pos.end.lineno) {
          /* Only certain token kinds can be multi-line. */
          switch (tp->type) {
            case TOK_STRING:
            case TOK_XMLATTR:
            case TOK_XMLSPACE:
            case TOK_XMLTEXT:
            case TOK_XMLCOMMENT:
            case TOK_XMLCDATA:
            case TOK_XMLPI:
              return true;
            default:
              return false;
          }
      }

      /* Single-line token: it must not end before it begins. */
      return tp->pos.begin.index <= tp->pos.end.index;
  }
  #endif
  1144. bool
  1145. TokenStream::putIdentInTokenbuf(const jschar *identStart)
  1146. {
  1147. int32_t c, qc;
  1148. const jschar *tmp = userbuf.addressOfNextRawChar();
  1149. userbuf.setAddressOfNextRawChar(identStart);
  1150. tokenbuf.clear();
  1151. for (;;) {
  1152. c = getCharIgnoreEOL();
  1153. if (!IsIdentifierPart(c)) {
  1154. if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
  1155. break;
  1156. c = qc;
  1157. }
  1158. if (!tokenbuf.append(c)) {
  1159. userbuf.setAddressOfNextRawChar(tmp);
  1160. return false;
  1161. }
  1162. }
  1163. userbuf.setAddressOfNextRawChar(tmp);
  1164. return true;
  1165. }
  1166. bool
  1167. TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp)
  1168. {
  1169. JS_ASSERT(!ttp == !topp);
  1170. const KeywordInfo *kw = FindKeyword(s, length);
  1171. if (!kw)
  1172. return true;
  1173. if (kw->tokentype == TOK_RESERVED) {
  1174. return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1175. JSMSG_RESERVED_ID, kw->chars);
  1176. }
  1177. if (kw->tokentype != TOK_STRICT_RESERVED) {
  1178. if (kw->version <= versionNumber()) {
  1179. /* Working keyword. */
  1180. if (ttp) {
  1181. *ttp = kw->tokentype;
  1182. *topp = (JSOp) kw->op;
  1183. return true;
  1184. }
  1185. return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1186. JSMSG_RESERVED_ID, kw->chars);
  1187. }
  1188. /*
  1189. * The keyword is not in this version. Treat it as an identifier,
  1190. * unless it is let or yield which we treat as TOK_STRICT_RESERVED by
  1191. * falling through to the code below (ES5 forbids them in strict mode).
  1192. */
  1193. if (kw->tokentype != TOK_LET && kw->tokentype != TOK_YIELD)
  1194. return true;
  1195. }
  1196. /* Strict reserved word. */
  1197. if (isStrictMode())
  1198. return ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars);
  1199. return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_STRICT | JSREPORT_WARNING,
  1200. JSMSG_RESERVED_ID, kw->chars);
  1201. }
  /*
   * Classification of a token by its first character. The numeric values are
   * spelled out to make the ordering explicit: Space and EOL must stay the two
   * largest values so that a single |kind >= Space| test matches both.
   */
  enum FirstCharKind {
      Other   = 0,
      OneChar = 1,
      Ident   = 2,
      Dot     = 3,
      Equals  = 4,
      String  = 5,
      Dec     = 6,
      Colon   = 7,
      Plus    = 8,
      HexOct  = 9,

      /* These two must be last, so that |c >= Space| matches both. */
      Space   = 10,
      EOL     = 11
  };
  /*
   * Lookup table giving the FirstCharKind of each character code 0..127. The
   * temporary macro below only keeps the table columns aligned; every entry
   * written with it is 'Other'.
   */
  1217. #define _______ Other
  1218. /*
  1219. * OneChar: 40, 41, 44, 59, 63, 91, 93, 123, 125: '(', ')', ',', ';', '?', '[', ']', '{', '}'
  1220. * Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
  1221. * Dot: 46: '.'
  1222. * Equals: 61: '='
  1223. * String: 34, 39: '"', '\''
  1224. * Dec: 49..57: '1'..'9'
  1225. * Colon: 58: ':'
  1226. * Plus: 43: '+'
  1227. * HexOct: 48: '0'
  1228. * Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
  1229. * EOL: 10, 13: '\n', '\r'
  1230. */
  /* Each row holds ten consecutive codes; the last row covers 120..127 only. */
  1231. static const uint8_t firstCharKinds[] = {
  1232. /* 0 1 2 3 4 5 6 7 8 9 */
  1233. /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
  1234. /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
  1235. /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
  1236. /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
  1237. /* 40+ */ OneChar, OneChar, _______, Plus, OneChar, _______, Dot, _______, HexOct, Dec,
  1238. /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Colon, OneChar,
  1239. /* 60+ */ _______, Equals, _______, OneChar, _______, Ident, Ident, Ident, Ident, Ident,
  1240. /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
  1241. /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
  1242. /* 90+ */ Ident, OneChar, _______, OneChar, _______, Ident, _______, Ident, Ident, Ident,
  1243. /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
  1244. /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
  1245. /* 120+ */ Ident, Ident, Ident, OneChar, _______, OneChar, _______, _______
  1246. };
  1247. #undef _______
  /*
   * Scan the next token from userbuf and return its kind.
   *
   * When an XML text/tag mode flag is set (JS_HAS_XML_SUPPORT builds), scanning
   * is delegated to getXMLTextOrTag(). Otherwise whitespace and comments are
   * skipped by jumping back to 'retry', a token slot is obtained from
   * newToken(), and the token is classified by its first character.
   *
   * All successful paths end at 'out', which records the token's end index and
   * type. All failures end at 'error', which turns the token into a one-char
   * TOK_ERROR token and returns TOK_ERROR.
   */
  1248. TokenKind
  1249. TokenStream::getTokenInternal()
  1250. {
  1251. TokenKind tt;
  1252. int c, qc;
  1253. Token *tp;
  1254. FirstCharKind c1kind;
  1255. const jschar *numStart;
  1256. bool hasFracOrExp;
  1257. const jschar *identStart;
  1258. bool hadUnicodeEscape;
  1259. #if JS_HAS_XML_SUPPORT
  1260. /*
  1261. * Look for XML text and tags.
  1262. */
  1263. if (flags & (TSF_XMLTEXTMODE|TSF_XMLTAGMODE)) {
  1264. if (!getXMLTextOrTag(&tt, &tp))
  1265. goto error;
  1266. goto out;
  1267. }
  1268. #endif
  1269. retry:
  /* No input left: produce TOK_EOF and remember that EOF was reached. */
  1270. if (JS_UNLIKELY(!userbuf.hasRawChars())) {
  1271. tp = newToken(0);
  1272. tt = TOK_EOF;
  1273. flags |= TSF_EOF;
  1274. goto out;
  1275. }
  1276. c = userbuf.getRawChar();
  1277. JS_ASSERT(c != EOF);
  1278. /*
  1279. * Chars not in the range 0..127 are rare. Getting them out of the way
  1280. * early allows subsequent checking to be faster.
  1281. */
  1282. if (JS_UNLIKELY(c >= 128)) {
  1283. if (IsSpaceOrBOM2(c)) {
  1284. if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
  1285. updateLineInfoForEOL();
  1286. updateFlagsForEOL();
  1287. }
  1288. goto retry;
  1289. }
  /* -1: the token starts at the character that was just consumed. */
  1290. tp = newToken(-1);
  1291. /* '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. */
  1292. JS_STATIC_ASSERT('$' < 128 && '_' < 128);
  1293. if (IsLetter(c)) {
  1294. identStart = userbuf.addressOfNextRawChar() - 1;
  1295. hadUnicodeEscape = false;
  1296. goto identifier;
  1297. }
  1298. goto badchar;
  1299. }
  1300. /*
  1301. * Get the token kind, based on the first char. The ordering of c1kind
  1302. * comparison is based on the frequency of tokens in real code. Minified
  1303. * and non-minified code have different characteristics, mostly in that
  1304. * whitespace occurs much less in minified code. Token kinds that fall in
  1305. * the 'Other' category typically account for less than 2% of all tokens,
  1306. * so their order doesn't matter much.
  1307. */
  1308. c1kind = FirstCharKind(firstCharKinds[c]);
  1309. /*
  1310. * Skip over whitespace chars; update line state on EOLs. Even though
  1311. * whitespace isn't very common in minified code we have to handle it first
  1312. * (and jump back to 'retry') before calling newToken().
  1313. */
  1314. if (c1kind >= Space) {
  1315. if (c1kind == EOL) {
  1316. /* If it's a \r\n sequence: treat as a single EOL, skip over the \n. */
  1317. if (c == '\r' && userbuf.hasRawChars())
  1318. userbuf.matchRawChar('\n');
  1319. updateLineInfoForEOL();
  1320. updateFlagsForEOL();
  1321. }
  1322. goto retry;
  1323. }
  /* -1: the token starts at the character that was just consumed. */
  1324. tp = newToken(-1);
  1325. /*
  1326. * Look for an unambiguous single-char token.
  1327. */
  1328. if (c1kind == OneChar) {
  1329. tt = (TokenKind)oneCharTokens[c];
  1330. goto out;
  1331. }
  1332. /*
  1333. * Look for an identifier.
  1334. */
  1335. if (c1kind == Ident) {
  1336. identStart = userbuf.addressOfNextRawChar() - 1;
  1337. hadUnicodeEscape = false;
  /* Also entered by goto for non-ASCII letters and for a leading \uXXXX escape. */
  1338. identifier:
  1339. for (;;) {
  1340. c = getCharIgnoreEOL();
  1341. if (c == EOF)
  1342. break;
  1343. if (!IsIdentifierPart(c)) {
  1344. if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
  1345. break;
  1346. hadUnicodeEscape = true;
  1347. }
  1348. }
  1349. ungetCharIgnoreEOL(c);
  1350. /* Convert the escapes by putting into tokenbuf. */
  1351. if (hadUnicodeEscape && !putIdentInTokenbuf(identStart))
  1352. goto error;
  1353. /* Check for keywords unless parser asks us to ignore keywords. */
  1354. if (!(flags & TSF_KEYWORD_IS_NAME)) {
  1355. const jschar *chars;
  1356. size_t length;
  1357. if (hadUnicodeEscape) {
  1358. chars = tokenbuf.begin();
  1359. length = tokenbuf.length();
  1360. } else {
  1361. chars = identStart;
  1362. length = userbuf.addressOfNextRawChar() - identStart;
  1363. }
  1364. tt = TOK_NAME;
  1365. if (!checkForKeyword(chars, length, &tt, &tp->t_op))
  1366. goto error;
  1367. if (tt != TOK_NAME)
  1368. goto out;
  1369. }
  1370. /*
  1371. * Identifiers containing no Unicode escapes can be atomized directly
  1372. * from userbuf. The rest must use the escapes converted via
  1373. * tokenbuf before atomizing.
  1374. */
  1375. JSAtom *atom;
  1376. if (!hadUnicodeEscape)
  1377. atom = js_AtomizeChars(cx, identStart, userbuf.addressOfNextRawChar() - identStart);
  1378. else
  1379. atom = atomize(cx, tokenbuf);
  1380. if (!atom)
  1381. goto error;
  1382. tp->setName(JSOP_NAME, atom->asPropertyName());
  1383. tt = TOK_NAME;
  1384. goto out;
  1385. }
  /* '.': a number such as ".5", '..' (XML builds only), or a plain dot. */
  1386. if (c1kind == Dot) {
  1387. c = getCharIgnoreEOL();
  1388. if (JS7_ISDEC(c)) {
  1389. numStart = userbuf.addressOfNextRawChar() - 2;
  1390. goto decimal_dot;
  1391. }
  1392. #if JS_HAS_XML_SUPPORT
  1393. if (c == '.') {
  1394. tt = TOK_DBLDOT;
  1395. goto out;
  1396. }
  1397. #endif
  1398. ungetCharIgnoreEOL(c);
  1399. tt = TOK_DOT;
  1400. goto out;
  1401. }
  /* '=': '===', '==', or plain assignment. */
  1402. if (c1kind == Equals) {
  1403. if (matchChar('=')) {
  1404. if (matchChar('=')) {
  1405. tp->t_op = JSOP_STRICTEQ;
  1406. tt = TOK_STRICTEQ;
  1407. } else {
  1408. tp->t_op = JSOP_EQ;
  1409. tt = TOK_EQ;
  1410. }
  1411. } else {
  1412. tp->t_op = JSOP_NOP;
  1413. tt = TOK_ASSIGN;
  1414. }
  1415. goto out;
  1416. }
  1417. /*
  1418. * Look for a string.
  1419. */
  1420. if (c1kind == String) {
  /* Remember the opening quote; only the same quote character ends the string. */
  1421. qc = c;
  1422. tokenbuf.clear();
  1423. while (true) {
  1424. /*
  1425. * We need to detect any of these chars: " or ', \n (or its
  1426. * equivalents), \\, EOF. We use maybeStrSpecial[] in a manner
  1427. * similar to maybeEOL[], see above. Because we detect EOL
  1428. * sequences here and put them back immediately, we can use
  1429. * getCharIgnoreEOL().
  1430. */
  1431. c = getCharIgnoreEOL();
  1432. if (maybeStrSpecial[c & 0xff]) {
  1433. if (c == qc)
  1434. break;
  1435. if (c == '\\') {
  1436. switch (c = getChar()) {
  1437. case 'b': c = '\b'; break;
  1438. case 'f': c = '\f'; break;
  1439. case 'n': c = '\n'; break;
  1440. case 'r': c = '\r'; break;
  1441. case 't': c = '\t'; break;
  1442. case 'v': c = '\v'; break;
  1443. default:
  /* Octal escape: up to three octal digits, with a value of at most 0377. */
  1444. if ('0' <= c && c < '8') {
  1445. int32_t val = JS7_UNDEC(c);
  1446. c = peekChar();
  1447. /* Strict mode code allows only \0, then a non-digit. */
  1448. if (val != 0 || JS7_ISDEC(c)) {
  1449. if (!ReportStrictModeError(cx, this, NULL, NULL,
  1450. JSMSG_DEPRECATED_OCTAL)) {
  1451. goto error;
  1452. }
  1453. setOctalCharacterEscape();
  1454. }
  1455. if ('0' <= c && c < '8') {
  1456. val = 8 * val + JS7_UNDEC(c);
  1457. getChar();
  1458. c = peekChar();
  1459. if ('0' <= c && c < '8') {
  1460. int32_t save = val;
  1461. val = 8 * val + JS7_UNDEC(c);
  1462. if (val <= 0377)
  1463. getChar();
  1464. else
  1465. val = save;
  1466. }
  1467. }
  1468. c = (jschar)val;
  1469. } else if (c == 'u') {
  1470. jschar cp[4];
  1471. if (peekChars(4, cp) &&
  1472. JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
  1473. JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
  1474. c = (((((JS7_UNHEX(cp[0]) << 4)
  1475. + JS7_UNHEX(cp[1])) << 4)
  1476. + JS7_UNHEX(cp[2])) << 4)
  1477. + JS7_UNHEX(cp[3]);
  1478. skipChars(4);
  1479. } else {
  1480. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1481. JSMSG_MALFORMED_ESCAPE, "Unicode");
  1482. goto error;
  1483. }
  1484. } else if (c == 'x') {
  1485. jschar cp[2];
  1486. if (peekChars(2, cp) &&
  1487. JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
  1488. c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
  1489. skipChars(2);
  1490. } else {
  1491. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1492. JSMSG_MALFORMED_ESCAPE, "hexadecimal");
  1493. goto error;
  1494. }
  1495. } else if (c == '\n') {
  1496. /*
  1497. * ES5 7.8.4: an escaped line terminator represents
  1498. * no character.
  1499. */
  1500. continue;
  1501. }
  1502. break;
  1503. }
  1504. } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
  1505. ungetCharIgnoreEOL(c);
  1506. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1507. JSMSG_UNTERMINATED_STRING);
  1508. goto error;
  1509. }
  1510. }
  1511. if (!tokenbuf.append(c))
  1512. goto error;
  1513. }
  1514. JSAtom *atom = atomize(cx, tokenbuf);
  1515. if (!atom)
  1516. goto error;
  1517. tp->pos.end.lineno = lineno;
  1518. tp->setAtom(JSOP_STRING, atom);
  1519. tt = TOK_STRING;
  1520. goto out;
  1521. }
  1522. /*
  1523. * Look for a decimal number.
  1524. */
  1525. if (c1kind == Dec) {
  1526. numStart = userbuf.addressOfNextRawChar() - 1;
  /* Also entered by goto from the '0' handling below, with numStart already set. */
  1527. decimal:
  1528. hasFracOrExp = false;
  1529. while (JS7_ISDEC(c))
  1530. c = getCharIgnoreEOL();
  1531. if (c == '.') {
  /* Also entered by goto from the '.' handling above for numbers like ".5". */
  1532. decimal_dot:
  1533. hasFracOrExp = true;
  1534. do {
  1535. c = getCharIgnoreEOL();
  1536. } while (JS7_ISDEC(c));
  1537. }
  1538. if (c == 'e' || c == 'E') {
  1539. hasFracOrExp = true;
  1540. c = getCharIgnoreEOL();
  1541. if (c == '+' || c == '-')
  1542. c = getCharIgnoreEOL();
  1543. if (!JS7_ISDEC(c)) {
  1544. ungetCharIgnoreEOL(c);
  1545. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1546. JSMSG_MISSING_EXPONENT);
  1547. goto error;
  1548. }
  1549. do {
  1550. c = getCharIgnoreEOL();
  1551. } while (JS7_ISDEC(c));
  1552. }
  1553. ungetCharIgnoreEOL(c);
  /* An identifier may not start immediately after a numeric literal. */
  1554. if (c != EOF && IsIdentifierStart(c)) {
  1555. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
  1556. goto error;
  1557. }
  1558. /*
  1559. * Unlike identifiers and strings, numbers cannot contain escaped
  1560. * chars, so we don't need to use tokenbuf. Instead we can just
  1561. * convert the jschars in userbuf directly to the numeric value.
  1562. */
  1563. jsdouble dval;
  1564. const jschar *dummy;
  1565. if (!hasFracOrExp) {
  1566. if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), 10, &dummy, &dval))
  1567. goto error;
  1568. } else {
  1569. if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
  1570. goto error;
  1571. }
  1572. tp->setNumber(dval);
  1573. tt = TOK_NUMBER;
  1574. goto out;
  1575. }
  /* ':' or, in XML builds, '::'. */
  1576. if (c1kind == Colon) {
  1577. #if JS_HAS_XML_SUPPORT
  1578. if (matchChar(':')) {
  1579. tt = TOK_DBLCOLON;
  1580. goto out;
  1581. }
  1582. #endif
  1583. tp->t_op = JSOP_NOP;
  1584. tt = TOK_COLON;
  1585. goto out;
  1586. }
  /* '+=', '++', or unary/binary '+'. */
  1587. if (c1kind == Plus) {
  1588. if (matchChar('=')) {
  1589. tp->t_op = JSOP_ADD;
  1590. tt = TOK_ADDASSIGN;
  1591. } else if (matchChar('+')) {
  1592. tt = TOK_INC;
  1593. } else {
  1594. tp->t_op = JSOP_POS;
  1595. tt = TOK_PLUS;
  1596. }
  1597. goto out;
  1598. }
  1599. /*
  1600. * Look for a hexadecimal or octal number.
  1601. */
  1602. if (c1kind == HexOct) {
  1603. int radix;
  1604. c = getCharIgnoreEOL();
  1605. if (c == 'x' || c == 'X') {
  1606. radix = 16;
  1607. c = getCharIgnoreEOL();
  1608. if (!JS7_ISHEX(c)) {
  1609. ungetCharIgnoreEOL(c);
  1610. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_MISSING_HEXDIGITS);
  1611. goto error;
  1612. }
  1613. numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0x' */
  1614. while (JS7_ISHEX(c))
  1615. c = getCharIgnoreEOL();
  1616. } else if (JS7_ISDEC(c)) {
  1617. radix = 8;
  1618. numStart = userbuf.addressOfNextRawChar() - 1; /* one past the '0' */
  1619. while (JS7_ISDEC(c)) {
  1620. /* Octal integer literals are not permitted in strict mode code. */
  1621. if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
  1622. goto error;
  1623. /*
  1624. * Outside strict mode, we permit 08 and 09 as decimal numbers,
  1625. * which makes our behaviour a superset of the ECMA numeric
  1626. * grammar. We might not always be so permissive, so we warn
  1627. * about it.
  1628. */
  1629. if (c >= '8') {
  1630. if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
  1631. JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
  1632. goto error;
  1633. }
  1634. goto decimal; /* use the decimal scanner for the rest of the number */
  1635. }
  1636. c = getCharIgnoreEOL();
  1637. }
  1638. } else {
  1639. /* '0' not followed by 'x', 'X' or a digit; scan as a decimal number. */
  1640. numStart = userbuf.addressOfNextRawChar() - 1;
  1641. goto decimal;
  1642. }
  1643. ungetCharIgnoreEOL(c);
  1644. if (c != EOF && IsIdentifierStart(c)) {
  1645. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
  1646. goto error;
  1647. }
  1648. jsdouble dval;
  1649. const jschar *dummy;
  1650. if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
  1651. goto error;
  1652. tp->setNumber(dval);
  1653. tt = TOK_NUMBER;
  1654. goto out;
  1655. }
  1656. /*
  1657. * This handles everything else.
  1658. */
  1659. JS_ASSERT(c1kind == Other);
  1660. switch (c) {
  1661. case '\\':
  /* An identifier may begin with a \uXXXX escape: 6 chars including the '\'. */
  1662. hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
  1663. if (hadUnicodeEscape) {
  1664. identStart = userbuf.addressOfNextRawChar() - 6;
  1665. goto identifier;
  1666. }
  1667. goto badchar;
  1668. case '|':
  1669. if (matchChar(c)) {
  1670. tt = TOK_OR;
  1671. } else if (matchChar('=')) {
  1672. tp->t_op = JSOP_BITOR;
  1673. tt = TOK_BITORASSIGN;
  1674. } else {
  1675. tt = TOK_BITOR;
  1676. }
  1677. break;
  1678. case '^':
  1679. if (matchChar('=')) {
  1680. tp->t_op = JSOP_BITXOR;
  1681. tt = TOK_BITXORASSIGN;
  1682. } else {
  1683. tt = TOK_BITXOR;
  1684. }
  1685. break;
  1686. case '&':
  1687. if (matchChar('&')) {
  1688. tt = TOK_AND;
  1689. } else if (matchChar('=')) {
  1690. tp->t_op = JSOP_BITAND;
  1691. tt = TOK_BITANDASSIGN;
  1692. } else {
  1693. tt = TOK_BITAND;
  1694. }
  1695. break;
  1696. case '!':
  1697. if (matchChar('=')) {
  1698. if (matchChar('=')) {
  1699. tp->t_op = JSOP_STRICTNE;
  1700. tt = TOK_STRICTNE;
  1701. } else {
  1702. tp->t_op = JSOP_NE;
  1703. tt = TOK_NE;
  1704. }
  1705. } else {
  1706. tp->t_op = JSOP_NOT;
  1707. tt = TOK_NOT;
  1708. }
  1709. break;
  1710. #if JS_HAS_XML_SUPPORT
  1711. case '@':
  1712. tt = TOK_AT;
  1713. break;
  1714. #endif
  1715. case '<':
  1716. #if JS_HAS_XML_SUPPORT
  1717. if ((flags & TSF_OPERAND) && !isStrictMode() && (hasXML() || peekChar() != '!')) {
  1718. if (!getXMLMarkup(&tt, &tp))
  1719. goto error;
  1720. goto out;
  1721. }
  1722. #endif
  1723. /* NB: treat HTML begin-comment as comment-till-end-of-line */
  1724. if (matchChar('!')) {
  1725. if (matchChar('-')) {
  1726. if (matchChar('-')) {
  1727. flags |= TSF_IN_HTML_COMMENT;
  1728. goto skipline;
  1729. }
  1730. ungetChar('-');
  1731. }
  1732. ungetChar('!');
  1733. }
  1734. if (matchChar('<')) {
  1735. tp->t_op = JSOP_LSH;
  1736. tt = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
  1737. } else {
  1738. if (matchChar('=')) {
  1739. tp->t_op = JSOP_LE;
  1740. tt = TOK_LE;
  1741. } else {
  1742. tp->t_op = JSOP_LT;
  1743. tt = TOK_LT;
  1744. }
  1745. }
  1746. break;
  1747. case '>':
  1748. if (matchChar('>')) {
  1749. if (matchChar('>')) {
  1750. tp->t_op = JSOP_URSH;
  1751. tt = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
  1752. } else {
  1753. tp->t_op = JSOP_RSH;
  1754. tt = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
  1755. }
  1756. } else {
  1757. if (matchChar('=')) {
  1758. tp->t_op = JSOP_GE;
  1759. tt = TOK_GE;
  1760. } else {
  1761. tp->t_op = JSOP_GT;
  1762. tt = TOK_GT;
  1763. }
  1764. }
  1765. break;
  1766. case '*':
  1767. tp->t_op = JSOP_MUL;
  1768. tt = matchChar('=') ? TOK_MULASSIGN : TOK_STAR;
  1769. break;
  1770. case '/':
  1771. /*
  1772. * Look for a single-line comment.
  1773. */
  1774. if (matchChar('/')) {
  1775. if (cx->hasAtLineOption() && !getAtLine())
  1776. goto error;
  1777. if (!getAtSourceMappingURL())
  1778. goto error;
  /* Also entered by goto for the HTML-style "<!--" and "-->" comment forms. */
  1779. skipline:
  1780. /* Optimize line skipping if we are not in an HTML comment. */
  1781. if (flags & TSF_IN_HTML_COMMENT) {
  1782. while ((c = getChar()) != EOF && c != '\n') {
  1783. if (c == '-' && matchChar('-') && matchChar('>'))
  1784. flags &= ~TSF_IN_HTML_COMMENT;
  1785. }
  1786. } else {
  1787. while ((c = getChar()) != EOF && c != '\n')
  1788. continue;
  1789. }
  1790. ungetChar(c);
  /* Give back the token slot taken by newToken(); a comment yields no token. */
  1791. cursor = (cursor - 1) & ntokensMask;
  1792. goto retry;
  1793. }
  1794. /*
  1795. * Look for a multi-line comment.
  1796. */
  1797. if (matchChar('*')) {
  1798. uintN linenoBefore = lineno;
  1799. while ((c = getChar()) != EOF &&
  1800. !(c == '*' && matchChar('/'))) {
  1801. /* Ignore all characters until comment close. */
  1802. }
  1803. if (c == EOF) {
  1804. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1805. JSMSG_UNTERMINATED_COMMENT);
  1806. goto error;
  1807. }
  1808. if (linenoBefore != lineno)
  1809. updateFlagsForEOL();
  /* Give back the token slot taken by newToken(); a comment yields no token. */
  1810. cursor = (cursor - 1) & ntokensMask;
  1811. goto retry;
  1812. }
  1813. /*
  1814. * Look for a regexp.
  1815. */
  1816. if (flags & TSF_OPERAND) {
  1817. tokenbuf.clear();
  1818. bool inCharClass = false;
  1819. for (;;) {
  1820. c = getChar();
  1821. if (c == '\\') {
  1822. if (!tokenbuf.append(c))
  1823. goto error;
  1824. c = getChar();
  1825. } else if (c == '[') {
  1826. inCharClass = true;
  1827. } else if (c == ']') {
  1828. inCharClass = false;
  1829. } else if (c == '/' && !inCharClass) {
  1830. /* For compat with IE, allow unescaped / in char classes. */
  1831. break;
  1832. }
  1833. if (c == '\n' || c == EOF) {
  1834. ungetChar(c);
  1835. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
  1836. JSMSG_UNTERMINATED_REGEXP);
  1837. goto error;
  1838. }
  1839. if (!tokenbuf.append(c))
  1840. goto error;
  1841. }
  /* Collect the flags g, i, m and y; each may appear at most once. */
  1842. RegExpFlag reflags = NoFlags;
  1843. uintN length = tokenbuf.length() + 1;
  1844. while (true) {
  1845. c = peekChar();
  1846. if (c == 'g' && !(reflags & GlobalFlag))
  1847. reflags = RegExpFlag(reflags | GlobalFlag);
  1848. else if (c == 'i' && !(reflags & IgnoreCaseFlag))
  1849. reflags = RegExpFlag(reflags | IgnoreCaseFlag);
  1850. else if (c == 'm' && !(reflags & MultilineFlag))
  1851. reflags = RegExpFlag(reflags | MultilineFlag);
  1852. else if (c == 'y' && !(reflags & StickyFlag))
  1853. reflags = RegExpFlag(reflags | StickyFlag);
  1854. else
  1855. break;
  1856. getChar();
  1857. length++;
  1858. }
  /* Any other letter directly after the flags (including a repeated flag) is an error. */
  1859. c = peekChar();
  1860. if (JS7_ISLET(c)) {
  1861. char buf[2] = { '\0', '\0' };
  1862. tp->pos.begin.index += length + 1;
  1863. buf[0] = char(c);
  1864. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
  1865. buf);
  1866. (void) getChar();
  1867. goto error;
  1868. }
  1869. tp->setRegExpFlags(reflags);
  1870. tt = TOK_REGEXP;
  1871. break;
  1872. }
  1873. tp->t_op = JSOP_DIV;
  1874. tt = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
  1875. break;
  1876. case '%':
  1877. tp->t_op = JSOP_MOD;
  1878. tt = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
  1879. break;
  1880. case '~':
  1881. tp->t_op = JSOP_BITNOT;
  1882. tt = TOK_BITNOT;
  1883. break;
  1884. case '-':
  1885. if (matchChar('=')) {
  1886. tp->t_op = JSOP_SUB;
  1887. tt = TOK_SUBASSIGN;
  1888. } else if (matchChar(c)) {
  /*
   * "-->" while TSF_DIRTYLINE is clear is skipped like a line comment.
   * NOTE(review): TSF_DIRTYLINE is set at 'out'/'error' below; presumably it
   * is cleared at each EOL, so this applies only before the first token on a
   * line -- confirm against updateFlagsForEOL().
   */
  1889. if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
  1890. flags &= ~TSF_IN_HTML_COMMENT;
  1891. goto skipline;
  1892. }
  1893. tt = TOK_DEC;
  1894. } else {
  1895. tp->t_op = JSOP_NEG;
  1896. tt = TOK_MINUS;
  1897. }
  1898. break;
  1899. badchar:
  1900. default:
  1901. ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
  1902. goto error;
  1903. }
  /* Common success exit: record the token's end index and kind. */
  1904. out:
  1905. flags |= TSF_DIRTYLINE;
  1906. tp->pos.end.index = userbuf.addressOfNextRawChar() - linebase;
  1907. tp->type = tt;
  1908. JS_ASSERT(IsTokenSane(tp));
  1909. return tt;
  1910. error:
  1911. /*
  1912. * For erroneous multi-line tokens we won't have changed end.lineno (it'll
  1913. * still be equal to begin.lineno) so we revert end.index to be equal to
  1914. * begin.index + 1 (as if it's a 1-char token) to avoid having inconsistent
  1915. * begin/end positions. end.index isn't used in error messages anyway.
  1916. */
  1917. flags |= TSF_DIRTYLINE;
  1918. tp->pos.end.index = tp->pos.begin.index + 1;
  1919. tp->type = TOK_ERROR;
  1920. JS_ASSERT(IsTokenSane(tp));
  1921. #ifdef DEBUG
  1922. /*
  1923. * Poisoning userbuf on error establishes an invariant: once an erroneous
  1924. * token has been seen, userbuf will not be consulted again. This is true
  1925. * because the parser will either (a) deal with the TOK_ERROR token by
  1926. * aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
  1927. * match what it expected, it will unget the token, and the next getToken()
  1928. * call will immediately return the just-gotten TOK_ERROR token again
  1929. * without consulting userbuf, thanks to the lookahead buffer.
  1930. */
  1931. userbuf.poison();
  1932. #endif
  1933. return TOK_ERROR;
  1934. }
  1935. JS_FRIEND_API(int)
  1936. js_fgets(char *buf, int size, FILE *file)
  1937. {
  1938. int n, i, c;
  1939. JSBool crflag;
  1940. n = size - 1;
  1941. if (n < 0)
  1942. return -1;
  1943. crflag = JS_FALSE;
  1944. for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
  1945. buf[i] = c;
  1946. if (c == '\n') { /* any \n ends a line */
  1947. i++; /* keep the \n; we know there is room for \0 */
  1948. break;
  1949. }
  1950. if (crflag) { /* \r not followed by \n ends line at the \r */
  1951. ungetc(c, file);
  1952. break; /* and overwrite c in buf with \0 */
  1953. }
  1954. crflag = (c == '\r');
  1955. }
  1956. buf[i] = '\0';
  1957. return i;
  1958. }
  1959. #ifdef DEBUG
  1960. const char *
  1961. TokenKindToString(TokenKind tt)
  1962. {
  1963. switch (tt) {
  1964. case TOK_ERROR: return "TOK_ERROR";
  1965. case TOK_EOF: return "TOK_EOF";
  1966. case TOK_EOL: return "TOK_EOL";
  1967. case TOK_SEMI: return "TOK_SEMI";
  1968. case TOK_COMMA: return "TOK_COMMA";
  1969. case TOK_HOOK: return "TOK_HOOK";
  1970. case TOK_COLON: return "TOK_COLON";
  1971. case TOK_OR: return "TOK_OR";
  1972. case TOK_AND: return "TOK_AND";
  1973. case TOK_BITOR: return "TOK_BITOR";
  1974. case TOK_BITXOR: return "TOK_BITXOR";
  1975. case TOK_BITAND: return "TOK_BITAND";
  1976. case TOK_PLUS: return "TOK_PLUS";
  1977. case TOK_MINUS: return "TOK_MINUS";
  1978. case TOK_STAR: return "TOK_STAR";
  1979. case TOK_DIV: return "TOK_DIV";
  1980. case TOK_MOD: return "TOK_MOD";
  1981. case TOK_INC: return "TOK_INC";
  1982. case TOK_DEC: return "TOK_DEC";
  1983. case TOK_DOT: return "TOK_DOT";
  1984. case TOK_LB: return "TOK_LB";
  1985. case TOK_RB: return "TOK_RB";
  1986. case TOK_LC: return "TOK_LC";
  1987. case TOK_RC: return "TOK_RC";
  1988. case TOK_LP: return "TOK_LP";
  1989. case TOK_RP: return "TOK_RP";
  1990. case TOK_NAME: return "TOK_NAME";
  1991. case TOK_NUMBER: return "TOK_NUMBER";
  1992. case TOK_STRING: return "TOK_STRING";
  1993. case TOK_REGEXP: return "TOK_REGEXP";
  1994. case TOK_TRUE: return "TOK_TRUE";
  1995. case TOK_FALSE: return "TOK_FALSE";
  1996. case TOK_NULL: return "TOK_NULL";
  1997. case TOK_THIS: return "TOK_THIS";
  1998. case TOK_FUNCTION: return "TOK_FUNCTION";
  1999. case TOK_IF: return "TOK_IF";
  2000. case TOK_ELSE: return "TOK_ELSE";
  2001. case TOK_SWITCH: return "TOK_SWITCH";
  2002. case TOK_CASE: return "TOK_CASE";
  2003. case TOK_DEFAULT: return "TOK_DEFAULT";
  2004. case TOK_WHILE: return "TOK_WHILE";
  2005. case TOK_DO: return "TOK_DO";
  2006. case TOK_FOR: return "TOK_FOR";
  2007. case TOK_BREAK: return "TOK_BREAK";
  2008. case TOK_CONTINUE: return "TOK_CONTINUE";
  2009. case TOK_IN: return "TOK_IN";
  2010. case TOK_VAR: return "TOK_VAR";
  2011. case TOK_CONST: return "TOK_CONST";
  2012. case TOK_WITH: return "TOK_WITH";
  2013. case TOK_RETURN: return "TOK_RETURN";
  2014. case TOK_NEW: return "TOK_NEW";
  2015. case TOK_DELETE: return "TOK_DELETE";
  2016. case TOK_TRY: return "TOK_TRY";
  2017. case TOK_CATCH: return "TOK_CATCH";
  2018. case TOK_FINALLY: return "TOK_FINALLY";
  2019. case TOK_THROW: return "TOK_THROW";
  2020. case TOK_INSTANCEOF: return "TOK_INSTANCEOF";
  2021. case TOK_DEBUGGER: return "TOK_DEBUGGER";
  2022. case TOK_XMLSTAGO: return "TOK_XMLSTAGO";
  2023. case TOK_XMLETAGO: return "TOK_XMLETAGO";
  2024. case TOK_XMLPTAGC: return "TOK_XMLPTAGC";
  2025. case TOK_XMLTAGC: return "TOK_XMLTAGC";
  2026. case TOK_XMLNAME: return "TOK_XMLNAME";
  2027. case TOK_XMLATTR: return "TOK_XMLATTR";
  2028. case TOK_XMLSPACE: return "TOK_XMLSPACE";
  2029. case TOK_XMLTEXT: return "TOK_XMLTEXT";
  2030. case TOK_XMLCOMMENT: return "TOK_XMLCOMMENT";
  2031. case TOK_XMLCDATA: return "TOK_XMLCDATA";
  2032. case TOK_XMLPI: return "TOK_XMLPI";
  2033. case TOK_AT: return "TOK_AT";
  2034. case TOK_DBLCOLON: return "TOK_DBLCOLON";
  2035. case TOK_DBLDOT: return "TOK_DBLDOT";
  2036. case TOK_FILTER: return "TOK_FILTER";
  2037. case TOK_XMLELEM: return "TOK_XMLELEM";
  2038. case TOK_XMLLIST: return "TOK_XMLLIST";
  2039. case TOK_YIELD: return "TOK_YIELD";
  2040. case TOK_LEXICALSCOPE: return "TOK_LEXICALSCOPE";
  2041. case TOK_LET: return "TOK_LET";
  2042. case TOK_RESERVED: return "TOK_RESERVED";
  2043. case TOK_STRICT_RESERVED: return "TOK_STRICT_RESERVED";
  2044. case TOK_STRICTEQ: return "TOK_STRICTEQ";
  2045. case TOK_EQ: return "TOK_EQ";
  2046. case TOK_STRICTNE: return "TOK_STRICTNE";
  2047. case TOK_NE: return "TOK_NE";
  2048. case TOK_TYPEOF: return "TOK_TYPEOF";
  2049. case TOK_VOID: return "TOK_VOID";
  2050. case TOK_NOT: return "TOK_NOT";
  2051. case TOK_BITNOT: return "TOK_BITNOT";
  2052. case TOK_LT: return "TOK_LT";
  2053. case TOK_LE: return "TOK_LE";
  2054. case TOK_GT: return "TOK_GT";
  2055. case TOK_GE: return "TOK_GE";
  2056. case TOK_LSH: return "TOK_LSH";
  2057. case TOK_RSH: return "TOK_RSH";
  2058. case TOK_URSH: return "TOK_URSH";
  2059. case TOK_ASSIGN: return "TOK_ASSIGN";
  2060. case TOK_ADDASSIGN: return "TOK_ADDASSIGN";
  2061. case TOK_SUBASSIGN: return "TOK_SUBASSIGN";
  2062. case TOK_BITORASSIGN: return "TOK_BITORASSIGN";
  2063. case TOK_BITXORASSIGN: return "TOK_BITXORASSIGN";
  2064. case TOK_BITANDASSIGN: return "TOK_BITANDASSIGN";
  2065. case TOK_LSHASSIGN: return "TOK_LSHASSIGN";
  2066. case TOK_RSHASSIGN: return "TOK_RSHASSIGN";
  2067. case TOK_URSHASSIGN: return "TOK_URSHASSIGN";
  2068. case TOK_MULASSIGN: return "TOK_MULASSIGN";
  2069. case TOK_DIVASSIGN: return "TOK_DIVASSIGN";
  2070. case TOK_MODASSIGN: return "TOK_MODASSIGN";
  2071. case TOK_LIMIT: break;
  2072. }
  2073. return "<bad TokenKind>";
  2074. }
  2075. #endif