PageRenderTime 163ms CodeModel.GetById 24ms app.highlight 125ms RepoModel.GetById 1ms app.codeStats 0ms

/js/src/frontend/TokenStream.cpp

http://github.com/zpao/v8monkey
C++ | 2284 lines | 2005 code | 109 blank | 170 comment | 346 complexity | d59dae5f9466e64230357dc615a50a25 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2 * vim: set ts=8 sw=4 et tw=99:
   3 *
   4 * ***** BEGIN LICENSE BLOCK *****
   5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version
   8 * 1.1 (the "License"); you may not use this file except in compliance with
   9 * the License. You may obtain a copy of the License at
  10 * http://www.mozilla.org/MPL/
  11 *
  12 * Software distributed under the License is distributed on an "AS IS" basis,
  13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14 * for the specific language governing rights and limitations under the
  15 * License.
  16 *
  17 * The Original Code is Mozilla Communicator client code, released
  18 * March 31, 1998.
  19 *
  20 * The Initial Developer of the Original Code is
  21 * Netscape Communications Corporation.
  22 * Portions created by the Initial Developer are Copyright (C) 1998
  23 * the Initial Developer. All Rights Reserved.
  24 *
  25 * Contributor(s):
  26 *   Nick Fitzgerald <nfitzgerald@mozilla.com>
  27 *
  28 * Alternatively, the contents of this file may be used under the terms of
  29 * either of the GNU General Public License Version 2 or later (the "GPL"),
  30 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  31 * in which case the provisions of the GPL or the LGPL are applicable instead
  32 * of those above. If you wish to allow use of your version of this file only
  33 * under the terms of either the GPL or the LGPL, and not to allow others to
  34 * use your version of this file under the terms of the MPL, indicate your
  35 * decision by deleting the provisions above and replace them with the notice
  36 * and other provisions required by the GPL or the LGPL. If you do not delete
  37 * the provisions above, a recipient may use your version of this file under
  38 * the terms of any one of the MPL, the GPL or the LGPL.
  39 *
  40 * ***** END LICENSE BLOCK ***** */
  41
  42/*
  43 * JS lexical scanner.
  44 */
  45#include <stdio.h>      /* first to avoid trouble on some systems */
  46#include <errno.h>
  47#include <limits.h>
  48#include <math.h>
  49#ifdef HAVE_MEMORY_H
  50#include <memory.h>
  51#endif
  52#include <stdarg.h>
  53#include <stdlib.h>
  54#include <string.h>
  55#include "jstypes.h"
  56#include "jsutil.h"
  57#include "jsprf.h"
  58#include "jsapi.h"
  59#include "jsatom.h"
  60#include "jscntxt.h"
  61#include "jsversion.h"
  62#include "jsexn.h"
  63#include "jsnum.h"
  64#include "jsopcode.h"
  65#include "jsscript.h"
  66
  67#include "frontend/BytecodeEmitter.h"
  68#include "frontend/Parser.h"
  69#include "frontend/TokenStream.h"
  70#include "vm/RegExpObject.h"
  71
  72#include "jsscriptinlines.h"
  73
  74#if JS_HAS_XML_SUPPORT
  75#include "jsxml.h"
  76#endif
  77
  78using namespace js;
  79using namespace js::unicode;
  80
  81#define JS_KEYWORD(keyword, type, op, version) \
  82    const char js_##keyword##_str[] = #keyword;
  83#include "jskeyword.tbl"
  84#undef JS_KEYWORD
  85
    /*
     * jskeyword.tbl is included twice with different JS_KEYWORD definitions.
     * The inclusion above turns each entry into a js_<keyword>_str constant
     * holding the keyword's spelling.  The inclusion below turns each entry
     * into one KeywordInfo initializer, so keywords[] has one element per
     * table entry, in table order.
     *
     * NOTE(review): FindKeyword() indexes this array with values produced by
     * jsautokw.h, which is presumably generated from the same table -- confirm
     * against the build.
     */
  86static const KeywordInfo keywords[] = {
  87#define JS_KEYWORD(keyword, type, op, version) \
  88    {js_##keyword##_str, type, op, version},
  89#include "jskeyword.tbl"
  90#undef JS_KEYWORD
  91};
  92
  93const KeywordInfo *
  94js::FindKeyword(const jschar *s, size_t length)
  95{
  96    JS_ASSERT(length != 0);
  97
  98    register size_t i;
  99    const struct KeywordInfo *kw;
 100    const char *chars;
 101
 102#define JSKW_LENGTH()           length
 103#define JSKW_AT(column)         s[column]
 104#define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
 105#define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
 106#define JSKW_NO_MATCH()         goto no_match;
 107#include "jsautokw.h"
 108#undef JSKW_NO_MATCH
 109#undef JSKW_TEST_GUESS
 110#undef JSKW_GOT_MATCH
 111#undef JSKW_AT
 112#undef JSKW_LENGTH
 113
 114  got_match:
 115    return &keywords[i];
 116
 117  test_guess:
 118    kw = &keywords[i];
 119    chars = kw->chars;
 120    do {
 121        if (*s++ != (unsigned char)(*chars++))
 122            goto no_match;
 123    } while (--length != 0);
 124    return kw;
 125
 126  no_match:
 127    return NULL;
 128}
 129
 130JSBool
 131js::IsIdentifier(JSLinearString *str)
 132{
 133    const jschar *chars = str->chars();
 134    size_t length = str->length();
 135
 136    if (length == 0)
 137        return JS_FALSE;
 138    jschar c = *chars;
 139    if (!IsIdentifierStart(c))
 140        return JS_FALSE;
 141    const jschar *end = chars + length;
 142    while (++chars != end) {
 143        c = *chars;
 144        if (!IsIdentifierPart(c))
 145            return JS_FALSE;
 146    }
 147    return JS_TRUE;
 148}
 149
 150#ifdef _MSC_VER
 151#pragma warning(push)
 152#pragma warning(disable:4351)
 153#endif
 154
 155/*
     * Initialize members that aren't initialized in |init|.
     *
     * The members named with empty parentheses in the initializer list are
     * value-initialized; |tokens| is one of them and is an array (|init| indexes
     * it).  NOTE(review): MSVC warning 4351, disabled around this constructor,
     * is presumably the one emitted for value-initialized array members --
     * confirm against the MSVC documentation.
     *
     * |originPrin| becomes the stream's origin principals; when it is null,
     * |prin| is used instead.  If the chosen principals are non-null a hold is
     * taken on them here; the destructor drops it.
     */
 156TokenStream::TokenStream(JSContext *cx, JSPrincipals *prin, JSPrincipals *originPrin)
 157  : tokens(), cursor(), lookahead(), flags(), listenerTSData(), tokenbuf(cx),
 158    cx(cx), originPrincipals(originPrin ? originPrin : prin)
 159{
 160    if (originPrincipals)
 161        JSPRINCIPALS_HOLD(cx, originPrincipals);
 162}
 163
 164#ifdef _MSC_VER
 165#pragma warning(pop)
 166#endif
 167
 168bool
 169TokenStream::init(const jschar *base, size_t length, const char *fn, uintN ln, JSVersion v)
 170{
 171    filename = fn;
 172    lineno = ln;
 173    version = v;
 174    xml = VersionHasXML(v);
 175
 176    userbuf.init(base, length);
 177    linebase = base;
 178    prevLinebase = NULL;
 179    sourceMap = NULL;
 180
 181    JSSourceHandler listener = cx->debugHooks->sourceHandler;
 182    void *listenerData = cx->debugHooks->sourceHandlerData;
 183
 184    if (listener)
 185        listener(fn, ln, base, length, &listenerTSData, listenerData);
 186
 187    /*
 188     * This table holds all the token kinds that satisfy these properties:
 189     * - A single char long.
 190     * - Cannot be a prefix of any longer token (eg. '+' is excluded because
 191     *   '+=' is a valid token).
 192     * - Doesn't need tp->t_op set (eg. this excludes '~').
 193     *
 194     * The few token kinds satisfying these properties cover roughly 35--45%
 195     * of the tokens seen in practice.
 196     *
 197     * Nb: oneCharTokens, maybeEOL and maybeStrSpecial could be static, but
 198     * initializing them this way is a bit easier.  Don't worry, the time to
 199     * initialize them for each TokenStream is trivial.  See bug 639420.
 200     */
 201    memset(oneCharTokens, 0, sizeof(oneCharTokens));
 202    oneCharTokens[unsigned(';')] = TOK_SEMI;
 203    oneCharTokens[unsigned(',')] = TOK_COMMA;
 204    oneCharTokens[unsigned('?')] = TOK_HOOK;
 205    oneCharTokens[unsigned('[')] = TOK_LB;
 206    oneCharTokens[unsigned(']')] = TOK_RB;
 207    oneCharTokens[unsigned('{')] = TOK_LC;
 208    oneCharTokens[unsigned('}')] = TOK_RC;
 209    oneCharTokens[unsigned('(')] = TOK_LP;
 210    oneCharTokens[unsigned(')')] = TOK_RP;
 211
 212    /* See getChar() for an explanation of maybeEOL[]. */
 213    memset(maybeEOL, 0, sizeof(maybeEOL));
 214    maybeEOL[unsigned('\n')] = true;
 215    maybeEOL[unsigned('\r')] = true;
 216    maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
 217    maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
 218
 219    /* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
 220    memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
 221    maybeStrSpecial[unsigned('"')] = true;
 222    maybeStrSpecial[unsigned('\'')] = true;
 223    maybeStrSpecial[unsigned('\\')] = true;
 224    maybeStrSpecial[unsigned('\n')] = true;
 225    maybeStrSpecial[unsigned('\r')] = true;
 226    maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
 227    maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
 228    maybeStrSpecial[unsigned(EOF & 0xff)] = true;
 229
 230    /*
 231     * Set |ln| as the beginning line number of the ungot "current token", so
 232     * that js::Parser::statements (and potentially other such methods, in the
 233     * future) can create parse nodes with good source coordinates before they
 234     * explicitly get any tokens.
 235     *
 236     * Switching the parser/lexer so we always get the next token ahead of the
 237     * parser needing it (the so-called "pump-priming" model) might be a better
 238     * way to address the dependency from statements on the current token.
 239     */
 240    tokens[0].pos.begin.lineno = tokens[0].pos.end.lineno = ln;
 241    return true;
 242}
 243
 244TokenStream::~TokenStream()
 245{
 246    if (flags & TSF_OWNFILENAME)
 247        cx->free_((void *) filename);
 248    if (sourceMap)
 249        cx->free_(sourceMap);
 250    if (originPrincipals)
 251        JSPRINCIPALS_DROP(cx, originPrincipals);
 252}
 253
 254/*
     * Use the fastest available getc: getc_unlocked when HAVE_GETC_UNLOCKED is
     * defined, else _getc_nolock when HAVE__GETC_NOLOCK is defined, else plain
     * getc.
     */
 255#if defined(HAVE_GETC_UNLOCKED)
 256# define fast_getc getc_unlocked
 257#elif defined(HAVE__GETC_NOLOCK)
 258# define fast_getc _getc_nolock
 259#else
 260# define fast_getc getc
 261#endif
 262
 263JS_ALWAYS_INLINE void
 264TokenStream::updateLineInfoForEOL()
 265{
 266    prevLinebase = linebase;
 267    linebase = userbuf.addressOfNextRawChar();
 268    lineno++;
 269}
 270
 271JS_ALWAYS_INLINE void
 272TokenStream::updateFlagsForEOL()
 273{
 274    flags &= ~TSF_DIRTYLINE;
 275    flags |= TSF_EOL;
 276}
 277
 278/* This gets the next char, normalizing all EOL sequences to '\n' as it goes. */
 279int32_t
 280TokenStream::getChar()
 281{
 282    int32_t c;
 283    if (JS_LIKELY(userbuf.hasRawChars())) {
 284        c = userbuf.getRawChar();
 285
 286        /*
 287         * Normalize the jschar if it was a newline.  We need to detect any of
 288         * these four characters:  '\n' (0x000a), '\r' (0x000d),
 289         * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029).  Testing for each
 290         * one in turn is slow, so we use a single probabilistic check, and if
 291         * that succeeds, test for them individually.
 292         *
 293         * We use the bottom 8 bits to index into a lookup table, succeeding
 294         * when d&0xff is 0xa, 0xd, 0x28 or 0x29.  Among ASCII chars (which
 295         * are by the far the most common) this gives false positives for '('
 296         * (0x0028) and ')' (0x0029).  We could avoid those by incorporating
 297         * the 13th bit of d into the lookup, but that requires extra shifting
 298         * and masking and isn't worthwhile.  See TokenStream::init() for the
 299         * initialization of the relevant entries in the table.
 300         */
 301        if (JS_UNLIKELY(maybeEOL[c & 0xff])) {
 302            if (c == '\n')
 303                goto eol;
 304            if (c == '\r') {
 305                /* if it's a \r\n sequence: treat as a single EOL, skip over the \n */
 306                if (userbuf.hasRawChars())
 307                    userbuf.matchRawChar('\n');
 308                goto eol;
 309            }
 310            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
 311                goto eol;
 312        }
 313        return c;
 314    }
 315
 316    flags |= TSF_EOF;
 317    return EOF;
 318
 319  eol:
 320    updateLineInfoForEOL();
 321    return '\n';
 322}
 323
 324/*
 325 * This gets the next char. It does nothing special with EOL sequences, not
 326 * even updating the line counters.  It can be used safely if (a) the
 327 * resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
 328 * it's an EOL, and (b) the line-related state (lineno, linebase) is not used
 329 * before it's ungotten.
 330 */
 331int32_t
 332TokenStream::getCharIgnoreEOL()
 333{
 334    if (JS_LIKELY(userbuf.hasRawChars()))
 335        return userbuf.getRawChar();
 336
 337    flags |= TSF_EOF;
 338    return EOF;
 339}
 340
 341void
 342TokenStream::ungetChar(int32_t c)
 343{
 344    if (c == EOF)
 345        return;
 346    JS_ASSERT(!userbuf.atStart());
 347    userbuf.ungetRawChar();
 348    if (c == '\n') {
 349#ifdef DEBUG
 350        int32_t c2 = userbuf.peekRawChar();
 351        JS_ASSERT(TokenBuf::isRawEOLChar(c2));
 352#endif
 353
 354        /* if it's a \r\n sequence, also unget the \r */
 355        if (!userbuf.atStart())
 356            userbuf.matchRawCharBackwards('\r');
 357
 358        JS_ASSERT(prevLinebase);    /* we should never get more than one EOL char */
 359        linebase = prevLinebase;
 360        prevLinebase = NULL;
 361        lineno--;
 362    } else {
 363        JS_ASSERT(userbuf.peekRawChar() == c);
 364    }
 365}
 366
 367void
 368TokenStream::ungetCharIgnoreEOL(int32_t c)
 369{
 370    if (c == EOF)
 371        return;
 372    JS_ASSERT(!userbuf.atStart());
 373    userbuf.ungetRawChar();
 374}
 375
 376/*
 377 * Return true iff |n| raw characters can be read from this without reading past
 378 * EOF or a newline, and copy those characters into |cp| if so.  The characters
 379 * are not consumed: use skipChars(n) to do so after checking that the consumed
 380 * characters had appropriate values.
 381 */
 382bool
 383TokenStream::peekChars(intN n, jschar *cp)
 384{
 385    intN i, j;
 386    int32_t c;
 387
 388    for (i = 0; i < n; i++) {
 389        c = getCharIgnoreEOL();
 390        if (c == EOF)
 391            break;
 392        if (c == '\n') {
 393            ungetCharIgnoreEOL(c);
 394            break;
 395        }
 396        cp[i] = (jschar)c;
 397    }
 398    for (j = i - 1; j >= 0; j--)
 399        ungetCharIgnoreEOL(cp[j]);
 400    return i == n;
 401}
 402
 403const jschar *
 404TokenStream::TokenBuf::findEOL()
 405{
 406    const jschar *tmp = ptr;
 407#ifdef DEBUG
 408    /*
 409     * This is the one exception to the "TokenBuf isn't accessed after
 410     * poisoning" rule -- we may end up calling findEOL() in order to set up
 411     * an error.
 412     */
 413    if (!tmp)
 414        tmp = ptrWhenPoisoned;
 415#endif
 416
 417    while (true) {
 418        if (tmp >= limit)
 419            break;
 420        if (TokenBuf::isRawEOLChar(*tmp++))
 421            break;
 422    }
 423    return tmp;
 424}
 425
 426bool
 427TokenStream::reportCompileErrorNumberVA(ParseNode *pn, uintN flags, uintN errorNumber, va_list ap)
 428{
        /*
         * Build a JSErrorReport for compile error |errorNumber| (message
         * arguments in |ap|) and pass it to js_ErrorToException; if that
         * returns false, pass it to the debug error hook and the context's
         * error reporter.
         *
         * The reported position comes from |pn| when it is non-null and from
         * the current token otherwise.  Note that the |flags| parameter
         * (JSREPORT_* bits) shadows the TokenStream member of the same name
         * throughout this function.
         *
         * Returns true when a strict-only report is suppressed because the
         * strict option is off, and when a warning was processed.  Returns
         * false for errors, for warnings turned into errors by the werror
         * option, and when building the report fails.
         */
 429    JSErrorReport report;
 430    char *message;
 431    jschar *linechars;
 432    char *linebytes;
 433    bool warning;
 434    JSBool ok;
 435    const TokenPos *tp;
 436    uintN i;
 437
 438    if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
 439        return true;
 440
 441    warning = JSREPORT_IS_WARNING(flags);
 442    if (warning && cx->hasWErrorOption()) {
 443        flags &= ~JSREPORT_WARNING;
 444        warning = false;
 445    }
 446
 447    PodZero(&report);
 448    report.flags = flags;
 449    report.errorNumber = errorNumber;
        /* Null these before any |goto out| so the cleanup code can test them. */
 450    message = NULL;
 451    linechars = NULL;
 452    linebytes = NULL;
 453
 454    MUST_FLOW_THROUGH("out");
 455    ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
 456                                 errorNumber, &message, &report,
 457                                 !(flags & JSREPORT_UC), ap);
 458    if (!ok) {
 459        warning = false;
 460        goto out;
 461    }
 462
 463    report.filename = filename;
 464    report.originPrincipals = originPrincipals;
 465
 466    tp = pn ? &pn->pn_pos : &currentToken().pos;
 467    report.lineno = tp->begin.lineno;
 468
 469    /*
 470     * Given a token, T, that we want to complain about: if T's (starting)
 471     * lineno doesn't match TokenStream's lineno, that means we've scanned past
 472     * the line that T starts on, which makes it hard to print some or all of
 473     * T's (starting) line for context.
 474     *
 475     * So we don't even try, leaving report.linebuf and friends zeroed.  This
 476     * means that any error involving a multi-line token (eg. an unterminated
 477     * multi-line string literal) won't have a context printed.
 478     */
 479    if (report.lineno == lineno) {
 480        size_t linelength = userbuf.findEOL() - linebase;
 481
 482        linechars = (jschar *)cx->malloc_((linelength + 1) * sizeof(jschar));
 483        if (!linechars) {
 484            warning = false;
 485            goto out;
 486        }
 487        PodCopy(linechars, linebase, linelength);
 488        linechars[linelength] = 0;
 489        linebytes = DeflateString(cx, linechars, linelength);
 490        if (!linebytes) {
 491            warning = false;
 492            goto out;
 493        }
 494
 495        /*
             * Unicode and char versions of the offending source line.
             *
             * NOTE(review): findEOL() returns a pointer just past the EOL
             * character it stops on, so the copied text appears to include
             * that one raw EOL character whenever the line is terminated --
             * confirm whether consumers expect it to be stripped.
             */
 496        report.linebuf = linebytes;
 497        report.uclinebuf = linechars;
 498
 499        /* The lineno check above means we should only see single-line tokens here. */
 500        JS_ASSERT(tp->begin.lineno == tp->end.lineno);
 501        report.tokenptr = report.linebuf + tp->begin.index;
 502        report.uctokenptr = report.uclinebuf + tp->begin.index;
 503    }
 504
 505    /*
 506     * If there's a runtime exception type associated with this error
 507     * number, set that as the pending exception.  For errors occurring at
 508     * compile time, this is very likely to be a JSEXN_SYNTAXERR.
 509     *
 510     * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
 511     * flag will be set in report.flags.  Proper behavior for an error
 512     * reporter is to ignore a report with this flag for all but top-level
 513     * compilation errors.  The exception will remain pending, and so long
 514     * as the non-top-level "load", "eval", or "compile" native function
 515     * returns false, the top-level reporter will eventually receive the
 516     * uncaught exception report.
 517     */
 518    if (!js_ErrorToException(cx, message, &report, NULL, NULL)) {
 519        /*
 520         * If debugErrorHook is present then we give it a chance to veto
 521         * sending the error on to the regular error reporter.
 522         */
 523        bool reportError = true;
 524        if (JSDebugErrorHook hook = cx->debugHooks->debugErrorHook)
 525            reportError = hook(cx, message, &report, cx->debugHooks->debugErrorHookData);
 526
 527        /* Report the error */
 528        if (reportError && cx->errorReporter)
 529            cx->errorReporter(cx, message, &report);
 530    }
 531
 532  out:
        /* Free everything allocated above; each pointer is null if unused. */
 533    if (linebytes)
 534        cx->free_(linebytes);
 535    if (linechars)
 536        cx->free_(linechars);
 537    if (message)
 538        cx->free_(message);
 539    if (report.ucmessage)
 540        cx->free_((void *)report.ucmessage);
 541
 542    if (report.messageArgs) {
            /*
             * The individual arguments are freed only when JSREPORT_UC is not
             * set; the null-terminated array itself is always freed.
             */
 543        if (!(flags & JSREPORT_UC)) {
 544            i = 0;
 545            while (report.messageArgs[i])
 546                cx->free_((void *)report.messageArgs[i++]);
 547        }
 548        cx->free_((void *)report.messageArgs);
 549    }
 550
 551    return warning;
 552}
 553
 554bool
 555js::ReportStrictModeError(JSContext *cx, TokenStream *ts, TreeContext *tc, ParseNode *pn,
 556                          uintN errorNumber, ...)
 557{
 558    JS_ASSERT(ts || tc);
 559    JS_ASSERT(cx == ts->getContext());
 560
 561    /* In strict mode code, this is an error, not merely a warning. */
 562    uintN flags;
 563    if ((ts && ts->isStrictMode()) || (tc && (tc->flags & TCF_STRICT_MODE_CODE))) {
 564        flags = JSREPORT_ERROR;
 565    } else {
 566        if (!cx->hasStrictOption())
 567            return true;
 568        flags = JSREPORT_WARNING;
 569    }
 570
 571    va_list ap;
 572    va_start(ap, errorNumber);
 573    bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 574    va_end(ap);
 575
 576    return result;
 577}
 578
 579bool
 580js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, ParseNode *pn, uintN flags,
 581                             uintN errorNumber, ...)
 582{
 583    va_list ap;
 584
 585    /*
 586     * We don't accept a TreeContext argument, so we can't implement
 587     * JSREPORT_STRICT_MODE_ERROR here.  Use ReportStrictModeError instead,
 588     * or do the checks in the caller and pass plain old JSREPORT_ERROR.
 589     */
 590    JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
 591
 592    va_start(ap, errorNumber);
 593    JS_ASSERT(cx == ts->getContext());
 594    bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 595    va_end(ap);
 596
 597    return result;
 598}
 599
 600#if JS_HAS_XML_SUPPORT
 601
 602bool
 603TokenStream::getXMLEntity()
 604{
 605    ptrdiff_t offset, length, i;
 606    int c, d;
 607    JSBool ispair;
 608    jschar *bp, digit;
 609    char *bytes;
 610    JSErrNum msg;
 611
 612    CharBuffer &tb = tokenbuf;
 613
 614    /* Put the entity, including the '&' already scanned, in tokenbuf. */
 615    offset = tb.length();
 616    if (!tb.append('&'))
 617        return false;
 618    while ((c = getChar()) != ';') {
 619        if (c == EOF || c == '\n') {
 620            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
 621            return false;
 622        }
 623        if (!tb.append(c))
 624            return false;
 625    }
 626
 627    /* Let length be the number of jschars after the '&', including the ';'. */
 628    length = tb.length() - offset;
 629    bp = tb.begin() + offset;
 630    c = d = 0;
 631    ispair = false;
 632    if (length > 2 && bp[1] == '#') {
 633        /* Match a well-formed XML Character Reference. */
 634        i = 2;
 635        if (length > 3 && (bp[i] == 'x' || bp[i] == 'X')) {
 636            if (length > 9)     /* at most 6 hex digits allowed */
 637                goto badncr;
 638            while (++i < length) {
 639                digit = bp[i];
 640                if (!JS7_ISHEX(digit))
 641                    goto badncr;
 642                c = (c << 4) + JS7_UNHEX(digit);
 643            }
 644        } else {
 645            while (i < length) {
 646                digit = bp[i++];
 647                if (!JS7_ISDEC(digit))
 648                    goto badncr;
 649                c = (c * 10) + JS7_UNDEC(digit);
 650                if (c < 0)
 651                    goto badncr;
 652            }
 653        }
 654
 655        if (0x10000 <= c && c <= 0x10FFFF) {
 656            /* Form a surrogate pair (c, d) -- c is the high surrogate. */
 657            d = 0xDC00 + (c & 0x3FF);
 658            c = 0xD7C0 + (c >> 10);
 659            ispair = true;
 660        } else {
 661            /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
 662            if (c != 0x9 && c != 0xA && c != 0xD &&
 663                !(0x20 <= c && c <= 0xD7FF) &&
 664                !(0xE000 <= c && c <= 0xFFFD)) {
 665                goto badncr;
 666            }
 667        }
 668    } else {
 669        /* Try to match one of the five XML 1.0 predefined entities. */
 670        switch (length) {
 671          case 3:
 672            if (bp[2] == 't') {
 673                if (bp[1] == 'l')
 674                    c = '<';
 675                else if (bp[1] == 'g')
 676                    c = '>';
 677            }
 678            break;
 679          case 4:
 680            if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
 681                c = '&';
 682            break;
 683          case 5:
 684            if (bp[3] == 'o') {
 685                if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
 686                    c = '\'';
 687                else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
 688                    c = '"';
 689            }
 690            break;
 691        }
 692        if (c == 0) {
 693            msg = JSMSG_UNKNOWN_XML_ENTITY;
 694            goto bad;
 695        }
 696    }
 697
 698    /* If we matched, retract tokenbuf and store the entity's value. */
 699    *bp++ = (jschar) c;
 700    if (ispair)
 701        *bp++ = (jschar) d;
 702    tb.shrinkBy(tb.end() - bp);
 703    return true;
 704
 705  badncr:
 706    msg = JSMSG_BAD_XML_NCR;
 707  bad:
 708    /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
 709    JS_ASSERT((tb.end() - bp) >= 1);
 710    bytes = DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
 711    if (bytes) {
 712        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
 713        cx->free_(bytes);
 714    }
 715    return false;
 716}
 717
 718bool
 719TokenStream::getXMLTextOrTag(TokenKind *ttp, Token **tpp)
 720{
        /*
         * Scan one token of XML content.  With TSF_XMLTEXTMODE set in flags
         * this scans a run of XML text; otherwise TSF_XMLTAGMODE must be set
         * and one token inside an XML tag is scanned.
         *
         * On success the token kind is stored in *ttp, the token in *tpp, and
         * true is returned.  On failure *ttp is set to TOK_ERROR, *tpp to the
         * token being built, and false is returned.
         */
 721    TokenKind tt;
 722    int c, qc;
 723    Token *tp;
 724    JSAtom *atom;
 725
 726    /*
 727     * Look for XML text.
 728     */
 729    if (flags & TSF_XMLTEXTMODE) {
 730        tt = TOK_XMLSPACE;      /* veto if non-space, return TOK_XMLTEXT */
 731        tp = newToken(0);
 732        tokenbuf.clear();
            /* Text ends at '<' always, and also at '{' unless in XML-only mode. */
 733        qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
 734
 735        while ((c = getChar()) != qc && c != '<' && c != EOF) {
                /* Entities are expanded only in XML-only mode (qc == '<'). */
 736            if (c == '&' && qc == '<') {
 737                if (!getXMLEntity())
 738                    goto error;
 739                tt = TOK_XMLTEXT;
 740                continue;
 741            }
 742
 743            if (!IsXMLSpace(c))
 744                tt = TOK_XMLTEXT;
 745            if (!tokenbuf.append(c))
 746                goto error;
 747        }
            /* Hand the terminating character back for the next scan. */
 748        ungetChar(c);
 749
 750        if (tokenbuf.empty()) {
 751            atom = NULL;
 752        } else {
 753            atom = atomize(cx, tokenbuf);
 754            if (!atom)
 755                goto error;
 756        }
 757        tp->pos.end.lineno = lineno;
 758        tp->setAtom(JSOP_STRING, atom);
 759        goto out;
 760    }
 761
 762    /*
 763     * XML tags.
 764     */
 765    else {
 766        JS_ASSERT(flags & TSF_XMLTAGMODE);
 767        tp = newToken(0);
 768        c = getChar();
            /* A run of XML whitespace is one TOK_XMLSPACE token. */
 769        if (c != EOF && IsXMLSpace(c)) {
 770            do {
 771                c = getChar();
 772                if (c == EOF)
 773                    break;
 774            } while (IsXMLSpace(c));
 775            ungetChar(c);
 776            tp->pos.end.lineno = lineno;
 777            tt = TOK_XMLSPACE;
 778            goto out;
 779        }
 780
 781        if (c == EOF) {
 782            tt = TOK_EOF;
 783            goto out;
 784        }
 785
 786        tokenbuf.clear();
 787        if (IsXMLNamespaceStart(c)) {
 788            JSBool sawColon = JS_FALSE;
 789
 790            if (!tokenbuf.append(c))
 791                goto error;
 792            while ((c = getChar()) != EOF && IsXMLNamePart(c)) {
 793                if (c == ':') {
 794                    int nextc;
 795
                        /*
                         * Reject a second ':' in the name, and a ':' that is
                         * not followed by a name part -- except that '{' may
                         * follow it when not in XML-only mode.
                         */
 796                    if (sawColon ||
 797                        (nextc = peekChar(),
 798                         ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
 799                         !IsXMLNamePart(nextc))) {
 800                        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 801                                                 JSMSG_BAD_XML_QNAME);
 802                        goto error;
 803                    }
 804                    sawColon = JS_TRUE;
 805                }
 806
 807                if (!tokenbuf.append(c))
 808                    goto error;
 809            }
 810
 811            ungetChar(c);
 812            atom = atomize(cx, tokenbuf);
 813            if (!atom)
 814                goto error;
 815            tp->setAtom(JSOP_STRING, atom);
 816            tt = TOK_XMLNAME;
 817            goto out;
 818        }
 819
 820        switch (c) {
 821          case '{':
 822            if (flags & TSF_XMLONLYMODE)
 823                goto bad_xml_char;
 824            tt = TOK_LC;
 825            goto out;
 826
 827          case '=':
 828            tt = TOK_ASSIGN;
 829            goto out;
 830
 831          case '"':
 832          case '\'':
                /* Quoted attribute value: scan up to the matching quote |qc|. */
 833            qc = c;
 834            while ((c = getChar()) != qc) {
 835                if (c == EOF) {
 836                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 837                                             JSMSG_UNTERMINATED_STRING);
 838                    goto error;
 839                }
 840
 841                /*
 842                 * XML attribute values are double-quoted when pretty-printed,
 843                 * so escape " if it is expressed directly in a single-quoted
 844                 * attribute value.
 845                 */
 846                if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
 847                    JS_ASSERT(qc == '\'');
 848                    if (!tokenbuf.append(js_quot_entity_str,
 849                                     strlen(js_quot_entity_str)))
 850                        goto error;
 851                    continue;
 852                }
 853
 854                if (c == '&' && (flags & TSF_XMLONLYMODE)) {
 855                    if (!getXMLEntity())
 856                        goto error;
 857                    continue;
 858                }
 859
 860                if (!tokenbuf.append(c))
 861                    goto error;
 862            }
 863            atom = atomize(cx, tokenbuf);
 864            if (!atom)
 865                goto error;
 866            tp->pos.end.lineno = lineno;
 867            tp->setAtom(JSOP_STRING, atom);
 868            tt = TOK_XMLATTR;
 869            goto out;
 870
 871          case '>':
 872            tt = TOK_XMLTAGC;
 873            goto out;
 874
 875          case '/':
 876            if (matchChar('>')) {
 877                tt = TOK_XMLPTAGC;
 878                goto out;
 879            }
 880            /* FALL THROUGH */
 881
              /* Also reached by goto from the '{' case in XML-only mode. */
 882          bad_xml_char:
 883          default:
 884            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
 885            goto error;
 886        }
 887        JS_NOT_REACHED("getXMLTextOrTag 1");
 888    }
 889    JS_NOT_REACHED("getXMLTextOrTag 2");
 890
 891  out:
 892    *ttp = tt;
 893    *tpp = tp;
 894    return true;
 895
 896  error:
 897    *ttp = TOK_ERROR;
 898    *tpp = tp;
 899    return false;
 900}
 901
 902/*
 903 * After much testing, it's clear that Postel's advice to protocol designers
 904 * ("be liberal in what you accept, and conservative in what you send") invites
 905 * a natural-law repercussion for JS as "protocol":
 906 *
 907 * "If you are liberal in what you accept, others will utterly fail to be
 908 *  conservative in what they send."
 909 *
 910 * Which means you will get <!-- comments to end of line in the middle of .js
 911 * files, and after if conditions whose then statements are on the next line,
 912 * and other wonders.  See at least the following bugs:
 913 * - https://bugzilla.mozilla.org/show_bug.cgi?id=309242
 914 * - https://bugzilla.mozilla.org/show_bug.cgi?id=309712
 915 * - https://bugzilla.mozilla.org/show_bug.cgi?id=310993
 916 *
 917 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan an XML
 918 * comment or CDATA literal.  Instead, we always scan <! as the start of an
 919 * HTML comment hack to end of line, used since Netscape 2 to hide script tag
 920 * content from script-unaware browsers.
 921 *
 922 * But this still leaves XML resources with certain internal structure
 923 * vulnerable to being loaded as script cross-origin, and some internal data
 924 * stolen, so for Firefox 3.5 and beyond, we reject programs whose source
 925 * consists only of XML literals. See:
 926 *
 927 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
 928 *
 929 * The check for this is in js::frontend::CompileScript.
 930 */
 931bool
 932TokenStream::getXMLMarkup(TokenKind *ttp, Token **tpp)
 933{
 934    TokenKind tt;
 935    int c;
 936    Token *tp = *tpp;
 937
 938    /* Check for XML comment or CDATA section. */
 939    if (matchChar('!')) {
 940        tokenbuf.clear();
 941
 942        /* Scan XML comment. */
 943        if (matchChar('-')) {
 944            if (!matchChar('-'))
 945                goto bad_xml_markup;
 946            while ((c = getChar()) != '-' || !matchChar('-')) {
 947                if (c == EOF)
 948                    goto bad_xml_markup;
 949                if (!tokenbuf.append(c))
 950                    goto error;
 951            }
 952            if (!matchChar('>'))
 953                goto bad_xml_markup;
 954
 955            JSAtom *commentText = atomize(cx, tokenbuf);
 956            if (!commentText)
 957                goto error;
 958            tp->setAtom(JSOP_XMLCOMMENT, commentText);
 959            tp->pos.end.lineno = lineno;
 960            tt = TOK_XMLCOMMENT;
 961            goto out;
 962        }
 963
 964        /* Scan CDATA section. */
 965        if (matchChar('[')) {
 966            jschar cp[6];
 967            if (peekChars(6, cp) &&
 968                cp[0] == 'C' &&
 969                cp[1] == 'D' &&
 970                cp[2] == 'A' &&
 971                cp[3] == 'T' &&
 972                cp[4] == 'A' &&
 973                cp[5] == '[') {
 974                skipChars(6);
 975                while ((c = getChar()) != ']' ||
 976                       !peekChars(2, cp) ||
 977                       cp[0] != ']' ||
 978                       cp[1] != '>') {
 979                    if (c == EOF)
 980                        goto bad_xml_markup;
 981                    if (!tokenbuf.append(c))
 982                        goto error;
 983                }
 984                consumeKnownChar(']');
 985                consumeKnownChar('>');
 986
 987                JSAtom *cdataContent = atomize(cx, tokenbuf);
 988                if (!cdataContent)
 989                    goto error;
 990
 991                tp->setAtom(JSOP_XMLCDATA, cdataContent);
 992                tp->pos.end.lineno = lineno;
 993                tt = TOK_XMLCDATA;
 994                goto out;
 995            }
 996            goto bad_xml_markup;
 997        }
 998    }
 999
1000    /* Check for processing instruction. */
1001    if (matchChar('?')) {
1002        bool inTarget = true;
1003        size_t targetLength = 0;
1004        ptrdiff_t contentIndex = -1;
1005
1006        tokenbuf.clear();
1007        while ((c = getChar()) != '?' || peekChar() != '>') {
1008            if (c == EOF)
1009                goto bad_xml_markup;
1010            if (inTarget) {
1011                if (IsXMLSpace(c)) {
1012                    if (tokenbuf.empty())
1013                        goto bad_xml_markup;
1014                    inTarget = false;
1015                } else {
1016                    if (!(tokenbuf.empty()
1017                          ? IsXMLNamespaceStart(c)
1018                          : IsXMLNamespacePart(c))) {
1019                        goto bad_xml_markup;
1020                    }
1021                    ++targetLength;
1022                }
1023            } else {
1024                if (contentIndex < 0 && !IsXMLSpace(c))
1025                    contentIndex = tokenbuf.length();
1026            }
1027            if (!tokenbuf.append(c))
1028                goto error;
1029        }
1030        if (targetLength == 0)
1031            goto bad_xml_markup;
1032
1033        JSAtom *data;
1034        if (contentIndex < 0) {
1035            data = cx->runtime->atomState.emptyAtom;
1036        } else {
1037            data = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
1038                                   tokenbuf.length() - contentIndex);
1039            if (!data)
1040                goto error;
1041        }
1042        tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1043        consumeKnownChar('>');
1044        JSAtom *target = atomize(cx, tokenbuf);
1045        if (!target)
1046            goto error;
1047        tp->setProcessingInstruction(target->asPropertyName(), data);
1048        tp->pos.end.lineno = lineno;
1049        tt = TOK_XMLPI;
1050        goto out;
1051    }
1052
1053    /* An XML start-of-tag character. */
1054    tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1055
1056  out:
1057    *ttp = tt;
1058    *tpp = tp;
1059    return true;
1060
1061  bad_xml_markup:
1062    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1063  error:
1064    *ttp = TOK_ERROR;
1065    *tpp = tp;
1066    return false;
1067}
1068#endif /* JS_HAS_XML_SUPPORT */
1069
1070/*
1071 * We have encountered a '\': check for a Unicode escape sequence after it.
1072 * Return 'true' and the character code value (by value) if we found a
1073 * Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
1074 * advance along the buffer.
1075 */
1076bool
1077TokenStream::peekUnicodeEscape(int *result)
1078{
1079    jschar cp[5];
1080
1081    if (peekChars(5, cp) && cp[0] == 'u' &&
1082        JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
1083        JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
1084    {
1085        *result = (((((JS7_UNHEX(cp[1]) << 4)
1086                + JS7_UNHEX(cp[2])) << 4)
1087              + JS7_UNHEX(cp[3])) << 4)
1088            + JS7_UNHEX(cp[4]);
1089        return true;
1090    }
1091    return false;
1092}
1093
1094bool
1095TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
1096{
1097    if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
1098        skipChars(5);
1099        return true;
1100    }
1101    return false;
1102}
1103
1104bool
1105TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
1106{
1107    if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
1108        skipChars(5);
1109        return true;
1110    }
1111    return false;
1112}
1113
1114/*
1115 * Helper function which returns true if the first length(q) characters in p are
1116 * the same as the characters in q.
1117 */
1118static bool
1119CharsMatch(const jschar *p, const char *q) {
1120    while (*q) {
1121        if (*p++ != *q++)
1122            return false;
1123    }
1124    return true;
1125}
1126
1127bool
1128TokenStream::getAtLine()
1129{
1130    int c;
1131    jschar cp[5];
1132    uintN i, line, temp;
1133    char filenameBuf[1024];
1134
1135    /*
1136     * Hack for source filters such as the Mozilla XUL preprocessor:
1137     * "//@line 123\n" sets the number of the *next* line after the
1138     * comment to 123.  If we reach here, we've already seen "//".
1139     */
1140    if (peekChars(5, cp) && CharsMatch(cp, "@line")) {
1141        skipChars(5);
1142        while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
1143            continue;
1144        if (JS7_ISDEC(c)) {
1145            line = JS7_UNDEC(c);
1146            while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1147                temp = 10 * line + JS7_UNDEC(c);
1148                if (temp < line) {
1149                    /* Ignore overlarge line numbers. */
1150                    return true;
1151                }
1152                line = temp;
1153            }
1154            while (c != '\n' && c != EOF && IsSpaceOrBOM2(c))
1155                c = getChar();
1156            i = 0;
1157            if (c == '"') {
1158                while ((c = getChar()) != EOF && c != '"') {
1159                    if (c == '\n') {
1160                        ungetChar(c);
1161                        return true;
1162                    }
1163                    if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1164                        return true;
1165                    filenameBuf[i++] = (char) c;
1166                }
1167                if (c == '"') {
1168                    while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
1169                        continue;
1170                }
1171            }
1172            filenameBuf[i] = '\0';
1173            if (c == EOF || c == '\n') {
1174                if (i > 0) {
1175                    if (flags & TSF_OWNFILENAME)
1176                        cx->free_((void *) filename);
1177                    filename = JS_strdup(cx, filenameBuf);
1178                    if (!filename)
1179                        return false;
1180                    flags |= TSF_OWNFILENAME;
1181                }
1182                lineno = line;
1183            }
1184        }
1185        ungetChar(c);
1186    }
1187    return true;
1188}
1189
1190bool
1191TokenStream::getAtSourceMappingURL()
1192{
1193    jschar peeked[18];
1194
1195    /* Match comments of the form @sourceMappingURL=<url> */
1196    if (peekChars(18, peeked) && CharsMatch(peeked, "@sourceMappingURL=")) {
1197        skipChars(18);
1198        tokenbuf.clear();
1199
1200        jschar c;
1201        while (!IsSpaceOrBOM2((c = getChar())) &&
1202               c && c != jschar(EOF))
1203            tokenbuf.append(c);
1204
1205        if (tokenbuf.empty())
1206            /* The source map's URL was missing, but not quite an exception that
1207             * we should stop and drop everything for, though. */
1208            return true;
1209
1210        int len = tokenbuf.length();
1211
1212        if (sourceMap)
1213            cx->free_(sourceMap);
1214        sourceMap = (jschar *) cx->malloc_(sizeof(jschar) * (len + 1));
1215        if (!sourceMap)
1216            return false;
1217
1218        for (int i = 0; i < len; i++)
1219            sourceMap[i] = tokenbuf[i];
1220        sourceMap[len] = '\0';
1221    }
1222    return true;
1223}
1224
1225Token *
1226TokenStream::newToken(ptrdiff_t adjust)
1227{
1228    cursor = (cursor + 1) & ntokensMask;
1229    Token *tp = &tokens[cursor];
1230    tp->ptr = userbuf.addressOfNextRawChar() + adjust;
1231    tp->pos.begin.index = tp->ptr - linebase;
1232    tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
1233    return tp;
1234}
1235
1236JS_ALWAYS_INLINE JSAtom *
1237TokenStream::atomize(JSContext *cx, CharBuffer &cb)
1238{
1239    return js_AtomizeChars(cx, cb.begin(), cb.length());
1240}
1241
#ifdef DEBUG
/*
 * Debug-only consistency check for a token.  Returns false when the token's
 * type is out of range, when a single-line token ends before it begins, or
 * when a token spans several lines although its kind may not.
 */
bool
IsTokenSane(Token *tp)
{
    /*
     * Nb: TOK_EOL should never be used in an actual Token;  it should only be
     * returned as a TokenKind from peekTokenSameLine().
     */
    if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
        return false;

    /* Same line: the begin index must not exceed the end index. */
    if (tp->pos.begin.lineno == tp->pos.end.lineno)
        return tp->pos.begin.index <= tp->pos.end.index;

    /* Only certain token kinds can be multi-line. */
    switch (tp->type) {
      case TOK_STRING:
      case TOK_XMLATTR:
      case TOK_XMLSPACE:
      case TOK_XMLTEXT:
      case TOK_XMLCOMMENT:
      case TOK_XMLCDATA:
      case TOK_XMLPI:
        return true;
      default:
        return false;
    }
}
#endif
1274
1275bool
1276TokenStream::putIdentInTokenbuf(const jschar *identStart)
1277{
1278    int32_t c, qc;
1279    const jschar *tmp = userbuf.addressOfNextRawChar();
1280    userbuf.setAddressOfNextRawChar(identStart);
1281
1282    tokenbuf.clear();
1283    for (;;) {
1284        c = getCharIgnoreEOL();
1285        if (!IsIdentifierPart(c)) {
1286            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1287                break;
1288            c = qc;
1289        }
1290        if (!tokenbuf.append(c)) {
1291            userbuf.setAddressOfNextRawChar(tmp);
1292            return false;
1293        }
1294    }
1295    userbuf.setAddressOfNextRawChar(tmp);
1296    return true;
1297}
1298
1299bool
1300TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp)
1301{
1302    JS_ASSERT(!ttp == !topp);
1303
1304    const KeywordInfo *kw = FindKeyword(s, length);
1305    if (!kw)
1306        return true;
1307
1308    if (kw->tokentype == TOK_RESERVED) {
1309        return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1310                                        JSMSG_RESERVED_ID, kw->chars);
1311    }
1312
1313    if (kw->tokentype != TOK_STRICT_RESERVED) {
1314        if (kw->version <= versionNumber()) {
1315            /* Working keyword. */
1316            if (ttp) {
1317                *ttp = kw->tokentype;
1318                *topp = (JSOp) kw->op;
1319                return true;
1320            }
1321            return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1322                                            JSMSG_RESERVED_ID, kw->chars);
1323        }
1324
1325        /*
1326         * The keyword is not in this version. Treat it as an identifier,
1327         * unless it is let or yield which we treat as TOK_STRICT_RESERVED by
1328         * falling through to the code below (ES5 forbids them in strict mode).
1329         */
1330        if (kw->tokentype != TOK_LET && kw->tokentype != TOK_YIELD)
1331            return true;
1332    }
1333
1334    /* Strict reserved word. */
1335    if (isStrictMode())
1336        return ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars);
1337    return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_STRICT | JSREPORT_WARNING,
1338                                    JSMSG_RESERVED_ID, kw->chars);
1339}
1340
/*
 * Coarse classification of a token's first character, for chars 0..127.
 * The enumerator order matters: Space and EOL are deliberately the two
 * largest values.
 */
enum FirstCharKind {
    Other,      /* everything not listed below */
    OneChar,    /* a character that is a whole token by itself */
    Ident,      /* can start an identifier */
    Dot,        /* '.' */
    Equals,     /* '=' */
    String,     /* a quote character */
    Dec,        /* '1'..'9' */
    Colon,      /* ':' */
    Plus,       /* '+' */
    HexOct,     /* '0' */

    /* These two must be last, so that |c >= Space| matches both. */
    Space,      /* whitespace other than line terminators */
    EOL         /* '\n' or '\r' */
};

#define _______ Other

/*
 * firstCharKinds[c] is the FirstCharKind of the ASCII character c:
 *
 * OneChar: 40, 41, 44, 59, 63, 91, 93, 123, 125: '(', ')', ',', ';', '?', '[', ']', '{', '}'
 * Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
 * Dot:     46: '.'
 * Equals:  61: '='
 * String:  34, 39: '"', '\''
 * Dec:     49..57: '1'..'9'
 * Colon:   58: ':'
 * Plus:    43: '+'
 * HexOct:  48: '0'
 * Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
 * EOL:     10, 13: '\n', '\r'
 *
 * Every other entry is Other (written _______ to keep the rows readable).
 * The table has exactly 128 entries, eight per row.
 */
static const uint8_t firstCharKinds[] = {
/*   0..7   */ _______, _______, _______, _______, _______, _______, _______, _______,
/*   8..15  */ _______,   Space,     EOL,   Space,   Space,     EOL, _______, _______,
/*  16..23  */ _______, _______, _______, _______, _______, _______, _______, _______,
/*  24..31  */ _______, _______, _______, _______, _______, _______, _______, _______,
/*  32..39  */   Space, _______,  String, _______,   Ident, _______, _______,  String,
/*  40..47  */ OneChar, OneChar, _______,    Plus, OneChar, _______,     Dot, _______,
/*  48..55  */  HexOct,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,
/*  56..63  */     Dec,     Dec,   Colon, OneChar, _______,  Equals, _______, OneChar,
/*  64..71  */ _______,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  72..79  */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  80..87  */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  88..95  */   Ident,   Ident,   Ident, OneChar, _______, OneChar, _______,   Ident,
/*  96..103 */ _______,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 104..111 */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 112..119 */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 120..127 */   Ident,   Ident,   Ident, OneChar, _______, OneChar, _______, _______
};

#undef _______
1391
1392TokenKind
1393TokenStream::getTokenInternal()
1394{
1395    TokenKind tt;
1396    int c, qc;
1397    Token *tp;
1398    FirstCharKind c1kind;
1399    const jschar *numStart;
1400    bool hasFracOrExp;
1401    const jschar *identStart;
1402    bool hadUnicodeEscape;
1403
1404#if JS_HAS_XML_SUPPORT
1405    /*
1406     * Look for XML text and tags.
1407     */
1408    if (flags & (TSF_XMLTEXTMODE|TSF_XMLTAGMODE)) {
1409        if (!getXMLTextOrTag(&tt, &tp))
1410            goto error;
1411        goto out;
1412    }
1413#endif
1414
1415  retry:
1416    if (JS_UNLIKELY(!userbuf.hasRawChars())) {
1417        tp = newToken(0);
1418        tt = TOK_EOF;
1419        flags |= TSF_EOF;
1420        goto out;
1421    }
1422
1423    c = userbuf.getRawChar();
1424    JS_ASSERT(c != EOF);
1425
1426    /*
1427     * Chars not in the range 0..127 are rare.  Getting them out of the way
1428     * early allows subsequent checking to be faster.
1429     */
1430    if (JS_UNLIKELY(c >= 128)) {
1431        if (IsSpaceOrBOM2(c)) {
1432            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
1433                updateLineInfoForEOL();
1434                updateFlagsForEOL();
1435            }
1436
1437            goto retry;
1438        }
1439
1440        tp = newToken(-1);
1441
1442        /* '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. */
1443        JS_STATIC_ASSERT('$' < 128 && '_' < 128);
1444        if (IsLetter(c)) {
1445            identStart = userbuf.addressOfNextRawChar() - 1;
1446            hadUnicodeEscape = false;
1447            goto identifier;
1448        }
1449
1450        goto badchar;
1451    }
1452
1453    /*
1454     * Get the token kind, based on the first char.  The ordering of c1kind
1455     * comparison is based on the frequency of tokens in real code.  Minified
1456     * and non-minified code have different characteristics, mostly in that
1457     * whitespace occurs much less in minified code.  Token kinds that fall in
1458     * the 'Other' category typically account for less than 2% of all tokens,
1459     * so their order doesn't matter much.
1460     */
1461    c1kind = FirstCharKind(firstCharKinds[c]);
1462
1463    /*
1464     * Skip over whitespace chars;  update line state on EOLs.  Even though
1465     * whitespace isn't very common in minified code we have to handle it first
1466     * (and jump back to 'retry') before calling newToken().
1467     */
1468    if (c1kind >= Space) {
1469        if (c1kind == EOL) {
1470            /* If it's a \r\n sequence: treat as a single EOL, skip over the \n. */
1471            if (c == '\r' && userbuf.hasRawChars())
1472                userbuf.matchRawChar('\n');
1473            updateLineInfoForEOL();
1474            updateFlagsForEOL();
1475        }
1476        goto retry;
1477    }
1478
1479    tp = newToken(-1);
1480
1481    /*
1482     * Look for an unambiguous single-char token.
1483     */
1484    if (c1kind == OneChar) {
1485        tt = (TokenKind)oneCharTokens[c];
1486        goto out;
1487    }
1488
1489    /*
1490     * Look for an identifier.
1491     */
1492    if (c1kind == Ident) {
1493        identStart = userbuf.addressOfNextRawChar() - 1;
1494        hadUnicodeEscape = false;
1495
1496      identifier:
1497        for (;;) {
1498            c = getCharIgnoreEOL();
1499            if (c == EOF)
1500                break;
1501            if (!IsIdentifierPart(c)) {
1502                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1503                    break;
1504                hadUnicodeEscape = true;
1505            }
1506        }
1507        ungetCharIgnoreEOL(c);
1508
1509        /* Convert the escapes by putting into tokenbuf. */
1510        if (hadUnicodeEscape && !putIdentInTokenbuf(identStart))
1511            goto error;
1512
1513        /* Check for keywords unless parser asks us to ignore keywords. */
1514        if (!(flags & TSF_KEYWORD_IS_NAME)) {
1515            const jschar *chars;
1516            size_t length;
1517            if (hadUnicodeEscape) {
1518                chars = tokenbuf.begin();
1519                length = tokenbuf.length();
1520            } else {
1521                chars = identStart;
1522                length = userbuf.addressOfNextRawChar() - identStart;
1523            }
1524            tt = TOK_NAME;
1525            if (!checkForKeyword(chars, length, &tt, &tp->t_op))
1526                goto error;
1527            if (tt != TOK_NAME)
1528                goto out;
1529        }
1530
1531        /*
1532         * Identifiers containing no Unicode escapes can be atomized directly
1533         * from userbuf.  The rest must use the escapes converted via
1534         * tokenbuf before atomizing.
1535         */
1536        JSAtom *atom;
1537        if (!hadUnicodeEscape)
1538            atom = js_AtomizeChars(cx, identStart, userbuf.addressOfNextRawChar() - identStart);
1539        else
1540            atom = atomize(cx, tokenbuf);
1541        if (!atom)
1542            goto error;
1543        tp->setName(JSOP_NAME, atom->asPropertyName());
1544        tt = TOK_NAME;
1545        goto out;
1546    }
1547
1548    if (c1kind == Dot) {
1549        c = getCharIgnoreEOL();
1550        if (JS7_ISDEC(c)) {
1551            numStart = userbuf.addressOfNextRawChar() - 2;
1552            goto decimal_dot;
1553        }
1554#if JS_HAS_XML_SUPPORT
1555        if (c == '.') {
1556            tt = TOK_DBLDOT;
1557            goto out;
1558        }
1559#endif
1560        ungetCharIgnoreEOL(c);
1561        tt = TOK_DOT;
1562        goto out;
1563    }
1564
1565    if (c1kind == Equals) {
1566        if (matchChar('=')) {
1567            if (matchChar('=')) {
1568                tp->t_op = JSOP_STRICTEQ;
1569                tt = TOK_STRICTEQ;
1570            } else {
1571                tp->t_op = JSOP_EQ;
1572                tt = TOK_EQ;
1573            }
1574        } else {
1575            tp->t_op = JSOP_NOP;
1576            tt = TOK_ASSIGN;
1577        }
1578        goto out;
1579    }
1580
1581    /*
1582     * Look for a string.
1583     */
1584    if (c1kind == String) {
1585        qc = c;
1586        tokenbuf.clear();
1587        while (true) {
1588            /*
1589             * We need to detect any of these chars:  " or ', \n (or its
1590             * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
1591             * similar to maybeEOL[], see above.  Because we detect EOL
1592             * sequences here and put them back immediately, we can use
1593             * getCharIgnoreEOL().
1594             */
1595            c = getCharIgnoreEOL();
1596            if (maybeStrSpecial[c & 0xff]) {
1597                if (c == qc)
1598                    break;
1599                if (c == '\\') {
1600                    switch (c = getChar()) {
1601                      case 'b': c = '\b'; break;
1602                      case 'f': c = '\f'; break;
1603                      case 'n': c = '\n'; break;
1604                      case 'r': c = '\r'; break;
1605                      case 't': c = '\t'; break;
1606                      case 'v': c = '\v'; break;
1607
1608                      default:
1609                        if ('0' <= c && c < '8') {
1610                            int32_t val = JS7_UNDEC(c);
1611
1612                            c = peekChar();
1613                            /* Strict mode code allows only \0, then a non-digit. */
1614                            if (val != 0 || JS7_ISDEC(c)) {
1615                                if (!ReportStrictModeError(cx, this, NULL, NULL,
1616                                                           JSMSG_DEPRECATED_OCTAL)) {
1617                                    goto error;
1618                                }
1619                                setOctalCharacterEscape();
1620                            }
1621                            if ('0' <= c && c < '8') {
1622                                val = 8 * val + JS7_UNDEC(c);
1623                                getChar();
1624                                c = peekChar();
1625                                if ('0' <= c && c < '8') {
1626                                    int32_t save = val;
1627                                    val = 8 * val + JS7_UNDEC(c);
1628                                    if (val <= 0377)
1629                                        getChar();
1630                                    else
1631                                        val = save;
1632                                }
1633                            }
1634
1635                            c = (jschar)val;
1636                        } else if (c == 'u') {
1637                            jschar cp[4];
1638                            if (peekChars(4, cp) &&
1639                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1640                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1641                                c = (((((JS7_UNHEX(cp[0]) << 4)
1642                            

Large files are truncated, but you can click here to view the full file