PageRenderTime 182ms CodeModel.GetById 14ms app.highlight 151ms RepoModel.GetById 2ms app.codeStats 0ms

/js/src/frontend/TokenStream.cpp

http://github.com/zpao/v8monkey
C++ | 2284 lines | 2005 code | 109 blank | 170 comment | 346 complexity | d59dae5f9466e64230357dc615a50a25 MD5 | raw file
   1/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2 * vim: set ts=8 sw=4 et tw=99:
   3 *
   4 * ***** BEGIN LICENSE BLOCK *****
   5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version
   8 * 1.1 (the "License"); you may not use this file except in compliance with
   9 * the License. You may obtain a copy of the License at
  10 * http://www.mozilla.org/MPL/
  11 *
  12 * Software distributed under the License is distributed on an "AS IS" basis,
  13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14 * for the specific language governing rights and limitations under the
  15 * License.
  16 *
  17 * The Original Code is Mozilla Communicator client code, released
  18 * March 31, 1998.
  19 *
  20 * The Initial Developer of the Original Code is
  21 * Netscape Communications Corporation.
  22 * Portions created by the Initial Developer are Copyright (C) 1998
  23 * the Initial Developer. All Rights Reserved.
  24 *
  25 * Contributor(s):
  26 *   Nick Fitzgerald <nfitzgerald@mozilla.com>
  27 *
  28 * Alternatively, the contents of this file may be used under the terms of
  29 * either of the GNU General Public License Version 2 or later (the "GPL"),
  30 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  31 * in which case the provisions of the GPL or the LGPL are applicable instead
  32 * of those above. If you wish to allow use of your version of this file only
  33 * under the terms of either the GPL or the LGPL, and not to allow others to
  34 * use your version of this file under the terms of the MPL, indicate your
  35 * decision by deleting the provisions above and replace them with the notice
  36 * and other provisions required by the GPL or the LGPL. If you do not delete
  37 * the provisions above, a recipient may use your version of this file under
  38 * the terms of any one of the MPL, the GPL or the LGPL.
  39 *
  40 * ***** END LICENSE BLOCK ***** */
  41
  42/*
  43 * JS lexical scanner.
  44 */
  45#include <stdio.h>      /* first to avoid trouble on some systems */
  46#include <errno.h>
  47#include <limits.h>
  48#include <math.h>
  49#ifdef HAVE_MEMORY_H
  50#include <memory.h>
  51#endif
  52#include <stdarg.h>
  53#include <stdlib.h>
  54#include <string.h>
  55#include "jstypes.h"
  56#include "jsutil.h"
  57#include "jsprf.h"
  58#include "jsapi.h"
  59#include "jsatom.h"
  60#include "jscntxt.h"
  61#include "jsversion.h"
  62#include "jsexn.h"
  63#include "jsnum.h"
  64#include "jsopcode.h"
  65#include "jsscript.h"
  66
  67#include "frontend/BytecodeEmitter.h"
  68#include "frontend/Parser.h"
  69#include "frontend/TokenStream.h"
  70#include "vm/RegExpObject.h"
  71
  72#include "jsscriptinlines.h"
  73
  74#if JS_HAS_XML_SUPPORT
  75#include "jsxml.h"
  76#endif
  77
  78using namespace js;
  79using namespace js::unicode;
  80
  /*
   * First expansion of "jskeyword.tbl": every JS_KEYWORD(keyword, ...) use
   * defines a char array named js_<keyword>_str that holds the keyword's
   * spelling.  (Presumably the table invokes JS_KEYWORD once per keyword; the
   * table itself is not part of this file -- confirm there.)
   */
  81#define JS_KEYWORD(keyword, type, op, version) \
  82    const char js_##keyword##_str[] = #keyword;
  83#include "jskeyword.tbl"
  84#undef JS_KEYWORD
  85
  /*
   * Second expansion of the same table: every JS_KEYWORD use contributes one
   * KeywordInfo initializer that refers to the string defined above.
   * js::FindKeyword() returns pointers into this array.
   */
  86static const KeywordInfo keywords[] = {
  87#define JS_KEYWORD(keyword, type, op, version) \
  88    {js_##keyword##_str, type, op, version},
  89#include "jskeyword.tbl"
  90#undef JS_KEYWORD
  91};
  92
  93const KeywordInfo *
  94js::FindKeyword(const jschar *s, size_t length)
  95{
  96    JS_ASSERT(length != 0);
  97
  98    register size_t i;
  99    const struct KeywordInfo *kw;
 100    const char *chars;
 101
 102#define JSKW_LENGTH()           length
 103#define JSKW_AT(column)         s[column]
 104#define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
 105#define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
 106#define JSKW_NO_MATCH()         goto no_match;
 107#include "jsautokw.h"
 108#undef JSKW_NO_MATCH
 109#undef JSKW_TEST_GUESS
 110#undef JSKW_GOT_MATCH
 111#undef JSKW_AT
 112#undef JSKW_LENGTH
 113
 114  got_match:
 115    return &keywords[i];
 116
 117  test_guess:
 118    kw = &keywords[i];
 119    chars = kw->chars;
 120    do {
 121        if (*s++ != (unsigned char)(*chars++))
 122            goto no_match;
 123    } while (--length != 0);
 124    return kw;
 125
 126  no_match:
 127    return NULL;
 128}
 129
 130JSBool
 131js::IsIdentifier(JSLinearString *str)
 132{
 133    const jschar *chars = str->chars();
 134    size_t length = str->length();
 135
 136    if (length == 0)
 137        return JS_FALSE;
 138    jschar c = *chars;
 139    if (!IsIdentifierStart(c))
 140        return JS_FALSE;
 141    const jschar *end = chars + length;
 142    while (++chars != end) {
 143        c = *chars;
 144        if (!IsIdentifierPart(c))
 145            return JS_FALSE;
 146    }
 147    return JS_TRUE;
 148}
 149
 150#ifdef _MSC_VER
 151#pragma warning(push)
 152#pragma warning(disable:4351)
 153#endif
 154
 155/* Initialize members that aren't initialized in |init|. */
 156TokenStream::TokenStream(JSContext *cx, JSPrincipals *prin, JSPrincipals *originPrin)
 157  : tokens(), cursor(), lookahead(), flags(), listenerTSData(), tokenbuf(cx),
 158    cx(cx), originPrincipals(originPrin ? originPrin : prin)
 159{
 160    if (originPrincipals)
 161        JSPRINCIPALS_HOLD(cx, originPrincipals);
 162}
 163
 164#ifdef _MSC_VER
 165#pragma warning(pop)
 166#endif
 167
 168bool
 169TokenStream::init(const jschar *base, size_t length, const char *fn, uintN ln, JSVersion v)
 170{
 171    filename = fn;
 172    lineno = ln;
 173    version = v;
 174    xml = VersionHasXML(v);
 175
 176    userbuf.init(base, length);
 177    linebase = base;
 178    prevLinebase = NULL;
 179    sourceMap = NULL;
 180
 181    JSSourceHandler listener = cx->debugHooks->sourceHandler;
 182    void *listenerData = cx->debugHooks->sourceHandlerData;
 183
 184    if (listener)
 185        listener(fn, ln, base, length, &listenerTSData, listenerData);
 186
 187    /*
 188     * This table holds all the token kinds that satisfy these properties:
 189     * - A single char long.
 190     * - Cannot be a prefix of any longer token (eg. '+' is excluded because
 191     *   '+=' is a valid token).
 192     * - Doesn't need tp->t_op set (eg. this excludes '~').
 193     *
 194     * The few token kinds satisfying these properties cover roughly 35--45%
 195     * of the tokens seen in practice.
 196     *
 197     * Nb: oneCharTokens, maybeEOL and maybeStrSpecial could be static, but
 198     * initializing them this way is a bit easier.  Don't worry, the time to
 199     * initialize them for each TokenStream is trivial.  See bug 639420.
 200     */
 201    memset(oneCharTokens, 0, sizeof(oneCharTokens));
 202    oneCharTokens[unsigned(';')] = TOK_SEMI;
 203    oneCharTokens[unsigned(',')] = TOK_COMMA;
 204    oneCharTokens[unsigned('?')] = TOK_HOOK;
 205    oneCharTokens[unsigned('[')] = TOK_LB;
 206    oneCharTokens[unsigned(']')] = TOK_RB;
 207    oneCharTokens[unsigned('{')] = TOK_LC;
 208    oneCharTokens[unsigned('}')] = TOK_RC;
 209    oneCharTokens[unsigned('(')] = TOK_LP;
 210    oneCharTokens[unsigned(')')] = TOK_RP;
 211
 212    /* See getChar() for an explanation of maybeEOL[]. */
 213    memset(maybeEOL, 0, sizeof(maybeEOL));
 214    maybeEOL[unsigned('\n')] = true;
 215    maybeEOL[unsigned('\r')] = true;
 216    maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
 217    maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
 218
 219    /* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
 220    memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
 221    maybeStrSpecial[unsigned('"')] = true;
 222    maybeStrSpecial[unsigned('\'')] = true;
 223    maybeStrSpecial[unsigned('\\')] = true;
 224    maybeStrSpecial[unsigned('\n')] = true;
 225    maybeStrSpecial[unsigned('\r')] = true;
 226    maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
 227    maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
 228    maybeStrSpecial[unsigned(EOF & 0xff)] = true;
 229
 230    /*
 231     * Set |ln| as the beginning line number of the ungot "current token", so
 232     * that js::Parser::statements (and potentially other such methods, in the
 233     * future) can create parse nodes with good source coordinates before they
 234     * explicitly get any tokens.
 235     *
 236     * Switching the parser/lexer so we always get the next token ahead of the
 237     * parser needing it (the so-called "pump-priming" model) might be a better
 238     * way to address the dependency from statements on the current token.
 239     */
 240    tokens[0].pos.begin.lineno = tokens[0].pos.end.lineno = ln;
 241    return true;
 242}
 243
 244TokenStream::~TokenStream()
 245{
 246    if (flags & TSF_OWNFILENAME)
 247        cx->free_((void *) filename);
 248    if (sourceMap)
 249        cx->free_(sourceMap);
 250    if (originPrincipals)
 251        JSPRINCIPALS_DROP(cx, originPrincipals);
 252}
 253
 254/*
  * Use the fastest available getc: fast_getc expands to getc_unlocked when
  * HAVE_GETC_UNLOCKED is defined, otherwise to _getc_nolock when
  * HAVE__GETC_NOLOCK is defined, and to plain getc when neither is.
  */
 255#if defined(HAVE_GETC_UNLOCKED)
 256# define fast_getc getc_unlocked
 257#elif defined(HAVE__GETC_NOLOCK)
 258# define fast_getc _getc_nolock
 259#else
 260# define fast_getc getc
 261#endif
 262
 263JS_ALWAYS_INLINE void
 264TokenStream::updateLineInfoForEOL()
 265{
 266    prevLinebase = linebase;
 267    linebase = userbuf.addressOfNextRawChar();
 268    lineno++;
 269}
 270
 271JS_ALWAYS_INLINE void
 272TokenStream::updateFlagsForEOL()
 273{
 274    flags &= ~TSF_DIRTYLINE;
 275    flags |= TSF_EOL;
 276}
 277
 278/* This gets the next char, normalizing all EOL sequences to '\n' as it goes. */
 279int32_t
 280TokenStream::getChar()
 281{
 282    int32_t c;
 283    if (JS_LIKELY(userbuf.hasRawChars())) {
 284        c = userbuf.getRawChar();
 285
 286        /*
 287         * Normalize the jschar if it was a newline.  We need to detect any of
 288         * these four characters:  '\n' (0x000a), '\r' (0x000d),
 289         * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029).  Testing for each
 290         * one in turn is slow, so we use a single probabilistic check, and if
 291         * that succeeds, test for them individually.
 292         *
 293         * We use the bottom 8 bits to index into a lookup table, succeeding
 294         * when d&0xff is 0xa, 0xd, 0x28 or 0x29.  Among ASCII chars (which
 295         * are by the far the most common) this gives false positives for '('
 296         * (0x0028) and ')' (0x0029).  We could avoid those by incorporating
 297         * the 13th bit of d into the lookup, but that requires extra shifting
 298         * and masking and isn't worthwhile.  See TokenStream::init() for the
 299         * initialization of the relevant entries in the table.
 300         */
 301        if (JS_UNLIKELY(maybeEOL[c & 0xff])) {
 302            if (c == '\n')
 303                goto eol;
 304            if (c == '\r') {
 305                /* if it's a \r\n sequence: treat as a single EOL, skip over the \n */
 306                if (userbuf.hasRawChars())
 307                    userbuf.matchRawChar('\n');
 308                goto eol;
 309            }
 310            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
 311                goto eol;
 312        }
 313        return c;
 314    }
 315
 316    flags |= TSF_EOF;
 317    return EOF;
 318
 319  eol:
 320    updateLineInfoForEOL();
 321    return '\n';
 322}
 323
 324/*
 325 * This gets the next char. It does nothing special with EOL sequences, not
 326 * even updating the line counters.  It can be used safely if (a) the
 327 * resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
 328 * it's an EOL, and (b) the line-related state (lineno, linebase) is not used
 329 * before it's ungotten.
 330 */
 331int32_t
 332TokenStream::getCharIgnoreEOL()
 333{
 334    if (JS_LIKELY(userbuf.hasRawChars()))
 335        return userbuf.getRawChar();
 336
 337    flags |= TSF_EOF;
 338    return EOF;
 339}
 340
 341void
 342TokenStream::ungetChar(int32_t c)
 343{
 344    if (c == EOF)
 345        return;
 346    JS_ASSERT(!userbuf.atStart());
 347    userbuf.ungetRawChar();
 348    if (c == '\n') {
 349#ifdef DEBUG
 350        int32_t c2 = userbuf.peekRawChar();
 351        JS_ASSERT(TokenBuf::isRawEOLChar(c2));
 352#endif
 353
 354        /* if it's a \r\n sequence, also unget the \r */
 355        if (!userbuf.atStart())
 356            userbuf.matchRawCharBackwards('\r');
 357
 358        JS_ASSERT(prevLinebase);    /* we should never get more than one EOL char */
 359        linebase = prevLinebase;
 360        prevLinebase = NULL;
 361        lineno--;
 362    } else {
 363        JS_ASSERT(userbuf.peekRawChar() == c);
 364    }
 365}
 366
 367void
 368TokenStream::ungetCharIgnoreEOL(int32_t c)
 369{
 370    if (c == EOF)
 371        return;
 372    JS_ASSERT(!userbuf.atStart());
 373    userbuf.ungetRawChar();
 374}
 375
 376/*
 377 * Return true iff |n| raw characters can be read from this without reading past
 378 * EOF or a newline, and copy those characters into |cp| if so.  The characters
 379 * are not consumed: use skipChars(n) to do so after checking that the consumed
 380 * characters had appropriate values.
 381 */
 382bool
 383TokenStream::peekChars(intN n, jschar *cp)
 384{
 385    intN i, j;
 386    int32_t c;
 387
 388    for (i = 0; i < n; i++) {
 389        c = getCharIgnoreEOL();
 390        if (c == EOF)
 391            break;
 392        if (c == '\n') {
 393            ungetCharIgnoreEOL(c);
 394            break;
 395        }
 396        cp[i] = (jschar)c;
 397    }
 398    for (j = i - 1; j >= 0; j--)
 399        ungetCharIgnoreEOL(cp[j]);
 400    return i == n;
 401}
 402
 403const jschar *
 404TokenStream::TokenBuf::findEOL()
 405{
 406    const jschar *tmp = ptr;
 407#ifdef DEBUG
 408    /*
 409     * This is the one exception to the "TokenBuf isn't accessed after
 410     * poisoning" rule -- we may end up calling findEOL() in order to set up
 411     * an error.
 412     */
 413    if (!tmp)
 414        tmp = ptrWhenPoisoned;
 415#endif
 416
 417    while (true) {
 418        if (tmp >= limit)
 419            break;
 420        if (TokenBuf::isRawEOLChar(*tmp++))
 421            break;
 422    }
 423    return tmp;
 424}
 425
 /*
  * Build a JSErrorReport for |errorNumber| and deliver it as a compile-time
  * error or warning.
  *
  * pn          - node whose position is blamed; when null, the position of
  *               the current token is used instead.
  * flags       - JSREPORT_* flags.  A strict-only report is dropped (and true
  *               returned) unless the context has the strict option; a warning
  *               is turned into an error when the werror option is set.
  * errorNumber - message number, expanded through js_GetErrorMessage.
  * ap          - arguments for the message.
  *
  * Returns true if the report was dropped as described above, or if it was
  * delivered as a warning and nothing failed while building it; returns false
  * for errors and for any failure along the way.
  */
 426bool
 427TokenStream::reportCompileErrorNumberVA(ParseNode *pn, uintN flags, uintN errorNumber, va_list ap)
 428{
 429    JSErrorReport report;
 430    char *message;
 431    jschar *linechars;
 432    char *linebytes;
 433    bool warning;
 434    JSBool ok;
 435    const TokenPos *tp;
 436    uintN i;
 437
 438    if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
 439        return true;
 440
 441    warning = JSREPORT_IS_WARNING(flags);
 442    if (warning && cx->hasWErrorOption()) {
 443        flags &= ~JSREPORT_WARNING;
 444        warning = false;
 445    }
 446
 447    PodZero(&report);
 448    report.flags = flags;
 449    report.errorNumber = errorNumber;
 450    message = NULL;
 451    linechars = NULL;
 452    linebytes = NULL;
 453
     /*
      * From here on every exit goes through the |out| label, which frees
      * whatever has been allocated so far.
      */
 454    MUST_FLOW_THROUGH("out");
 455    ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
 456                                 errorNumber, &message, &report,
 457                                 !(flags & JSREPORT_UC), ap);
 458    if (!ok) {
 459        warning = false;
 460        goto out;
 461    }
 462
 463    report.filename = filename;
 464    report.originPrincipals = originPrincipals;
 465
 466    tp = pn ? &pn->pn_pos : &currentToken().pos;
 467    report.lineno = tp->begin.lineno;
 468
 469    /*
 470     * Given a token, T, that we want to complain about: if T's (starting)
 471     * lineno doesn't match TokenStream's lineno, that means we've scanned past
 472     * the line that T starts on, which makes it hard to print some or all of
 473     * T's (starting) line for context.
 474     *
 475     * So we don't even try, leaving report.linebuf and friends zeroed.  This
 476     * means that any error involving a multi-line token (eg. an unterminated
 477     * multi-line string literal) won't have a context printed.
 478     */
 479    if (report.lineno == lineno) {
 480        size_t linelength = userbuf.findEOL() - linebase;
 481
         /* Copy the line into a NUL-terminated jschar buffer of its own. */
 482        linechars = (jschar *)cx->malloc_((linelength + 1) * sizeof(jschar));
 483        if (!linechars) {
 484            warning = false;
 485            goto out;
 486        }
 487        PodCopy(linechars, linebase, linelength);
 488        linechars[linelength] = 0;
 489        linebytes = DeflateString(cx, linechars, linelength);
 490        if (!linebytes) {
 491            warning = false;
 492            goto out;
 493        }
 494
 495        /* Unicode and char versions of the offending source line, without final \n */
 496        report.linebuf = linebytes;
 497        report.uclinebuf = linechars;
 498
 499        /* The lineno check above means we should only see single-line tokens here. */
 500        JS_ASSERT(tp->begin.lineno == tp->end.lineno);
 501        report.tokenptr = report.linebuf + tp->begin.index;
 502        report.uctokenptr = report.uclinebuf + tp->begin.index;
 503    }
 504
 505    /*
 506     * If there's a runtime exception type associated with this error
 507     * number, set that as the pending exception.  For errors occurring at
 508     * compile time, this is very likely to be a JSEXN_SYNTAXERR.
 509     *
 510     * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
 511     * flag will be set in report.flags.  Proper behavior for an error
 512     * reporter is to ignore a report with this flag for all but top-level
 513     * compilation errors.  The exception will remain pending, and so long
 514     * as the non-top-level "load", "eval", or "compile" native function
 515     * returns false, the top-level reporter will eventually receive the
 516     * uncaught exception report.
 517     */
 518    if (!js_ErrorToException(cx, message, &report, NULL, NULL)) {
 519        /*
 520         * If debugErrorHook is present then we give it a chance to veto
 521         * sending the error on to the regular error reporter.
 522         */
 523        bool reportError = true;
 524        if (JSDebugErrorHook hook = cx->debugHooks->debugErrorHook)
 525            reportError = hook(cx, message, &report, cx->debugHooks->debugErrorHookData);
 526
 527        /* Report the error */
 528        if (reportError && cx->errorReporter)
 529            cx->errorReporter(cx, message, &report);
 530    }
 531
     /* Common exit: free everything allocated above, on success and failure alike. */
 532  out:
 533    if (linebytes)
 534        cx->free_(linebytes);
 535    if (linechars)
 536        cx->free_(linechars);
 537    if (message)
 538        cx->free_(message);
 539    if (report.ucmessage)
 540        cx->free_((void *)report.ucmessage);
 541
 542    if (report.messageArgs) {
         /*
          * The individual arguments are freed only when JSREPORT_UC is not
          * set; the argument array itself is always freed.
          * NOTE(review): presumably js_ExpandErrorArguments allocates copies
          * of the arguments only in the non-UC case -- confirm there.
          */
 543        if (!(flags & JSREPORT_UC)) {
 544            i = 0;
 545            while (report.messageArgs[i])
 546                cx->free_((void *)report.messageArgs[i++]);
 547        }
 548        cx->free_((void *)report.messageArgs);
 549    }
 550
 551    return warning;
 552}
 553
 554bool
 555js::ReportStrictModeError(JSContext *cx, TokenStream *ts, TreeContext *tc, ParseNode *pn,
 556                          uintN errorNumber, ...)
 557{
 558    JS_ASSERT(ts || tc);
 559    JS_ASSERT(cx == ts->getContext());
 560
 561    /* In strict mode code, this is an error, not merely a warning. */
 562    uintN flags;
 563    if ((ts && ts->isStrictMode()) || (tc && (tc->flags & TCF_STRICT_MODE_CODE))) {
 564        flags = JSREPORT_ERROR;
 565    } else {
 566        if (!cx->hasStrictOption())
 567            return true;
 568        flags = JSREPORT_WARNING;
 569    }
 570
 571    va_list ap;
 572    va_start(ap, errorNumber);
 573    bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 574    va_end(ap);
 575
 576    return result;
 577}
 578
 579bool
 580js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, ParseNode *pn, uintN flags,
 581                             uintN errorNumber, ...)
 582{
 583    va_list ap;
 584
 585    /*
 586     * We don't accept a TreeContext argument, so we can't implement
 587     * JSREPORT_STRICT_MODE_ERROR here.  Use ReportStrictModeError instead,
 588     * or do the checks in the caller and pass plain old JSREPORT_ERROR.
 589     */
 590    JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
 591
 592    va_start(ap, errorNumber);
 593    JS_ASSERT(cx == ts->getContext());
 594    bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 595    va_end(ap);
 596
 597    return result;
 598}
 599
 600#if JS_HAS_XML_SUPPORT
 601
 602bool
 603TokenStream::getXMLEntity()
 604{
 605    ptrdiff_t offset, length, i;
 606    int c, d;
 607    JSBool ispair;
 608    jschar *bp, digit;
 609    char *bytes;
 610    JSErrNum msg;
 611
 612    CharBuffer &tb = tokenbuf;
 613
 614    /* Put the entity, including the '&' already scanned, in tokenbuf. */
 615    offset = tb.length();
 616    if (!tb.append('&'))
 617        return false;
 618    while ((c = getChar()) != ';') {
 619        if (c == EOF || c == '\n') {
 620            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
 621            return false;
 622        }
 623        if (!tb.append(c))
 624            return false;
 625    }
 626
 627    /* Let length be the number of jschars after the '&', including the ';'. */
 628    length = tb.length() - offset;
 629    bp = tb.begin() + offset;
 630    c = d = 0;
 631    ispair = false;
 632    if (length > 2 && bp[1] == '#') {
 633        /* Match a well-formed XML Character Reference. */
 634        i = 2;
 635        if (length > 3 && (bp[i] == 'x' || bp[i] == 'X')) {
 636            if (length > 9)     /* at most 6 hex digits allowed */
 637                goto badncr;
 638            while (++i < length) {
 639                digit = bp[i];
 640                if (!JS7_ISHEX(digit))
 641                    goto badncr;
 642                c = (c << 4) + JS7_UNHEX(digit);
 643            }
 644        } else {
 645            while (i < length) {
 646                digit = bp[i++];
 647                if (!JS7_ISDEC(digit))
 648                    goto badncr;
 649                c = (c * 10) + JS7_UNDEC(digit);
 650                if (c < 0)
 651                    goto badncr;
 652            }
 653        }
 654
 655        if (0x10000 <= c && c <= 0x10FFFF) {
 656            /* Form a surrogate pair (c, d) -- c is the high surrogate. */
 657            d = 0xDC00 + (c & 0x3FF);
 658            c = 0xD7C0 + (c >> 10);
 659            ispair = true;
 660        } else {
 661            /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
 662            if (c != 0x9 && c != 0xA && c != 0xD &&
 663                !(0x20 <= c && c <= 0xD7FF) &&
 664                !(0xE000 <= c && c <= 0xFFFD)) {
 665                goto badncr;
 666            }
 667        }
 668    } else {
 669        /* Try to match one of the five XML 1.0 predefined entities. */
 670        switch (length) {
 671          case 3:
 672            if (bp[2] == 't') {
 673                if (bp[1] == 'l')
 674                    c = '<';
 675                else if (bp[1] == 'g')
 676                    c = '>';
 677            }
 678            break;
 679          case 4:
 680            if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
 681                c = '&';
 682            break;
 683          case 5:
 684            if (bp[3] == 'o') {
 685                if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
 686                    c = '\'';
 687                else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
 688                    c = '"';
 689            }
 690            break;
 691        }
 692        if (c == 0) {
 693            msg = JSMSG_UNKNOWN_XML_ENTITY;
 694            goto bad;
 695        }
 696    }
 697
 698    /* If we matched, retract tokenbuf and store the entity's value. */
 699    *bp++ = (jschar) c;
 700    if (ispair)
 701        *bp++ = (jschar) d;
 702    tb.shrinkBy(tb.end() - bp);
 703    return true;
 704
 705  badncr:
 706    msg = JSMSG_BAD_XML_NCR;
 707  bad:
 708    /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
 709    JS_ASSERT((tb.end() - bp) >= 1);
 710    bytes = DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
 711    if (bytes) {
 712        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
 713        cx->free_(bytes);
 714    }
 715    return false;
 716}
 717
 /*
  * Scan the next token while the stream is in one of its XML scanning modes.
  *
  * With TSF_XMLTEXTMODE set, characters are collected up to (not including)
  * the next '<', EOF or -- unless TSF_XMLONLYMODE is set -- '{'.  The result
  * is TOK_XMLSPACE if only XML whitespace (or nothing) was collected and
  * TOK_XMLTEXT otherwise; the token's atom is null when nothing was
  * collected.  Entity references are decoded only in TSF_XMLONLYMODE.
  *
  * Otherwise TSF_XMLTAGMODE must be set and one tag-level token is scanned:
  * a run of XML whitespace (TOK_XMLSPACE), EOF (TOK_EOF), a possibly
  * qualified name (TOK_XMLNAME), '{' (TOK_LC, rejected in TSF_XMLONLYMODE),
  * '=' (TOK_ASSIGN), a quoted attribute value (TOK_XMLATTR), '>'
  * (TOK_XMLTAGC) or "/>" (TOK_XMLPTAGC).  Anything else is reported as
  * JSMSG_BAD_XML_CHARACTER.
  *
  * On success the kind is stored in *ttp, the token in *tpp, and true is
  * returned.  On failure *ttp is set to TOK_ERROR, *tpp to the token being
  * built, and false is returned.
  */
 718bool
 719TokenStream::getXMLTextOrTag(TokenKind *ttp, Token **tpp)
 720{
 721    TokenKind tt;
 722    int c, qc;
 723    Token *tp;
 724    JSAtom *atom;
 725
 726    /*
 727     * Look for XML text.
 728     */
 729    if (flags & TSF_XMLTEXTMODE) {
 730        tt = TOK_XMLSPACE;      /* veto if non-space, return TOK_XMLTEXT */
 731        tp = newToken(0);
 732        tokenbuf.clear();
         /* In XML-only mode '{' is ordinary text, so only '<' ends the run. */
 733        qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
 734
 735        while ((c = getChar()) != qc && c != '<' && c != EOF) {
             /* qc == '<' means XML-only mode: decode the entity reference. */
 736            if (c == '&' && qc == '<') {
 737                if (!getXMLEntity())
 738                    goto error;
 739                tt = TOK_XMLTEXT;
 740                continue;
 741            }
 742
 743            if (!IsXMLSpace(c))
 744                tt = TOK_XMLTEXT;
 745            if (!tokenbuf.append(c))
 746                goto error;
 747        }
         /* Put back the character that ended the text run. */
 748        ungetChar(c);
 749
 750        if (tokenbuf.empty()) {
 751            atom = NULL;
 752        } else {
 753            atom = atomize(cx, tokenbuf);
 754            if (!atom)
 755                goto error;
 756        }
 757        tp->pos.end.lineno = lineno;
 758        tp->setAtom(JSOP_STRING, atom);
 759        goto out;
 760    }
 761
 762    /*
 763     * XML tags.
 764     */
 765    else {
 766        JS_ASSERT(flags & TSF_XMLTAGMODE);
 767        tp = newToken(0);
 768        c = getChar();
         /* A run of XML whitespace becomes a single TOK_XMLSPACE token. */
 769        if (c != EOF && IsXMLSpace(c)) {
 770            do {
 771                c = getChar();
 772                if (c == EOF)
 773                    break;
 774            } while (IsXMLSpace(c));
 775            ungetChar(c);
 776            tp->pos.end.lineno = lineno;
 777            tt = TOK_XMLSPACE;
 778            goto out;
 779        }
 780
 781        if (c == EOF) {
 782            tt = TOK_EOF;
 783            goto out;
 784        }
 785
 786        tokenbuf.clear();
 787        if (IsXMLNamespaceStart(c)) {
 788            JSBool sawColon = JS_FALSE;
 789
 790            if (!tokenbuf.append(c))
 791                goto error;
 792            while ((c = getChar()) != EOF && IsXMLNamePart(c)) {
 793                if (c == ':') {
 794                    int nextc;
 795
                     /*
                      * Reject a second ':' and a ':' that is not followed by
                      * a name character; outside XML-only mode a following
                      * '{' is accepted as well.
                      */
 796                    if (sawColon ||
 797                        (nextc = peekChar(),
 798                         ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
 799                         !IsXMLNamePart(nextc))) {
 800                        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 801                                                 JSMSG_BAD_XML_QNAME);
 802                        goto error;
 803                    }
 804                    sawColon = JS_TRUE;
 805                }
 806
 807                if (!tokenbuf.append(c))
 808                    goto error;
 809            }
 810
 811            ungetChar(c);
 812            atom = atomize(cx, tokenbuf);
 813            if (!atom)
 814                goto error;
 815            tp->setAtom(JSOP_STRING, atom);
 816            tt = TOK_XMLNAME;
 817            goto out;
 818        }
 819
 820        switch (c) {
 821          case '{':
 822            if (flags & TSF_XMLONLYMODE)
 823                goto bad_xml_char;
 824            tt = TOK_LC;
 825            goto out;
 826
 827          case '=':
 828            tt = TOK_ASSIGN;
 829            goto out;
 830
 831          case '"':
 832          case '\'':
             /* Quoted attribute value: collect up to the matching quote. */
 833            qc = c;
 834            while ((c = getChar()) != qc) {
 835                if (c == EOF) {
 836                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 837                                             JSMSG_UNTERMINATED_STRING);
 838                    goto error;
 839                }
 840
 841                /*
 842                 * XML attribute values are double-quoted when pretty-printed,
 843                 * so escape " if it is expressed directly in a single-quoted
 844                 * attribute value.
 845                 */
 846                if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
 847                    JS_ASSERT(qc == '\'');
 848                    if (!tokenbuf.append(js_quot_entity_str,
 849                                     strlen(js_quot_entity_str)))
 850                        goto error;
 851                    continue;
 852                }
 853
                 /* Entity references are decoded only in XML-only mode. */
 854                if (c == '&' && (flags & TSF_XMLONLYMODE)) {
 855                    if (!getXMLEntity())
 856                        goto error;
 857                    continue;
 858                }
 859
 860                if (!tokenbuf.append(c))
 861                    goto error;
 862            }
 863            atom = atomize(cx, tokenbuf);
 864            if (!atom)
 865                goto error;
 866            tp->pos.end.lineno = lineno;
 867            tp->setAtom(JSOP_STRING, atom);
 868            tt = TOK_XMLATTR;
 869            goto out;
 870
 871          case '>':
 872            tt = TOK_XMLTAGC;
 873            goto out;
 874
 875          case '/':
 876            if (matchChar('>')) {
 877                tt = TOK_XMLPTAGC;
 878                goto out;
 879            }
 880            /* FALL THROUGH */
 881
 882          bad_xml_char:
 883          default:
 884            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
 885            goto error;
 886        }
 887        JS_NOT_REACHED("getXMLTextOrTag 1");
 888    }
 889    JS_NOT_REACHED("getXMLTextOrTag 2");
 890
     /* Success: hand the token kind and the token back to the caller. */
 891  out:
 892    *ttp = tt;
 893    *tpp = tp;
 894    return true;
 895
     /* Failure: the caller still receives the token that was being built. */
 896  error:
 897    *ttp = TOK_ERROR;
 898    *tpp = tp;
 899    return false;
 900}
 901
 902/*
 903 * After much testing, it's clear that Postel's advice to protocol designers
 904 * ("be liberal in what you accept, and conservative in what you send") invites
 905 * a natural-law repercussion for JS as "protocol":
 906 *
 907 * "If you are liberal in what you accept, others will utterly fail to be
 908 *  conservative in what they send."
 909 *
 910 * Which means you will get <!-- comments to end of line in the middle of .js
 911 * files, and after if conditions whose then statements are on the next line,
 912 * and other wonders.  See at least the following bugs:
 913 * - https://bugzilla.mozilla.org/show_bug.cgi?id=309242
 914 * - https://bugzilla.mozilla.org/show_bug.cgi?id=309712
 915 * - https://bugzilla.mozilla.org/show_bug.cgi?id=310993
 916 *
 917 * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan an XML
 918 * comment or CDATA literal.  Instead, we always scan <! as the start of an
 919 * HTML comment hack to end of line, used since Netscape 2 to hide script tag
 920 * content from script-unaware browsers.
 921 *
 922 * But this still leaves XML resources with certain internal structure
 923 * vulnerable to being loaded as script cross-origin, and some internal data
 924 * stolen, so for Firefox 3.5 and beyond, we reject programs whose source
 925 * consists only of XML literals. See:
 926 *
 927 * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
 928 *
 929 * The check for this is in js::frontend::CompileScript.
 930 */
 931bool
 932TokenStream::getXMLMarkup(TokenKind *ttp, Token **tpp)
 933{
 934    TokenKind tt;
 935    int c;
 936    Token *tp = *tpp;
 937
 938    /* Check for XML comment or CDATA section. */
 939    if (matchChar('!')) {
 940        tokenbuf.clear();
 941
 942        /* Scan XML comment. */
 943        if (matchChar('-')) {
 944            if (!matchChar('-'))
 945                goto bad_xml_markup;
 946            while ((c = getChar()) != '-' || !matchChar('-')) {
 947                if (c == EOF)
 948                    goto bad_xml_markup;
 949                if (!tokenbuf.append(c))
 950                    goto error;
 951            }
 952            if (!matchChar('>'))
 953                goto bad_xml_markup;
 954
 955            JSAtom *commentText = atomize(cx, tokenbuf);
 956            if (!commentText)
 957                goto error;
 958            tp->setAtom(JSOP_XMLCOMMENT, commentText);
 959            tp->pos.end.lineno = lineno;
 960            tt = TOK_XMLCOMMENT;
 961            goto out;
 962        }
 963
 964        /* Scan CDATA section. */
 965        if (matchChar('[')) {
 966            jschar cp[6];
 967            if (peekChars(6, cp) &&
 968                cp[0] == 'C' &&
 969                cp[1] == 'D' &&
 970                cp[2] == 'A' &&
 971                cp[3] == 'T' &&
 972                cp[4] == 'A' &&
 973                cp[5] == '[') {
 974                skipChars(6);
 975                while ((c = getChar()) != ']' ||
 976                       !peekChars(2, cp) ||
 977                       cp[0] != ']' ||
 978                       cp[1] != '>') {
 979                    if (c == EOF)
 980                        goto bad_xml_markup;
 981                    if (!tokenbuf.append(c))
 982                        goto error;
 983                }
 984                consumeKnownChar(']');
 985                consumeKnownChar('>');
 986
 987                JSAtom *cdataContent = atomize(cx, tokenbuf);
 988                if (!cdataContent)
 989                    goto error;
 990
 991                tp->setAtom(JSOP_XMLCDATA, cdataContent);
 992                tp->pos.end.lineno = lineno;
 993                tt = TOK_XMLCDATA;
 994                goto out;
 995            }
 996            goto bad_xml_markup;
 997        }
 998    }
 999
1000    /* Check for processing instruction. */
1001    if (matchChar('?')) {
1002        bool inTarget = true;
1003        size_t targetLength = 0;
1004        ptrdiff_t contentIndex = -1;
1005
1006        tokenbuf.clear();
1007        while ((c = getChar()) != '?' || peekChar() != '>') {
1008            if (c == EOF)
1009                goto bad_xml_markup;
1010            if (inTarget) {
1011                if (IsXMLSpace(c)) {
1012                    if (tokenbuf.empty())
1013                        goto bad_xml_markup;
1014                    inTarget = false;
1015                } else {
1016                    if (!(tokenbuf.empty()
1017                          ? IsXMLNamespaceStart(c)
1018                          : IsXMLNamespacePart(c))) {
1019                        goto bad_xml_markup;
1020                    }
1021                    ++targetLength;
1022                }
1023            } else {
1024                if (contentIndex < 0 && !IsXMLSpace(c))
1025                    contentIndex = tokenbuf.length();
1026            }
1027            if (!tokenbuf.append(c))
1028                goto error;
1029        }
1030        if (targetLength == 0)
1031            goto bad_xml_markup;
1032
1033        JSAtom *data;
1034        if (contentIndex < 0) {
1035            data = cx->runtime->atomState.emptyAtom;
1036        } else {
1037            data = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
1038                                   tokenbuf.length() - contentIndex);
1039            if (!data)
1040                goto error;
1041        }
1042        tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1043        consumeKnownChar('>');
1044        JSAtom *target = atomize(cx, tokenbuf);
1045        if (!target)
1046            goto error;
1047        tp->setProcessingInstruction(target->asPropertyName(), data);
1048        tp->pos.end.lineno = lineno;
1049        tt = TOK_XMLPI;
1050        goto out;
1051    }
1052
1053    /* An XML start-of-tag character. */
1054    tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1055
1056  out:
1057    *ttp = tt;
1058    *tpp = tp;
1059    return true;
1060
1061  bad_xml_markup:
1062    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1063  error:
1064    *ttp = TOK_ERROR;
1065    *tpp = tp;
1066    return false;
1067}
1068#endif /* JS_HAS_XML_SUPPORT */
1069
1070/*
1071 * We have encountered a '\': check for a Unicode escape sequence after it.
1072 * Return 'true' and the character code value (by value) if we found a
1073 * Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
1074 * advance along the buffer.
1075 */
1076bool
1077TokenStream::peekUnicodeEscape(int *result)
1078{
1079    jschar cp[5];
1080
1081    if (peekChars(5, cp) && cp[0] == 'u' &&
1082        JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
1083        JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
1084    {
1085        *result = (((((JS7_UNHEX(cp[1]) << 4)
1086                + JS7_UNHEX(cp[2])) << 4)
1087              + JS7_UNHEX(cp[3])) << 4)
1088            + JS7_UNHEX(cp[4]);
1089        return true;
1090    }
1091    return false;
1092}
1093
1094bool
1095TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
1096{
1097    if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
1098        skipChars(5);
1099        return true;
1100    }
1101    return false;
1102}
1103
1104bool
1105TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
1106{
1107    if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
1108        skipChars(5);
1109        return true;
1110    }
1111    return false;
1112}
1113
1114/*
1115 * Helper function which returns true if the first length(q) characters in p are
1116 * the same as the characters in q.
1117 */
1118static bool
1119CharsMatch(const jschar *p, const char *q) {
1120    while (*q) {
1121        if (*p++ != *q++)
1122            return false;
1123    }
1124    return true;
1125}
1126
1127bool
1128TokenStream::getAtLine()
1129{
1130    int c;
1131    jschar cp[5];
1132    uintN i, line, temp;
1133    char filenameBuf[1024];
1134
1135    /*
1136     * Hack for source filters such as the Mozilla XUL preprocessor:
1137     * "//@line 123\n" sets the number of the *next* line after the
1138     * comment to 123.  If we reach here, we've already seen "//".
1139     */
1140    if (peekChars(5, cp) && CharsMatch(cp, "@line")) {
1141        skipChars(5);
1142        while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
1143            continue;
1144        if (JS7_ISDEC(c)) {
1145            line = JS7_UNDEC(c);
1146            while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1147                temp = 10 * line + JS7_UNDEC(c);
1148                if (temp < line) {
1149                    /* Ignore overlarge line numbers. */
1150                    return true;
1151                }
1152                line = temp;
1153            }
1154            while (c != '\n' && c != EOF && IsSpaceOrBOM2(c))
1155                c = getChar();
1156            i = 0;
1157            if (c == '"') {
1158                while ((c = getChar()) != EOF && c != '"') {
1159                    if (c == '\n') {
1160                        ungetChar(c);
1161                        return true;
1162                    }
1163                    if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1164                        return true;
1165                    filenameBuf[i++] = (char) c;
1166                }
1167                if (c == '"') {
1168                    while ((c = getChar()) != '\n' && c != EOF && IsSpaceOrBOM2(c))
1169                        continue;
1170                }
1171            }
1172            filenameBuf[i] = '\0';
1173            if (c == EOF || c == '\n') {
1174                if (i > 0) {
1175                    if (flags & TSF_OWNFILENAME)
1176                        cx->free_((void *) filename);
1177                    filename = JS_strdup(cx, filenameBuf);
1178                    if (!filename)
1179                        return false;
1180                    flags |= TSF_OWNFILENAME;
1181                }
1182                lineno = line;
1183            }
1184        }
1185        ungetChar(c);
1186    }
1187    return true;
1188}
1189
1190bool
1191TokenStream::getAtSourceMappingURL()
1192{
1193    jschar peeked[18];
1194
1195    /* Match comments of the form @sourceMappingURL=<url> */
1196    if (peekChars(18, peeked) && CharsMatch(peeked, "@sourceMappingURL=")) {
1197        skipChars(18);
1198        tokenbuf.clear();
1199
1200        jschar c;
1201        while (!IsSpaceOrBOM2((c = getChar())) &&
1202               c && c != jschar(EOF))
1203            tokenbuf.append(c);
1204
1205        if (tokenbuf.empty())
1206            /* The source map's URL was missing, but not quite an exception that
1207             * we should stop and drop everything for, though. */
1208            return true;
1209
1210        int len = tokenbuf.length();
1211
1212        if (sourceMap)
1213            cx->free_(sourceMap);
1214        sourceMap = (jschar *) cx->malloc_(sizeof(jschar) * (len + 1));
1215        if (!sourceMap)
1216            return false;
1217
1218        for (int i = 0; i < len; i++)
1219            sourceMap[i] = tokenbuf[i];
1220        sourceMap[len] = '\0';
1221    }
1222    return true;
1223}
1224
1225Token *
1226TokenStream::newToken(ptrdiff_t adjust)
1227{
1228    cursor = (cursor + 1) & ntokensMask;
1229    Token *tp = &tokens[cursor];
1230    tp->ptr = userbuf.addressOfNextRawChar() + adjust;
1231    tp->pos.begin.index = tp->ptr - linebase;
1232    tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
1233    return tp;
1234}
1235
1236JS_ALWAYS_INLINE JSAtom *
1237TokenStream::atomize(JSContext *cx, CharBuffer &cb)
1238{
1239    return js_AtomizeChars(cx, cb.begin(), cb.length());
1240}
1241
1242#ifdef DEBUG
1243bool
1244IsTokenSane(Token *tp)
1245{
1246    /*
1247     * Nb: TOK_EOL should never be used in an actual Token;  it should only be
1248     * returned as a TokenKind from peekTokenSameLine().
1249     */
1250    if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
1251        return false;
1252
1253    if (tp->pos.begin.lineno == tp->pos.end.lineno) {
1254        if (tp->pos.begin.index > tp->pos.end.index)
1255            return false;
1256    } else {
1257        /* Only certain token kinds can be multi-line. */
1258        switch (tp->type) {
1259          case TOK_STRING:
1260          case TOK_XMLATTR:
1261          case TOK_XMLSPACE:
1262          case TOK_XMLTEXT:
1263          case TOK_XMLCOMMENT:
1264          case TOK_XMLCDATA:
1265          case TOK_XMLPI:
1266            break;
1267          default:
1268            return false;
1269        }
1270    }
1271    return true;
1272}
1273#endif
1274
1275bool
1276TokenStream::putIdentInTokenbuf(const jschar *identStart)
1277{
1278    int32_t c, qc;
1279    const jschar *tmp = userbuf.addressOfNextRawChar();
1280    userbuf.setAddressOfNextRawChar(identStart);
1281
1282    tokenbuf.clear();
1283    for (;;) {
1284        c = getCharIgnoreEOL();
1285        if (!IsIdentifierPart(c)) {
1286            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1287                break;
1288            c = qc;
1289        }
1290        if (!tokenbuf.append(c)) {
1291            userbuf.setAddressOfNextRawChar(tmp);
1292            return false;
1293        }
1294    }
1295    userbuf.setAddressOfNextRawChar(tmp);
1296    return true;
1297}
1298
1299bool
1300TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp, JSOp *topp)
1301{
1302    JS_ASSERT(!ttp == !topp);
1303
1304    const KeywordInfo *kw = FindKeyword(s, length);
1305    if (!kw)
1306        return true;
1307
1308    if (kw->tokentype == TOK_RESERVED) {
1309        return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1310                                        JSMSG_RESERVED_ID, kw->chars);
1311    }
1312
1313    if (kw->tokentype != TOK_STRICT_RESERVED) {
1314        if (kw->version <= versionNumber()) {
1315            /* Working keyword. */
1316            if (ttp) {
1317                *ttp = kw->tokentype;
1318                *topp = (JSOp) kw->op;
1319                return true;
1320            }
1321            return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1322                                            JSMSG_RESERVED_ID, kw->chars);
1323        }
1324
1325        /*
1326         * The keyword is not in this version. Treat it as an identifier,
1327         * unless it is let or yield which we treat as TOK_STRICT_RESERVED by
1328         * falling through to the code below (ES5 forbids them in strict mode).
1329         */
1330        if (kw->tokentype != TOK_LET && kw->tokentype != TOK_YIELD)
1331            return true;
1332    }
1333
1334    /* Strict reserved word. */
1335    if (isStrictMode())
1336        return ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars);
1337    return ReportCompileErrorNumber(cx, this, NULL, JSREPORT_STRICT | JSREPORT_WARNING,
1338                                    JSMSG_RESERVED_ID, kw->chars);
1339}
1340
/*
 * Classification of a token's first character, used as the element values of
 * the firstCharKinds[] lookup table.  The enumerators keep their implicit
 * values, so their order here is significant.
 */
enum FirstCharKind {
    Other,      /* anything not covered by a kind below */
    OneChar,    /* a character that is a complete token by itself */
    Ident,      /* '$', '_' or an ASCII letter */
    Dot,        /* '.' */
    Equals,     /* '=' */
    String,     /* '"' or '\'' */
    Dec,        /* '1'..'9' */
    Colon,      /* ':' */
    Plus,       /* '+' */
    HexOct,     /* '0' */

    /* These two must be last, so that |c >= Space| matches both. */
    Space,      /* whitespace other than a line terminator */
    EOL         /* '\n' or '\r' */
};
1357
1358#define _______ Other
1359
1360/*
1361 * OneChar: 40, 41, 44, 59, 63, 91, 93, 123, 125: '(', ')', ',', ';', '?', '[', ']', '{', '}'
1362 * Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
1363 * Dot:     46: '.'
1364 * Equals:  61: '='
1365 * String:  34, 39: '"', '\''
1366 * Dec:     49..57: '1'..'9'
1367 * Colon:   58: ':'
1368 * Plus:    43: '+'
1369 * HexOct:  48: '0'
1370 * Space:   9, 11, 12: '\t', '\v', '\f'
1371 * EOL:     10, 13: '\n', '\r'
1372 */
1373static const uint8_t firstCharKinds[] = {
1374/*         0        1        2        3        4        5        6        7        8        9    */
1375/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
1376/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
1377/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
1378/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
1379/*  40+ */ OneChar, OneChar, _______,    Plus, OneChar, _______,     Dot, _______,  HexOct,     Dec,
1380/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,   Colon, OneChar,
1381/*  60+ */ _______,  Equals, _______, OneChar, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
1382/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
1383/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
1384/*  90+ */   Ident, OneChar, _______, OneChar, _______,   Ident, _______,   Ident,   Ident,   Ident,
1385/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
1386/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
1387/* 120+ */   Ident,   Ident,   Ident, OneChar, _______, OneChar, _______, _______
1388};
1389
1390#undef _______
1391
1392TokenKind
1393TokenStream::getTokenInternal()
1394{
1395    TokenKind tt;
1396    int c, qc;
1397    Token *tp;
1398    FirstCharKind c1kind;
1399    const jschar *numStart;
1400    bool hasFracOrExp;
1401    const jschar *identStart;
1402    bool hadUnicodeEscape;
1403
1404#if JS_HAS_XML_SUPPORT
1405    /*
1406     * Look for XML text and tags.
1407     */
1408    if (flags & (TSF_XMLTEXTMODE|TSF_XMLTAGMODE)) {
1409        if (!getXMLTextOrTag(&tt, &tp))
1410            goto error;
1411        goto out;
1412    }
1413#endif
1414
1415  retry:
1416    if (JS_UNLIKELY(!userbuf.hasRawChars())) {
1417        tp = newToken(0);
1418        tt = TOK_EOF;
1419        flags |= TSF_EOF;
1420        goto out;
1421    }
1422
1423    c = userbuf.getRawChar();
1424    JS_ASSERT(c != EOF);
1425
1426    /*
1427     * Chars not in the range 0..127 are rare.  Getting them out of the way
1428     * early allows subsequent checking to be faster.
1429     */
1430    if (JS_UNLIKELY(c >= 128)) {
1431        if (IsSpaceOrBOM2(c)) {
1432            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
1433                updateLineInfoForEOL();
1434                updateFlagsForEOL();
1435            }
1436
1437            goto retry;
1438        }
1439
1440        tp = newToken(-1);
1441
1442        /* '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. */
1443        JS_STATIC_ASSERT('$' < 128 && '_' < 128);
1444        if (IsLetter(c)) {
1445            identStart = userbuf.addressOfNextRawChar() - 1;
1446            hadUnicodeEscape = false;
1447            goto identifier;
1448        }
1449
1450        goto badchar;
1451    }
1452
1453    /*
1454     * Get the token kind, based on the first char.  The ordering of c1kind
1455     * comparison is based on the frequency of tokens in real code.  Minified
1456     * and non-minified code have different characteristics, mostly in that
1457     * whitespace occurs much less in minified code.  Token kinds that fall in
1458     * the 'Other' category typically account for less than 2% of all tokens,
1459     * so their order doesn't matter much.
1460     */
1461    c1kind = FirstCharKind(firstCharKinds[c]);
1462
1463    /*
1464     * Skip over whitespace chars;  update line state on EOLs.  Even though
1465     * whitespace isn't very common in minified code we have to handle it first
1466     * (and jump back to 'retry') before calling newToken().
1467     */
1468    if (c1kind >= Space) {
1469        if (c1kind == EOL) {
1470            /* If it's a \r\n sequence: treat as a single EOL, skip over the \n. */
1471            if (c == '\r' && userbuf.hasRawChars())
1472                userbuf.matchRawChar('\n');
1473            updateLineInfoForEOL();
1474            updateFlagsForEOL();
1475        }
1476        goto retry;
1477    }
1478
1479    tp = newToken(-1);
1480
1481    /*
1482     * Look for an unambiguous single-char token.
1483     */
1484    if (c1kind == OneChar) {
1485        tt = (TokenKind)oneCharTokens[c];
1486        goto out;
1487    }
1488
1489    /*
1490     * Look for an identifier.
1491     */
1492    if (c1kind == Ident) {
1493        identStart = userbuf.addressOfNextRawChar() - 1;
1494        hadUnicodeEscape = false;
1495
1496      identifier:
1497        for (;;) {
1498            c = getCharIgnoreEOL();
1499            if (c == EOF)
1500                break;
1501            if (!IsIdentifierPart(c)) {
1502                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1503                    break;
1504                hadUnicodeEscape = true;
1505            }
1506        }
1507        ungetCharIgnoreEOL(c);
1508
1509        /* Convert the escapes by putting into tokenbuf. */
1510        if (hadUnicodeEscape && !putIdentInTokenbuf(identStart))
1511            goto error;
1512
1513        /* Check for keywords unless parser asks us to ignore keywords. */
1514        if (!(flags & TSF_KEYWORD_IS_NAME)) {
1515            const jschar *chars;
1516            size_t length;
1517            if (hadUnicodeEscape) {
1518                chars = tokenbuf.begin();
1519                length = tokenbuf.length();
1520            } else {
1521                chars = identStart;
1522                length = userbuf.addressOfNextRawChar() - identStart;
1523            }
1524            tt = TOK_NAME;
1525            if (!checkForKeyword(chars, length, &tt, &tp->t_op))
1526                goto error;
1527            if (tt != TOK_NAME)
1528                goto out;
1529        }
1530
1531        /*
1532         * Identifiers containing no Unicode escapes can be atomized directly
1533         * from userbuf.  The rest must use the escapes converted via
1534         * tokenbuf before atomizing.
1535         */
1536        JSAtom *atom;
1537        if (!hadUnicodeEscape)
1538            atom = js_AtomizeChars(cx, identStart, userbuf.addressOfNextRawChar() - identStart);
1539        else
1540            atom = atomize(cx, tokenbuf);
1541        if (!atom)
1542            goto error;
1543        tp->setName(JSOP_NAME, atom->asPropertyName());
1544        tt = TOK_NAME;
1545        goto out;
1546    }
1547
1548    if (c1kind == Dot) {
1549        c = getCharIgnoreEOL();
1550        if (JS7_ISDEC(c)) {
1551            numStart = userbuf.addressOfNextRawChar() - 2;
1552            goto decimal_dot;
1553        }
1554#if JS_HAS_XML_SUPPORT
1555        if (c == '.') {
1556            tt = TOK_DBLDOT;
1557            goto out;
1558        }
1559#endif
1560        ungetCharIgnoreEOL(c);
1561        tt = TOK_DOT;
1562        goto out;
1563    }
1564
1565    if (c1kind == Equals) {
1566        if (matchChar('=')) {
1567            if (matchChar('=')) {
1568                tp->t_op = JSOP_STRICTEQ;
1569                tt = TOK_STRICTEQ;
1570            } else {
1571                tp->t_op = JSOP_EQ;
1572                tt = TOK_EQ;
1573            }
1574        } else {
1575            tp->t_op = JSOP_NOP;
1576            tt = TOK_ASSIGN;
1577        }
1578        goto out;
1579    }
1580
1581    /*
1582     * Look for a string.
1583     */
1584    if (c1kind == String) {
1585        qc = c;
1586        tokenbuf.clear();
1587        while (true) {
1588            /*
1589             * We need to detect any of these chars:  " or ', \n (or its
1590             * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
1591             * similar to maybeEOL[], see above.  Because we detect EOL
1592             * sequences here and put them back immediately, we can use
1593             * getCharIgnoreEOL().
1594             */
1595            c = getCharIgnoreEOL();
1596            if (maybeStrSpecial[c & 0xff]) {
1597                if (c == qc)
1598                    break;
1599                if (c == '\\') {
1600                    switch (c = getChar()) {
1601                      case 'b': c = '\b'; break;
1602                      case 'f': c = '\f'; break;
1603                      case 'n': c = '\n'; break;
1604                      case 'r': c = '\r'; break;
1605                      case 't': c = '\t'; break;
1606                      case 'v': c = '\v'; break;
1607
1608                      default:
1609                        if ('0' <= c && c < '8') {
1610                            int32_t val = JS7_UNDEC(c);
1611
1612                            c = peekChar();
1613                            /* Strict mode code allows only \0, then a non-digit. */
1614                            if (val != 0 || JS7_ISDEC(c)) {
1615                                if (!ReportStrictModeError(cx, this, NULL, NULL,
1616                                                           JSMSG_DEPRECATED_OCTAL)) {
1617                                    goto error;
1618                                }
1619                                setOctalCharacterEscape();
1620                            }
1621                            if ('0' <= c && c < '8') {
1622                                val = 8 * val + JS7_UNDEC(c);
1623                                getChar();
1624                                c = peekChar();
1625                                if ('0' <= c && c < '8') {
1626                                    int32_t save = val;
1627                                    val = 8 * val + JS7_UNDEC(c);
1628                                    if (val <= 0377)
1629                                        getChar();
1630                                    else
1631                                        val = save;
1632                                }
1633                            }
1634
1635                            c = (jschar)val;
1636                        } else if (c == 'u') {
1637                            jschar cp[4];
1638                            if (peekChars(4, cp) &&
1639                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1640                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1641                                c = (((((JS7_UNHEX(cp[0]) << 4)
1642                                        + JS7_UNHEX(cp[1])) << 4)
1643                                      + JS7_UNHEX(cp[2])) << 4)
1644                                    + JS7_UNHEX(cp[3]);
1645                                skipChars(4);
1646                            } else {
1647                                ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1648                                                         JSMSG_MALFORMED_ESCAPE, "Unicode");
1649                                goto error;
1650                            }
1651                        } else if (c == 'x') {
1652                            jschar cp[2];
1653                            if (peekChars(2, cp) &&
1654                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1655                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1656                                skipChars(2);
1657                            } else {
1658                                ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1659                                                         JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1660                                goto error;
1661                            }
1662                        } else if (c == '\n') {
1663                            /*
1664                             * ES5 7.8.4: an escaped line terminator represents
1665                             * no character.
1666                             */
1667                            continue;
1668                        }
1669                        break;
1670                    }
1671                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
1672                    ungetCharIgnoreEOL(c);
1673                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1674                                             JSMSG_UNTERMINATED_STRING);
1675                    goto error;
1676                }
1677            }
1678            if (!tokenbuf.append(c))
1679                goto error;
1680        }
1681        JSAtom *atom = atomize(cx, tokenbuf);
1682        if (!atom)
1683            goto error;
1684        tp->pos.end.lineno = lineno;
1685        tp->setAtom(JSOP_STRING, atom);
1686        tt = TOK_STRING;
1687        goto out;
1688    }
1689
1690    /*
1691     * Look for a decimal number.
1692     */
1693    if (c1kind == Dec) {
1694        numStart = userbuf.addressOfNextRawChar() - 1;
1695
1696      decimal:
1697        hasFracOrExp = false;
1698        while (JS7_ISDEC(c))
1699            c = getCharIgnoreEOL();
1700
1701        if (c == '.') {
1702          decimal_dot:
1703            hasFracOrExp = true;
1704            do {
1705                c = getCharIgnoreEOL();
1706            } while (JS7_ISDEC(c));
1707        }
1708        if (c == 'e' || c == 'E') {
1709            hasFracOrExp = true;
1710            c = getCharIgnoreEOL();
1711            if (c == '+' || c == '-')
1712                c = getCharIgnoreEOL();
1713            if (!JS7_ISDEC(c)) {
1714                ungetCharIgnoreEOL(c);
1715                ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1716                                         JSMSG_MISSING_EXPONENT);
1717                goto error;
1718            }
1719            do {
1720                c = getCharIgnoreEOL();
1721            } while (JS7_ISDEC(c));
1722        }
1723        ungetCharIgnoreEOL(c);
1724
1725        if (c != EOF && IsIdentifierStart(c)) {
1726            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1727            goto error;
1728        }
1729
1730        /*
1731         * Unlike identifiers and strings, numbers cannot contain escaped
1732         * chars, so we don't need to use tokenbuf.  Instead we can just
1733         * convert the jschars in userbuf directly to the numeric value.
1734         */
1735        jsdouble dval;
1736        const jschar *dummy;
1737        if (!hasFracOrExp) {
1738            if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), 10, &dummy, &dval))
1739                goto error;
1740        } else {
1741            if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
1742                goto error;
1743        }
1744        tp->setNumber(dval);
1745        tt = TOK_NUMBER;
1746        goto out;
1747    }
1748
1749    if (c1kind == Colon) {
1750#if JS_HAS_XML_SUPPORT
1751        if (matchChar(':')) {
1752            tt = TOK_DBLCOLON;
1753            goto out;
1754        }
1755#endif
1756        tp->t_op = JSOP_NOP;
1757        tt = TOK_COLON;
1758        goto out;
1759    }
1760
1761    if (c1kind == Plus) {
1762        if (matchChar('=')) {
1763            tp->t_op = JSOP_ADD;
1764            tt = TOK_ADDASSIGN;
1765        } else if (matchChar('+')) {
1766            tt = TOK_INC;
1767        } else {
1768            tp->t_op = JSOP_POS;
1769            tt = TOK_PLUS;
1770        }
1771        goto out;
1772    }
1773
1774    /*
1775     * Look for a hexadecimal or octal number.
1776     */
1777    if (c1kind == HexOct) {
1778        int radix;
1779        c = getCharIgnoreEOL();
1780        if (c == 'x' || c == 'X') {
1781            radix = 16;
1782            c = getCharIgnoreEOL();
1783            if (!JS7_ISHEX(c)) {
1784                ungetCharIgnoreEOL(c);
1785                ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_MISSING_HEXDIGITS);
1786                goto error;
1787            }
1788            numStart = userbuf.addressOfNextRawChar() - 1;  /* one past the '0x' */
1789            while (JS7_ISHEX(c))
1790                c = getCharIgnoreEOL();
1791        } else if (JS7_ISDEC(c)) {
1792            radix = 8;
1793            numStart = userbuf.addressOfNextRawChar() - 1;  /* one past the '0' */
1794            while (JS7_ISDEC(c)) {
1795                /* Octal integer literals are not permitted in strict mode code. */
1796                if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1797                    goto error;
1798
1799                /*
1800                 * Outside strict mode, we permit 08 and 09 as decimal numbers,
1801                 * which makes our behaviour a superset of the ECMA numeric
1802                 * grammar. We might not always be so permissive, so we warn
1803                 * about it.
1804                 */
1805                if (c >= '8') {
1806                    if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1807                                                  JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1808                        goto error;
1809                    }
1810                    goto decimal;   /* use the decimal scanner for the rest of the number */
1811                }
1812                c = getCharIgnoreEOL();
1813            }
1814        } else {
1815            /* '0' not followed by 'x', 'X' or a digit;  scan as a decimal number. */
1816            numStart = userbuf.addressOfNextRawChar() - 1;
1817            goto decimal;
1818        }
1819        ungetCharIgnoreEOL(c);
1820
1821        if (c != EOF && IsIdentifierStart(c)) {
1822            ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1823            goto error;
1824        }
1825
1826        jsdouble dval;
1827        const jschar *dummy;
1828        if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
1829            goto error;
1830        tp->setNumber(dval);
1831        tt = TOK_NUMBER;
1832        goto out;
1833    }
1834
1835    /*
1836     * This handles everything else.
1837     */
1838    JS_ASSERT(c1kind == Other);
1839    switch (c) {
1840      case '\\':
1841        hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
1842        if (hadUnicodeEscape) {
1843            identStart = userbuf.addressOfNextRawChar() - 6;
1844            goto identifier;
1845        }
1846        goto badchar;
1847
1848      case '|':
1849        if (matchChar(c)) {
1850            tt = TOK_OR;
1851        } else if (matchChar('=')) {
1852            tp->t_op = JSOP_BITOR;
1853            tt = TOK_BITORASSIGN;
1854        } else {
1855            tt = TOK_BITOR;
1856        }
1857        break;
1858
1859      case '^':
1860        if (matchChar('=')) {
1861            tp->t_op = JSOP_BITXOR;
1862            tt = TOK_BITXORASSIGN;
1863        } else {
1864            tt = TOK_BITXOR;
1865        }
1866        break;
1867
1868      case '&':
1869        if (matchChar('&')) {
1870            tt = TOK_AND;
1871        } else if (matchChar('=')) {
1872            tp->t_op = JSOP_BITAND;
1873            tt = TOK_BITANDASSIGN;
1874        } else {
1875            tt = TOK_BITAND;
1876        }
1877        break;
1878
1879      case '!':
1880        if (matchChar('=')) {
1881            if (matchChar('=')) {
1882                tp->t_op = JSOP_STRICTNE;
1883                tt = TOK_STRICTNE;
1884            } else {
1885                tp->t_op = JSOP_NE;
1886                tt = TOK_NE;
1887            }
1888        } else {
1889            tp->t_op = JSOP_NOT;
1890            tt = TOK_NOT;
1891        }
1892        break;
1893
1894#if JS_HAS_XML_SUPPORT
1895      case '@':
1896        tt = TOK_AT;
1897        break;
1898#endif
1899
1900      case '<':
1901#if JS_HAS_XML_SUPPORT
1902        if ((flags & TSF_OPERAND) && !isStrictMode() && (hasXML() || peekChar() != '!')) {
1903            if (!getXMLMarkup(&tt, &tp))
1904                goto error;
1905            goto out;
1906        }
1907#endif
1908
1909        /* NB: treat HTML begin-comment as comment-till-end-of-line */
1910        if (matchChar('!')) {
1911            if (matchChar('-')) {
1912                if (matchChar('-')) {
1913                    flags |= TSF_IN_HTML_COMMENT;
1914                    goto skipline;
1915                }
1916                ungetChar('-');
1917            }
1918            ungetChar('!');
1919        }
1920        if (matchChar('<')) {
1921            tp->t_op = JSOP_LSH;
1922            tt = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
1923        } else {
1924            if (matchChar('=')) {
1925                tp->t_op = JSOP_LE;
1926                tt = TOK_LE;
1927            } else {
1928                tp->t_op = JSOP_LT;
1929                tt = TOK_LT;
1930            }
1931        }
1932        break;
1933
1934      case '>':
1935        if (matchChar('>')) {
1936            if (matchChar('>')) {
1937                tp->t_op = JSOP_URSH;
1938                tt = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
1939            } else {
1940                tp->t_op = JSOP_RSH;
1941                tt = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
1942            }
1943        } else {
1944            if (matchChar('=')) {
1945                tp->t_op = JSOP_GE;
1946                tt = TOK_GE;
1947            } else {
1948                tp->t_op = JSOP_GT;
1949                tt = TOK_GT;
1950            }
1951        }
1952        break;
1953
1954      case '*':
1955        tp->t_op = JSOP_MUL;
1956        tt = matchChar('=') ? TOK_MULASSIGN : TOK_STAR;
1957        break;
1958
1959      case '/':
1960        /*
1961         * Look for a single-line comment.
1962         */
1963        if (matchChar('/')) {
1964            if (cx->hasAtLineOption() && !getAtLine())
1965                goto error;
1966
1967            if (!getAtSourceMappingURL())
1968                goto error;
1969
1970  skipline:
1971            /* Optimize line skipping if we are not in an HTML comment. */
1972            if (flags & TSF_IN_HTML_COMMENT) {
1973                while ((c = getChar()) != EOF && c != '\n') {
1974                    if (c == '-' && matchChar('-') && matchChar('>'))
1975                        flags &= ~TSF_IN_HTML_COMMENT;
1976                }
1977            } else {
1978                while ((c = getChar()) != EOF && c != '\n')
1979                    continue;
1980            }
1981            ungetChar(c);
1982            cursor = (cursor - 1) & ntokensMask;
1983            goto retry;
1984        }
1985
1986        /*
1987         * Look for a multi-line comment.
1988         */
1989        if (matchChar('*')) {
1990            uintN linenoBefore = lineno;
1991            while ((c = getChar()) != EOF &&
1992                   !(c == '*' && matchChar('/'))) {
1993                /* Ignore all characters until comment close. */
1994            }
1995            if (c == EOF) {
1996                ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1997                                         JSMSG_UNTERMINATED_COMMENT);
1998                goto error;
1999            }
2000            if (linenoBefore != lineno)
2001                updateFlagsForEOL();
2002            cursor = (cursor - 1) & ntokensMask;
2003            goto retry;
2004        }
2005
2006        /*
2007         * Look for a regexp.
2008         */
2009        if (flags & TSF_OPERAND) {
2010            tokenbuf.clear();
2011
2012            bool inCharClass = false;
2013            for (;;) {
2014                c = getChar();
2015                if (c == '\\') {
2016                    if (!tokenbuf.append(c))
2017                        goto error;
2018                    c = getChar();
2019                } else if (c == '[') {
2020                    inCharClass = true;
2021                } else if (c == ']') {
2022                    inCharClass = false;
2023                } else if (c == '/' && !inCharClass) {
2024                    /* For compat with IE, allow unescaped / in char classes. */
2025                    break;
2026                }
2027                if (c == '\n' || c == EOF) {
2028                    ungetChar(c);
2029                    ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
2030                                             JSMSG_UNTERMINATED_REGEXP);
2031                    goto error;
2032                }
2033                if (!tokenbuf.append(c))
2034                    goto error;
2035            }
2036
2037            RegExpFlag reflags = NoFlags;
2038            uintN length = tokenbuf.length() + 1;
2039            while (true) {
2040                c = peekChar();
2041                if (c == 'g' && !(reflags & GlobalFlag))
2042                    reflags = RegExpFlag(reflags | GlobalFlag);
2043                else if (c == 'i' && !(reflags & IgnoreCaseFlag))
2044                    reflags = RegExpFlag(reflags | IgnoreCaseFlag);
2045                else if (c == 'm' && !(reflags & MultilineFlag))
2046                    reflags = RegExpFlag(reflags | MultilineFlag);
2047                else if (c == 'y' && !(reflags & StickyFlag))
2048                    reflags = RegExpFlag(reflags | StickyFlag);
2049                else
2050                    break;
2051                getChar();
2052                length++;
2053            }
2054
2055            c = peekChar();
2056            if (JS7_ISLET(c)) {
2057                char buf[2] = { '\0', '\0' };
2058                tp->pos.begin.index += length + 1;
2059                buf[0] = char(c);
2060                ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
2061                                         buf);
2062                (void) getChar();
2063                goto error;
2064            }
2065            tp->setRegExpFlags(reflags);
2066            tt = TOK_REGEXP;
2067            break;
2068        }
2069
2070        tp->t_op = JSOP_DIV;
2071        tt = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
2072        break;
2073
2074      case '%':
2075        tp->t_op = JSOP_MOD;
2076        tt = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
2077        break;
2078
2079      case '~':
2080        tp->t_op = JSOP_BITNOT;
2081        tt = TOK_BITNOT;
2082        break;
2083
2084      case '-':
2085        if (matchChar('=')) {
2086            tp->t_op = JSOP_SUB;
2087            tt = TOK_SUBASSIGN;
2088        } else if (matchChar(c)) {
2089            if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
2090                flags &= ~TSF_IN_HTML_COMMENT;
2091                goto skipline;
2092            }
2093            tt = TOK_DEC;
2094        } else {
2095            tp->t_op = JSOP_NEG;
2096            tt = TOK_MINUS;
2097        }
2098        break;
2099
2100      badchar:
2101      default:
2102        ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
2103        goto error;
2104    }
2105
2106  out:
2107    flags |= TSF_DIRTYLINE;
2108    tp->pos.end.index = userbuf.addressOfNextRawChar() - linebase;
2109    tp->type = tt;
2110    JS_ASSERT(IsTokenSane(tp));
2111    return tt;
2112
2113  error:
2114    /*
2115     * For erroneous multi-line tokens we won't have changed end.lineno (it'll
2116     * still be equal to begin.lineno) so we revert end.index to be equal to
2117     * begin.index + 1 (as if it's a 1-char token) to avoid having inconsistent
2118     * begin/end positions.  end.index isn't used in error messages anyway.
2119     */
2120    flags |= TSF_DIRTYLINE;
2121    tp->pos.end.index = tp->pos.begin.index + 1;
2122    tp->type = TOK_ERROR;
2123    JS_ASSERT(IsTokenSane(tp));
2124#ifdef DEBUG
2125    /*
2126     * Poisoning userbuf on error establishes an invariant: once an erroneous
2127     * token has been seen, userbuf will not be consulted again.  This is true
2128     * because the parser will either (a) deal with the TOK_ERROR token by
2129     * aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
2130     * match what it expected, it will unget the token, and the next getToken()
2131     * call will immediately return the just-gotten TOK_ERROR token again
2132     * without consulting userbuf, thanks to the lookahead buffer.
2133     */
2134    userbuf.poison();
2135#endif
2136    return TOK_ERROR;
2137}
2138
2139JS_FRIEND_API(int)
2140js_fgets(char *buf, int size, FILE *file)
2141{
2142    int n, i, c;
2143    JSBool crflag;
2144
2145    n = size - 1;
2146    if (n < 0)
2147        return -1;
2148
2149    crflag = JS_FALSE;
2150    for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
2151        buf[i] = c;
2152        if (c == '\n') {        /* any \n ends a line */
2153            i++;                /* keep the \n; we know there is room for \0 */
2154            break;
2155        }
2156        if (crflag) {           /* \r not followed by \n ends line at the \r */
2157            ungetc(c, file);
2158            break;              /* and overwrite c in buf with \0 */
2159        }
2160        crflag = (c == '\r');
2161    }
2162
2163    buf[i] = '\0';
2164    return i;
2165}
2166
2167#ifdef DEBUG
2168const char *
2169TokenKindToString(TokenKind tt)
2170{
2171    switch (tt) {
2172      case TOK_ERROR:           return "TOK_ERROR";
2173      case TOK_EOF:             return "TOK_EOF";
2174      case TOK_EOL:             return "TOK_EOL";
2175      case TOK_SEMI:            return "TOK_SEMI";
2176      case TOK_COMMA:           return "TOK_COMMA";
2177      case TOK_HOOK:            return "TOK_HOOK";
2178      case TOK_COLON:           return "TOK_COLON";
2179      case TOK_OR:              return "TOK_OR";
2180      case TOK_AND:             return "TOK_AND";
2181      case TOK_BITOR:           return "TOK_BITOR";
2182      case TOK_BITXOR:          return "TOK_BITXOR";
2183      case TOK_BITAND:          return "TOK_BITAND";
2184      case TOK_PLUS:            return "TOK_PLUS";
2185      case TOK_MINUS:           return "TOK_MINUS";
2186      case TOK_STAR:            return "TOK_STAR";
2187      case TOK_DIV:             return "TOK_DIV";
2188      case TOK_MOD:             return "TOK_MOD";
2189      case TOK_INC:             return "TOK_INC";
2190      case TOK_DEC:             return "TOK_DEC";
2191      case TOK_DOT:             return "TOK_DOT";
2192      case TOK_LB:              return "TOK_LB";
2193      case TOK_RB:              return "TOK_RB";
2194      case TOK_LC:              return "TOK_LC";
2195      case TOK_RC:              return "TOK_RC";
2196      case TOK_LP:              return "TOK_LP";
2197      case TOK_RP:              return "TOK_RP";
2198      case TOK_NAME:            return "TOK_NAME";
2199      case TOK_NUMBER:          return "TOK_NUMBER";
2200      case TOK_STRING:          return "TOK_STRING";
2201      case TOK_REGEXP:          return "TOK_REGEXP";
2202      case TOK_TRUE:            return "TOK_TRUE";
2203      case TOK_FALSE:           return "TOK_FALSE";
2204      case TOK_NULL:            return "TOK_NULL";
2205      case TOK_THIS:            return "TOK_THIS";
2206      case TOK_FUNCTION:        return "TOK_FUNCTION";
2207      case TOK_IF:              return "TOK_IF";
2208      case TOK_ELSE:            return "TOK_ELSE";
2209      case TOK_SWITCH:          return "TOK_SWITCH";
2210      case TOK_CASE:            return "TOK_CASE";
2211      case TOK_DEFAULT:         return "TOK_DEFAULT";
2212      case TOK_WHILE:           return "TOK_WHILE";
2213      case TOK_DO:              return "TOK_DO";
2214      case TOK_FOR:             return "TOK_FOR";
2215      case TOK_BREAK:           return "TOK_BREAK";
2216      case TOK_CONTINUE:        return "TOK_CONTINUE";
2217      case TOK_IN:              return "TOK_IN";
2218      case TOK_VAR:             return "TOK_VAR";
2219      case TOK_CONST:           return "TOK_CONST";
2220      case TOK_WITH:            return "TOK_WITH";
2221      case TOK_RETURN:          return "TOK_RETURN";
2222      case TOK_NEW:             return "TOK_NEW";
2223      case TOK_DELETE:          return "TOK_DELETE";
2224      case TOK_TRY:             return "TOK_TRY";
2225      case TOK_CATCH:           return "TOK_CATCH";
2226      case TOK_FINALLY:         return "TOK_FINALLY";
2227      case TOK_THROW:           return "TOK_THROW";
2228      case TOK_INSTANCEOF:      return "TOK_INSTANCEOF";
2229      case TOK_DEBUGGER:        return "TOK_DEBUGGER";
2230      case TOK_XMLSTAGO:        return "TOK_XMLSTAGO";
2231      case TOK_XMLETAGO:        return "TOK_XMLETAGO";
2232      case TOK_XMLPTAGC:        return "TOK_XMLPTAGC";
2233      case TOK_XMLTAGC:         return "TOK_XMLTAGC";
2234      case TOK_XMLNAME:         return "TOK_XMLNAME";
2235      case TOK_XMLATTR:         return "TOK_XMLATTR";
2236      case TOK_XMLSPACE:        return "TOK_XMLSPACE";
2237      case TOK_XMLTEXT:         return "TOK_XMLTEXT";
2238      case TOK_XMLCOMMENT:      return "TOK_XMLCOMMENT";
2239      case TOK_XMLCDATA:        return "TOK_XMLCDATA";
2240      case TOK_XMLPI:           return "TOK_XMLPI";
2241      case TOK_AT:              return "TOK_AT";
2242      case TOK_DBLCOLON:        return "TOK_DBLCOLON";
2243      case TOK_DBLDOT:          return "TOK_DBLDOT";
2244      case TOK_FILTER:          return "TOK_FILTER";
2245      case TOK_XMLELEM:         return "TOK_XMLELEM";
2246      case TOK_XMLLIST:         return "TOK_XMLLIST";
2247      case TOK_YIELD:           return "TOK_YIELD";
2248      case TOK_LEXICALSCOPE:    return "TOK_LEXICALSCOPE";
2249      case TOK_LET:             return "TOK_LET";
2250      case TOK_RESERVED:        return "TOK_RESERVED";
2251      case TOK_STRICT_RESERVED: return "TOK_STRICT_RESERVED";
2252      case TOK_STRICTEQ:        return "TOK_STRICTEQ";
2253      case TOK_EQ:              return "TOK_EQ";
2254      case TOK_STRICTNE:        return "TOK_STRICTNE";
2255      case TOK_NE:              return "TOK_NE";
2256      case TOK_TYPEOF:          return "TOK_TYPEOF";
2257      case TOK_VOID:            return "TOK_VOID";
2258      case TOK_NOT:             return "TOK_NOT";
2259      case TOK_BITNOT:          return "TOK_BITNOT";
2260      case TOK_LT:              return "TOK_LT";
2261      case TOK_LE:              return "TOK_LE";
2262      case TOK_GT:              return "TOK_GT";
2263      case TOK_GE:              return "TOK_GE";
2264      case TOK_LSH:             return "TOK_LSH";
2265      case TOK_RSH:             return "TOK_RSH";
2266      case TOK_URSH:            return "TOK_URSH";
2267      case TOK_ASSIGN:          return "TOK_ASSIGN";
2268      case TOK_ADDASSIGN:       return "TOK_ADDASSIGN";
2269      case TOK_SUBASSIGN:       return "TOK_SUBASSIGN";
2270      case TOK_BITORASSIGN:     return "TOK_BITORASSIGN";
2271      case TOK_BITXORASSIGN:    return "TOK_BITXORASSIGN";
2272      case TOK_BITANDASSIGN:    return "TOK_BITANDASSIGN";
2273      case TOK_LSHASSIGN:       return "TOK_LSHASSIGN";
2274      case TOK_RSHASSIGN:       return "TOK_RSHASSIGN";
2275      case TOK_URSHASSIGN:      return "TOK_URSHASSIGN";
2276      case TOK_MULASSIGN:       return "TOK_MULASSIGN";
2277      case TOK_DIVASSIGN:       return "TOK_DIVASSIGN";
2278      case TOK_MODASSIGN:       return "TOK_MODASSIGN";
2279      case TOK_LIMIT:           break;
2280    }
2281
2282    return "<bad TokenKind>";
2283}
2284#endif