PageRenderTime 399ms CodeModel.GetById 60ms app.highlight 308ms RepoModel.GetById 1ms app.codeStats 1ms

/js/src/jsstr.cpp

http://github.com/zpao/v8monkey
C++ | 4356 lines | 3507 code | 476 blank | 373 comment | 684 complexity | fbf53656a39e95829f89c223c92027e4 MD5 | raw file
   1/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2 * vim: set ts=8 sw=4 et tw=99:
   3 *
   4 * ***** BEGIN LICENSE BLOCK *****
   5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version
   8 * 1.1 (the "License"); you may not use this file except in compliance with
   9 * the License. You may obtain a copy of the License at
  10 * http://www.mozilla.org/MPL/
  11 *
  12 * Software distributed under the License is distributed on an "AS IS" basis,
  13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14 * for the specific language governing rights and limitations under the
  15 * License.
  16 *
  17 * The Original Code is Mozilla Communicator client code, released
  18 * March 31, 1998.
  19 *
  20 * The Initial Developer of the Original Code is
  21 * Netscape Communications Corporation.
  22 * Portions created by the Initial Developer are Copyright (C) 1998
  23 * the Initial Developer. All Rights Reserved.
  24 *
  25 * Contributor(s):
  26 *
  27 * Alternatively, the contents of this file may be used under the terms of
  28 * either of the GNU General Public License Version 2 or later (the "GPL"),
  29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30 * in which case the provisions of the GPL or the LGPL are applicable instead
  31 * of those above. If you wish to allow use of your version of this file only
  32 * under the terms of either the GPL or the LGPL, and not to allow others to
  33 * use your version of this file under the terms of the MPL, indicate your
  34 * decision by deleting the provisions above and replace them with the notice
  35 * and other provisions required by the GPL or the LGPL. If you do not delete
  36 * the provisions above, a recipient may use your version of this file under
  37 * the terms of any one of the MPL, the GPL or the LGPL.
  38 *
  39 * ***** END LICENSE BLOCK ***** */
  40
  41/*
  42 * JS string type implementation.
  43 *
  44 * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
  45 * native methods store strings (possibly newborn) converted from their 'this'
  46 * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
  47 * conversions at their index (argv[0], argv[1]).  This is a legitimate method
  48 * of rooting things that might lose their newborn root due to subsequent GC
  49 * allocations in the same native method.
  50 */
  51
  52#include "mozilla/Attributes.h"
  53
  54#include <stdlib.h>
  55#include <string.h>
  56#include "jstypes.h"
  57#include "jsutil.h"
  58#include "jshash.h"
  59#include "jsprf.h"
  60#include "jsapi.h"
  61#include "jsarray.h"
  62#include "jsatom.h"
  63#include "jsbool.h"
  64#include "jscntxt.h"
  65#include "jsgc.h"
  66#include "jsinterp.h"
  67#include "jslock.h"
  68#include "jsnum.h"
  69#include "jsobj.h"
  70#include "jsopcode.h"
  71#include "jsprobes.h"
  72#include "jsscope.h"
  73#include "jsstr.h"
  74#include "jsversion.h"
  75
  76#include "builtin/RegExp.h"
  77#include "vm/GlobalObject.h"
  78#include "vm/RegExpObject.h"
  79
  80#include "jsinferinlines.h"
  81#include "jsobjinlines.h"
  82#include "jsautooplen.h"        // generated headers last
  83
  84#include "vm/RegExpObject-inl.h"
  85#include "vm/RegExpStatics-inl.h"
  86#include "vm/StringObject-inl.h"
  87#include "vm/String-inl.h"
  88
  89using namespace js;
  90using namespace js::gc;
  91using namespace js::types;
  92using namespace js::unicode;
  93
  94static JSLinearString *
  95ArgToRootedString(JSContext *cx, CallArgs &args, uintN argno)
  96{
  97    if (argno >= args.length())
  98        return cx->runtime->atomState.typeAtoms[JSTYPE_VOID];
  99
 100    Value &arg = args[argno];
 101    JSString *str = ToString(cx, arg);
 102    if (!str)
 103        return NULL;
 104
 105    arg = StringValue(str);
 106    return str->ensureLinear(cx);
 107}
 108
 109/*
 110 * Forward declarations for URI encode/decode and helper routines
 111 */
 112static JSBool
 113str_decodeURI(JSContext *cx, uintN argc, Value *vp);
 114
 115static JSBool
 116str_decodeURI_Component(JSContext *cx, uintN argc, Value *vp);
 117
 118static JSBool
 119str_encodeURI(JSContext *cx, uintN argc, Value *vp);
 120
 121static JSBool
 122str_encodeURI_Component(JSContext *cx, uintN argc, Value *vp);
 123
 124static const uint32_t INVALID_UTF8 = UINT32_MAX;
 125
 126static uint32_t
 127Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length);
 128
 129/*
 130 * Global string methods
 131 */
 132
 133
 134/* ES5 B.2.1 */
 135static JSBool
 136str_escape(JSContext *cx, uintN argc, Value *vp)
 137{
 138    CallArgs args = CallArgsFromVp(argc, vp);
 139
 140    const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
 141                           '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 142
 143    JSLinearString *str = ArgToRootedString(cx, args, 0);
 144    if (!str)
 145        return false;
 146
 147    size_t length = str->length();
 148    const jschar *chars = str->chars();
 149
 150    static const uint8_t shouldPassThrough[256] = {
 151         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 152         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 153         0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
 154         1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
 155         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
 156         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
 157         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
 158         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,     /*   pqrstuvwxyz{\}~  DEL */
 159    };
 160
 161    /* In step 7, exactly 69 characters should pass through unencoded. */
 162#ifdef DEBUG
 163    size_t count = 0;
 164    for (size_t i = 0; i < sizeof(shouldPassThrough); i++) {
 165        if (shouldPassThrough[i]) {
 166            count++;
 167        }
 168    }
 169    JS_ASSERT(count == 69);
 170#endif
 171
 172
 173    /* Take a first pass and see how big the result string will need to be. */
 174    size_t newlength = length;
 175    for (size_t i = 0; i < length; i++) {
 176        jschar ch = chars[i];
 177        if (ch < 128 && shouldPassThrough[ch])
 178            continue;
 179
 180        /* The character will be encoded as %XX or %uXXXX. */
 181        newlength += (ch < 256) ? 2 : 5;
 182
 183        /*
 184         * This overflow test works because newlength is incremented by at
 185         * most 5 on each iteration.
 186         */
 187        if (newlength < length) {
 188            js_ReportAllocationOverflow(cx);
 189            return false;
 190        }
 191    }
 192
 193    if (newlength >= ~(size_t)0 / sizeof(jschar)) {
 194        js_ReportAllocationOverflow(cx);
 195        return false;
 196    }
 197
 198    jschar *newchars = (jschar *) cx->malloc_((newlength + 1) * sizeof(jschar));
 199    if (!newchars)
 200        return false;
 201    size_t i, ni;
 202    for (i = 0, ni = 0; i < length; i++) {
 203        jschar ch = chars[i];
 204        if (ch < 128 && shouldPassThrough[ch]) {
 205            newchars[ni++] = ch;
 206        } else if (ch < 256) {
 207            newchars[ni++] = '%';
 208            newchars[ni++] = digits[ch >> 4];
 209            newchars[ni++] = digits[ch & 0xF];
 210        } else {
 211            newchars[ni++] = '%';
 212            newchars[ni++] = 'u';
 213            newchars[ni++] = digits[ch >> 12];
 214            newchars[ni++] = digits[(ch & 0xF00) >> 8];
 215            newchars[ni++] = digits[(ch & 0xF0) >> 4];
 216            newchars[ni++] = digits[ch & 0xF];
 217        }
 218    }
 219    JS_ASSERT(ni == newlength);
 220    newchars[newlength] = 0;
 221
 222    JSString *retstr = js_NewString(cx, newchars, newlength);
 223    if (!retstr) {
 224        cx->free_(newchars);
 225        return false;
 226    }
 227
 228    args.rval() = StringValue(retstr);
 229    return true;
 230}
 231
 232static inline bool
 233Unhex4(const jschar *chars, jschar *result)
 234{
 235    jschar a = chars[0],
 236           b = chars[1],
 237           c = chars[2],
 238           d = chars[3];
 239
 240    if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
 241        return false;
 242
 243    *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
 244    return true;
 245}
 246
 247static inline bool
 248Unhex2(const jschar *chars, jschar *result)
 249{
 250    jschar a = chars[0],
 251           b = chars[1];
 252
 253    if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
 254        return false;
 255
 256    *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
 257    return true;
 258}
 259
 260/* ES5 B.2.2 */
 261static JSBool
 262str_unescape(JSContext *cx, uintN argc, Value *vp)
 263{
 264    CallArgs args = CallArgsFromVp(argc, vp);
 265
 266    /* Step 1. */
 267    JSLinearString *str = ArgToRootedString(cx, args, 0);
 268    if (!str)
 269        return false;
 270
 271    /* Step 2. */
 272    size_t length = str->length();
 273    const jschar *chars = str->chars();
 274
 275    /* Step 3. */
 276    StringBuffer sb(cx);
 277
 278    /*
 279     * Note that the spec algorithm has been optimized to avoid building
 280     * a string in the case where no escapes are present.
 281     */
 282
 283    /* Step 4. */
 284    size_t k = 0;
 285    bool building = false;
 286
 287    while (true) {
 288        /* Step 5. */
 289        if (k == length) {
 290            JSLinearString *result;
 291            if (building) {
 292                result = sb.finishString();
 293                if (!result)
 294                    return false;
 295            } else {
 296                result = str;
 297            }
 298
 299            args.rval() = StringValue(result);
 300            return true;
 301        }
 302
 303        /* Step 6. */
 304        jschar c = chars[k];
 305
 306        /* Step 7. */
 307        if (c != '%')
 308            goto step_18;
 309
 310        /* Step 8. */
 311        if (k > length - 6)
 312            goto step_14;
 313
 314        /* Step 9. */
 315        if (chars[k + 1] != 'u')
 316            goto step_14;
 317
 318#define ENSURE_BUILDING                             \
 319    JS_BEGIN_MACRO                                  \
 320        if (!building) {                            \
 321            building = true;                        \
 322            if (!sb.reserve(length))                \
 323                return false;                       \
 324            sb.infallibleAppend(chars, chars + k);  \
 325        }                                           \
 326    JS_END_MACRO
 327
 328        /* Step 10-13. */
 329        if (Unhex4(&chars[k + 2], &c)) {
 330            ENSURE_BUILDING;
 331            k += 5;
 332            goto step_18;
 333        }
 334
 335      step_14:
 336        /* Step 14. */
 337        if (k > length - 3)
 338            goto step_18;
 339
 340        /* Step 15-17. */
 341        if (Unhex2(&chars[k + 1], &c)) {
 342            ENSURE_BUILDING;
 343            k += 2;
 344        }
 345
 346      step_18:
 347        if (building)
 348            sb.infallibleAppend(c);
 349
 350        /* Step 19. */
 351        k += 1;
 352    }
 353#undef ENSURE_BUILDING
 354}
 355
 356#if JS_HAS_UNEVAL
 357static JSBool
 358str_uneval(JSContext *cx, uintN argc, Value *vp)
 359{
 360    CallArgs args = CallArgsFromVp(argc, vp);
 361    JSString *str = js_ValueToSource(cx, args.length() != 0 ? args[0] : UndefinedValue());
 362    if (!str)
 363        return false;
 364
 365    args.rval() = StringValue(str);
 366    return true;
 367}
 368#endif
 369
 370const char js_escape_str[] = "escape";
 371const char js_unescape_str[] = "unescape";
 372#if JS_HAS_UNEVAL
 373const char js_uneval_str[] = "uneval";
 374#endif
 375const char js_decodeURI_str[] = "decodeURI";
 376const char js_encodeURI_str[] = "encodeURI";
 377const char js_decodeURIComponent_str[] = "decodeURIComponent";
 378const char js_encodeURIComponent_str[] = "encodeURIComponent";
 379
 380static JSFunctionSpec string_functions[] = {
 381    JS_FN(js_escape_str,             str_escape,                1,0),
 382    JS_FN(js_unescape_str,           str_unescape,              1,0),
 383#if JS_HAS_UNEVAL
 384    JS_FN(js_uneval_str,             str_uneval,                1,0),
 385#endif
 386    JS_FN(js_decodeURI_str,          str_decodeURI,             1,0),
 387    JS_FN(js_encodeURI_str,          str_encodeURI,             1,0),
 388    JS_FN(js_decodeURIComponent_str, str_decodeURI_Component,   1,0),
 389    JS_FN(js_encodeURIComponent_str, str_encodeURI_Component,   1,0),
 390
 391    JS_FS_END
 392};
 393
 394jschar      js_empty_ucstr[]  = {0};
 395JSSubString js_EmptySubString = {0, js_empty_ucstr};
 396
 397static const uintN STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
 398
 399static JSBool
 400str_enumerate(JSContext *cx, JSObject *obj)
 401{
 402    JSString *str = obj->getPrimitiveThis().toString();
 403    for (size_t i = 0, length = str->length(); i < length; i++) {
 404        JSString *str1 = js_NewDependentString(cx, str, i, 1);
 405        if (!str1)
 406            return false;
 407        if (!obj->defineElement(cx, i, StringValue(str1),
 408                                JS_PropertyStub, JS_StrictPropertyStub,
 409                                STRING_ELEMENT_ATTRS)) {
 410            return false;
 411        }
 412    }
 413
 414    return true;
 415}
 416
 417static JSBool
 418str_resolve(JSContext *cx, JSObject *obj, jsid id, uintN flags,
 419            JSObject **objp)
 420{
 421    if (!JSID_IS_INT(id))
 422        return JS_TRUE;
 423
 424    JSString *str = obj->getPrimitiveThis().toString();
 425
 426    jsint slot = JSID_TO_INT(id);
 427    if ((size_t)slot < str->length()) {
 428        JSString *str1 = cx->runtime->staticStrings.getUnitStringForElement(cx, str, size_t(slot));
 429        if (!str1)
 430            return JS_FALSE;
 431        if (!obj->defineElement(cx, uint32_t(slot), StringValue(str1), NULL, NULL,
 432                                STRING_ELEMENT_ATTRS)) {
 433            return JS_FALSE;
 434        }
 435        *objp = obj;
 436    }
 437    return JS_TRUE;
 438}
 439
 440Class js::StringClass = {
 441    js_String_str,
 442    JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
 443    JSCLASS_NEW_RESOLVE | JSCLASS_HAS_CACHED_PROTO(JSProto_String),
 444    JS_PropertyStub,         /* addProperty */
 445    JS_PropertyStub,         /* delProperty */
 446    JS_PropertyStub,         /* getProperty */
 447    JS_StrictPropertyStub,   /* setProperty */
 448    str_enumerate,
 449    (JSResolveOp)str_resolve,
 450    JS_ConvertStub
 451};
 452
 453/*
 454 * Returns a JSString * for the |this| value associated with 'call', or throws
 455 * a TypeError if |this| is null or undefined.  This algorithm is the same as
 456 * calling CheckObjectCoercible(this), then returning ToString(this), as all
 457 * String.prototype.* methods do (other than toString and valueOf).
 458 */
 459static JS_ALWAYS_INLINE JSString *
 460ThisToStringForStringProto(JSContext *cx, CallReceiver call)
 461{
 462    JS_CHECK_RECURSION(cx, return NULL);
 463
 464    if (call.thisv().isString())
 465        return call.thisv().toString();
 466
 467    if (call.thisv().isObject()) {
 468        JSObject *obj = &call.thisv().toObject();
 469        if (obj->isString() &&
 470            ClassMethodIsNative(cx, obj,
 471                                &StringClass,
 472                                ATOM_TO_JSID(cx->runtime->atomState.toStringAtom),
 473                                js_str_toString))
 474        {
 475            call.thisv() = obj->getPrimitiveThis();
 476            return call.thisv().toString();
 477        }
 478    } else if (call.thisv().isNullOrUndefined()) {
 479        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_CONVERT_TO,
 480                             call.thisv().isNull() ? "null" : "undefined", "object");
 481        return NULL;
 482    }
 483
 484    JSString *str = ToStringSlow(cx, call.thisv());
 485    if (!str)
 486        return NULL;
 487
 488    call.thisv().setString(str);
 489    return str;
 490}
 491
 492#if JS_HAS_TOSOURCE
 493
 494/*
 495 * String.prototype.quote is generic (as are most string methods), unlike
 496 * toSource, toString, and valueOf.
 497 */
 498static JSBool
 499str_quote(JSContext *cx, uintN argc, Value *vp)
 500{
 501    CallArgs args = CallArgsFromVp(argc, vp);
 502    JSString *str = ThisToStringForStringProto(cx, args);
 503    if (!str)
 504        return false;
 505    str = js_QuoteString(cx, str, '"');
 506    if (!str)
 507        return false;
 508    args.rval() = StringValue(str);
 509    return true;
 510}
 511
 512static JSBool
 513str_toSource(JSContext *cx, uintN argc, Value *vp)
 514{
 515    CallArgs args = CallArgsFromVp(argc, vp);
 516
 517    JSString *str;
 518    bool ok;
 519    if (!BoxedPrimitiveMethodGuard(cx, args, str_toSource, &str, &ok))
 520        return ok;
 521
 522    str = js_QuoteString(cx, str, '"');
 523    if (!str)
 524        return false;
 525
 526    StringBuffer sb(cx);
 527    if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
 528        return false;
 529
 530    str = sb.finishString();
 531    if (!str)
 532        return false;
 533    args.rval() = StringValue(str);
 534    return true;
 535}
 536
 537#endif /* JS_HAS_TOSOURCE */
 538
 539JSBool
 540js_str_toString(JSContext *cx, uintN argc, Value *vp)
 541{
 542    CallArgs args = CallArgsFromVp(argc, vp);
 543
 544    JSString *str;
 545    bool ok;
 546    if (!BoxedPrimitiveMethodGuard(cx, args, js_str_toString, &str, &ok))
 547        return ok;
 548
 549    args.rval() = StringValue(str);
 550    return true;
 551}
 552
 553/*
 554 * Java-like string native methods.
 555 */
 556
 557JS_ALWAYS_INLINE bool
 558ValueToIntegerRange(JSContext *cx, const Value &v, int32_t *out)
 559{
 560    if (v.isInt32()) {
 561        *out = v.toInt32();
 562    } else {
 563        double d;
 564        if (!ToInteger(cx, v, &d))
 565            return false;
 566        if (d > INT32_MAX)
 567            *out = INT32_MAX;
 568        else if (d < INT32_MIN)
 569            *out = INT32_MIN;
 570        else
 571            *out = int32_t(d);
 572    }
 573
 574    return true;
 575}
 576
 577static JSBool
 578str_substring(JSContext *cx, uintN argc, Value *vp)
 579{
 580    CallArgs args = CallArgsFromVp(argc, vp);
 581
 582    JSString *str = ThisToStringForStringProto(cx, args);
 583    if (!str)
 584        return false;
 585
 586    int32_t length, begin, end;
 587    if (args.length() > 0) {
 588        end = length = int32_t(str->length());
 589
 590        if (!ValueToIntegerRange(cx, args[0], &begin))
 591            return false;
 592
 593        if (begin < 0)
 594            begin = 0;
 595        else if (begin > length)
 596            begin = length;
 597
 598        if (args.length() > 1 && !args[1].isUndefined()) {
 599            if (!ValueToIntegerRange(cx, args[1], &end))
 600                return false;
 601
 602            if (end > length) {
 603                end = length;
 604            } else {
 605                if (end < 0)
 606                    end = 0;
 607                if (end < begin) {
 608                    int32_t tmp = begin;
 609                    begin = end;
 610                    end = tmp;
 611                }
 612            }
 613        }
 614
 615        str = js_NewDependentString(cx, str, size_t(begin), size_t(end - begin));
 616        if (!str)
 617            return false;
 618    }
 619
 620    args.rval() = StringValue(str);
 621    return true;
 622}
 623
 624JSString* JS_FASTCALL
 625js_toLowerCase(JSContext *cx, JSString *str)
 626{
 627    size_t n = str->length();
 628    const jschar *s = str->getChars(cx);
 629    if (!s)
 630        return NULL;
 631
 632    jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
 633    if (!news)
 634        return NULL;
 635    for (size_t i = 0; i < n; i++)
 636        news[i] = unicode::ToLowerCase(s[i]);
 637    news[n] = 0;
 638    str = js_NewString(cx, news, n);
 639    if (!str) {
 640        cx->free_(news);
 641        return NULL;
 642    }
 643    return str;
 644}
 645
 646static inline bool
 647ToLowerCaseHelper(JSContext *cx, CallReceiver call)
 648{
 649    JSString *str = ThisToStringForStringProto(cx, call);
 650    if (!str)
 651        return false;
 652
 653    str = js_toLowerCase(cx, str);
 654    if (!str)
 655        return false;
 656
 657    call.rval() = StringValue(str);
 658    return true;
 659}
 660
 661static JSBool
 662str_toLowerCase(JSContext *cx, uintN argc, Value *vp)
 663{
 664    return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
 665}
 666
 667static JSBool
 668str_toLocaleLowerCase(JSContext *cx, uintN argc, Value *vp)
 669{
 670    CallArgs args = CallArgsFromVp(argc, vp);
 671
 672    /*
 673     * Forcefully ignore the first (or any) argument and return toLowerCase(),
 674     * ECMA has reserved that argument, presumably for defining the locale.
 675     */
 676    if (cx->localeCallbacks && cx->localeCallbacks->localeToLowerCase) {
 677        JSString *str = ThisToStringForStringProto(cx, args);
 678        if (!str)
 679            return false;
 680
 681        Value result;
 682        if (!cx->localeCallbacks->localeToLowerCase(cx, str, &result))
 683            return false;
 684
 685        args.rval() = result;
 686        return true;
 687    }
 688
 689    return ToLowerCaseHelper(cx, args);
 690}
 691
 692JSString* JS_FASTCALL
 693js_toUpperCase(JSContext *cx, JSString *str)
 694{
 695    size_t n = str->length();
 696    const jschar *s = str->getChars(cx);
 697    if (!s)
 698        return NULL;
 699    jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
 700    if (!news)
 701        return NULL;
 702    for (size_t i = 0; i < n; i++)
 703        news[i] = unicode::ToUpperCase(s[i]);
 704    news[n] = 0;
 705    str = js_NewString(cx, news, n);
 706    if (!str) {
 707        cx->free_(news);
 708        return NULL;
 709    }
 710    return str;
 711}
 712
 713static JSBool
 714ToUpperCaseHelper(JSContext *cx, CallReceiver call)
 715{
 716    JSString *str = ThisToStringForStringProto(cx, call);
 717    if (!str)
 718        return false;
 719
 720    str = js_toUpperCase(cx, str);
 721    if (!str)
 722        return false;
 723
 724    call.rval() = StringValue(str);
 725    return true;
 726}
 727
 728static JSBool
 729str_toUpperCase(JSContext *cx, uintN argc, Value *vp)
 730{
 731    return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
 732}
 733
 734static JSBool
 735str_toLocaleUpperCase(JSContext *cx, uintN argc, Value *vp)
 736{
 737    CallArgs args = CallArgsFromVp(argc, vp);
 738
 739    /*
 740     * Forcefully ignore the first (or any) argument and return toUpperCase(),
 741     * ECMA has reserved that argument, presumably for defining the locale.
 742     */
 743    if (cx->localeCallbacks && cx->localeCallbacks->localeToUpperCase) {
 744        JSString *str = ThisToStringForStringProto(cx, args);
 745        if (!str)
 746            return false;
 747
 748        Value result;
 749        if (!cx->localeCallbacks->localeToUpperCase(cx, str, &result))
 750            return false;
 751
 752        args.rval() = result;
 753        return true;
 754    }
 755
 756    return ToUpperCaseHelper(cx, args);
 757}
 758
 759static JSBool
 760str_localeCompare(JSContext *cx, uintN argc, Value *vp)
 761{
 762    CallArgs args = CallArgsFromVp(argc, vp);
 763    JSString *str = ThisToStringForStringProto(cx, args);
 764    if (!str)
 765        return false;
 766
 767    if (args.length() == 0) {
 768        args.rval() = Int32Value(0);
 769    } else {
 770        JSString *thatStr = ToString(cx, args[0]);
 771        if (!thatStr)
 772            return false;
 773
 774        if (cx->localeCallbacks && cx->localeCallbacks->localeCompare) {
 775            args[0].setString(thatStr);
 776
 777            Value result;
 778            if (!cx->localeCallbacks->localeCompare(cx, str, thatStr, &result))
 779                return true;
 780
 781            args.rval() = result;
 782            return true;
 783        }
 784
 785        int32_t result;
 786        if (!CompareStrings(cx, str, thatStr, &result))
 787            return false;
 788
 789        args.rval() = Int32Value(result);
 790    }
 791    return true;
 792}
 793
 794JSBool
 795js_str_charAt(JSContext *cx, uintN argc, Value *vp)
 796{
 797    CallArgs args = CallArgsFromVp(argc, vp);
 798
 799    JSString *str;
 800    size_t i;
 801    if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
 802        str = args.thisv().toString();
 803        i = size_t(args[0].toInt32());
 804        if (i >= str->length())
 805            goto out_of_range;
 806    } else {
 807        str = ThisToStringForStringProto(cx, args);
 808        if (!str)
 809            return false;
 810
 811        double d = 0.0;
 812        if (args.length() > 0 && !ToInteger(cx, args[0], &d))
 813            return false;
 814
 815        if (d < 0 || str->length() <= d)
 816            goto out_of_range;
 817        i = size_t(d);
 818    }
 819
 820    str = cx->runtime->staticStrings.getUnitStringForElement(cx, str, i);
 821    if (!str)
 822        return false;
 823    args.rval() = StringValue(str);
 824    return true;
 825
 826  out_of_range:
 827    args.rval() = StringValue(cx->runtime->emptyString);
 828    return true;
 829}
 830
 831JSBool
 832js_str_charCodeAt(JSContext *cx, uintN argc, Value *vp)
 833{
 834    CallArgs args = CallArgsFromVp(argc, vp);
 835
 836    JSString *str;
 837    size_t i;
 838    if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
 839        str = args.thisv().toString();
 840        i = size_t(args[0].toInt32());
 841        if (i >= str->length())
 842            goto out_of_range;
 843    } else {
 844        str = ThisToStringForStringProto(cx, args);
 845        if (!str)
 846            return false;
 847
 848        double d = 0.0;
 849        if (args.length() > 0 && !ToInteger(cx, args[0], &d))
 850            return false;
 851
 852        if (d < 0 || str->length() <= d)
 853            goto out_of_range;
 854        i = size_t(d);
 855    }
 856
 857    const jschar *chars;
 858    chars = str->getChars(cx);
 859    if (!chars)
 860        return false;
 861
 862    args.rval() = Int32Value(chars[i]);
 863    return true;
 864
 865out_of_range:
 866    args.rval() = DoubleValue(js_NaN);
 867    return true;
 868}
 869
 870/*
 871 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
 872 * The patlen argument must be positive and no greater than sBMHPatLenMax.
 873 *
 874 * Return the index of pat in text, or -1 if not found.
 875 */
 876static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
 877static const jsuint sBMHPatLenMax   = 255; /* skip table element is uint8_t */
 878static const jsint  sBMHBadPattern  = -2;  /* return value if pat is not ISO-Latin-1 */
 879
 880jsint
 881js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
 882                      const jschar *pat, jsuint patlen)
 883{
 884    uint8_t skip[sBMHCharSetSize];
 885
 886    JS_ASSERT(0 < patlen && patlen <= sBMHPatLenMax);
 887    for (jsuint i = 0; i < sBMHCharSetSize; i++)
 888        skip[i] = (uint8_t)patlen;
 889    jsuint m = patlen - 1;
 890    for (jsuint i = 0; i < m; i++) {
 891        jschar c = pat[i];
 892        if (c >= sBMHCharSetSize)
 893            return sBMHBadPattern;
 894        skip[c] = (uint8_t)(m - i);
 895    }
 896    jschar c;
 897    for (jsuint k = m;
 898         k < textlen;
 899         k += ((c = text[k]) >= sBMHCharSetSize) ? patlen : skip[c]) {
 900        for (jsuint i = k, j = m; ; i--, j--) {
 901            if (text[i] != pat[j])
 902                break;
 903            if (j == 0)
 904                return static_cast<jsint>(i);  /* safe: max string size */
 905        }
 906    }
 907    return -1;
 908}
 909
 910struct MemCmp {
 911    typedef jsuint Extent;
 912    static JS_ALWAYS_INLINE Extent computeExtent(const jschar *, jsuint patlen) {
 913        return (patlen - 1) * sizeof(jschar);
 914    }
 915    static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
 916        return memcmp(p, t, extent) == 0;
 917    }
 918};
 919
 920struct ManualCmp {
 921    typedef const jschar *Extent;
 922    static JS_ALWAYS_INLINE Extent computeExtent(const jschar *pat, jsuint patlen) {
 923        return pat + patlen;
 924    }
 925    static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
 926        for (; p != extent; ++p, ++t) {
 927            if (*p != *t)
 928                return false;
 929        }
 930        return true;
 931    }
 932};
 933
 934template <class InnerMatch>
 935static jsint
 936UnrolledMatch(const jschar *text, jsuint textlen, const jschar *pat, jsuint patlen)
 937{
 938    JS_ASSERT(patlen > 0 && textlen > 0);
 939    const jschar *textend = text + textlen - (patlen - 1);
 940    const jschar p0 = *pat;
 941    const jschar *const patNext = pat + 1;
 942    const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
 943    uint8_t fixup;
 944
 945    const jschar *t = text;
 946    switch ((textend - t) & 7) {
 947      case 0: if (*t++ == p0) { fixup = 8; goto match; }
 948      case 7: if (*t++ == p0) { fixup = 7; goto match; }
 949      case 6: if (*t++ == p0) { fixup = 6; goto match; }
 950      case 5: if (*t++ == p0) { fixup = 5; goto match; }
 951      case 4: if (*t++ == p0) { fixup = 4; goto match; }
 952      case 3: if (*t++ == p0) { fixup = 3; goto match; }
 953      case 2: if (*t++ == p0) { fixup = 2; goto match; }
 954      case 1: if (*t++ == p0) { fixup = 1; goto match; }
 955    }
 956    while (t != textend) {
 957      if (t[0] == p0) { t += 1; fixup = 8; goto match; }
 958      if (t[1] == p0) { t += 2; fixup = 7; goto match; }
 959      if (t[2] == p0) { t += 3; fixup = 6; goto match; }
 960      if (t[3] == p0) { t += 4; fixup = 5; goto match; }
 961      if (t[4] == p0) { t += 5; fixup = 4; goto match; }
 962      if (t[5] == p0) { t += 6; fixup = 3; goto match; }
 963      if (t[6] == p0) { t += 7; fixup = 2; goto match; }
 964      if (t[7] == p0) { t += 8; fixup = 1; goto match; }
 965        t += 8;
 966        continue;
 967        do {
 968            if (*t++ == p0) {
 969              match:
 970                if (!InnerMatch::match(patNext, t, extent))
 971                    goto failed_match;
 972                return t - text - 1;
 973            }
 974          failed_match:;
 975        } while (--fixup > 0);
 976    }
 977    return -1;
 978}
 979
 980static JS_ALWAYS_INLINE jsint
 981StringMatch(const jschar *text, jsuint textlen,
 982            const jschar *pat, jsuint patlen)
 983{
 984    if (patlen == 0)
 985        return 0;
 986    if (textlen < patlen)
 987        return -1;
 988
 989#if defined(__i386__) || defined(_M_IX86) || defined(__i386)
 990    /*
 991     * Given enough registers, the unrolled loop below is faster than the
 992     * following loop. 32-bit x86 does not have enough registers.
 993     */
 994    if (patlen == 1) {
 995        const jschar p0 = *pat;
 996        for (const jschar *c = text, *end = text + textlen; c != end; ++c) {
 997            if (*c == p0)
 998                return c - text;
 999        }
1000        return -1;
1001    }
1002#endif
1003
1004    /*
1005     * If the text or pattern string is short, BMH will be more expensive than
1006     * the basic linear scan due to initialization cost and a more complex loop
1007     * body. While the correct threshold is input-dependent, we can make a few
1008     * conservative observations:
1009     *  - When |textlen| is "big enough", the initialization time will be
1010     *    proportionally small, so the worst-case slowdown is minimized.
1011     *  - When |patlen| is "too small", even the best case for BMH will be
1012     *    slower than a simple scan for large |textlen| due to the more complex
1013     *    loop body of BMH.
1014     * From this, the values for "big enough" and "too small" are determined
1015     * empirically. See bug 526348.
1016     */
1017    if (textlen >= 512 && patlen >= 11 && patlen <= sBMHPatLenMax) {
1018        jsint index = js_BoyerMooreHorspool(text, textlen, pat, patlen);
1019        if (index != sBMHBadPattern)
1020            return index;
1021    }
1022
1023    /*
1024     * For big patterns with large potential overlap we want the SIMD-optimized
1025     * speed of memcmp. For small patterns, a simple loop is faster.
1026     *
1027     * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1028     */
1029    return
1030#if !defined(__linux__)
1031           patlen > 128 ? UnrolledMatch<MemCmp>(text, textlen, pat, patlen)
1032                        :
1033#endif
1034                          UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
1035}
1036
1037static const size_t sRopeMatchThresholdRatioLog2 = 5;
1038
1039/*
1040 * RopeMatch takes the text to search, the patern to search for in the text.
1041 * RopeMatch returns false on OOM and otherwise returns the match index through
1042 * the 'match' outparam (-1 for not found).
1043 */
1044static bool
1045RopeMatch(JSContext *cx, JSString *textstr, const jschar *pat, jsuint patlen, jsint *match)
1046{
1047    JS_ASSERT(textstr->isRope());
1048
1049    if (patlen == 0) {
1050        *match = 0;
1051        return true;
1052    }
1053    if (textstr->length() < patlen) {
1054        *match = -1;
1055        return true;
1056    }
1057
1058    /*
1059     * List of leaf nodes in the rope. If we run out of memory when trying to
1060     * append to this list, we can still fall back to StringMatch, so use the
1061     * system allocator so we don't report OOM in that case.
1062     */
1063    Vector<JSLinearString *, 16, SystemAllocPolicy> strs;
1064
1065    /*
1066     * We don't want to do rope matching if there is a poor node-to-char ratio,
1067     * since this means spending a lot of time in the match loop below. We also
1068     * need to build the list of leaf nodes. Do both here: iterate over the
1069     * nodes so long as there are not too many.
1070     */
1071    {
1072        size_t textstrlen = textstr->length();
1073        size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2;
1074        StringSegmentRange r(cx);
1075        if (!r.init(textstr))
1076            return false;
1077        while (!r.empty()) {
1078            if (threshold-- == 0 || !strs.append(r.front())) {
1079                const jschar *chars = textstr->getChars(cx);
1080                if (!chars)
1081                    return false;
1082                *match = StringMatch(chars, textstrlen, pat, patlen);
1083                return true;
1084            }
1085            if (!r.popFront())
1086                return false;
1087        }
1088    }
1089
1090    /* Absolute offset from the beginning of the logical string textstr. */
1091    jsint pos = 0;
1092
1093    for (JSLinearString **outerp = strs.begin(); outerp != strs.end(); ++outerp) {
1094        /* Try to find a match within 'outer'. */
1095        JSLinearString *outer = *outerp;
1096        const jschar *chars = outer->chars();
1097        size_t len = outer->length();
1098        jsint matchResult = StringMatch(chars, len, pat, patlen);
1099        if (matchResult != -1) {
1100            /* Matched! */
1101            *match = pos + matchResult;
1102            return true;
1103        }
1104
1105        /* Try to find a match starting in 'outer' and running into other nodes. */
1106        const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1);
1107        const jschar *const textend = chars + len;
1108        const jschar p0 = *pat;
1109        const jschar *const p1 = pat + 1;
1110        const jschar *const patend = pat + patlen;
1111        for (const jschar *t = text; t != textend; ) {
1112            if (*t++ != p0)
1113                continue;
1114            JSLinearString **innerp = outerp;
1115            const jschar *ttend = textend;
1116            for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) {
1117                while (tt == ttend) {
1118                    if (++innerp == strs.end()) {
1119                        *match = -1;
1120                        return true;
1121                    }
1122                    JSLinearString *inner = *innerp;
1123                    tt = inner->chars();
1124                    ttend = tt + inner->length();
1125                }
1126                if (*pp != *tt)
1127                    goto break_continue;
1128            }
1129
1130            /* Matched! */
1131            *match = pos + (t - chars) - 1;  /* -1 because of *t++ above */
1132            return true;
1133
1134          break_continue:;
1135        }
1136
1137        pos += len;
1138    }
1139
1140    *match = -1;
1141    return true;
1142}
1143
1144static JSBool
1145str_indexOf(JSContext *cx, uintN argc, Value *vp)
1146{
1147    CallArgs args = CallArgsFromVp(argc, vp);
1148    JSString *str = ThisToStringForStringProto(cx, args);
1149    if (!str)
1150        return false;
1151
1152    JSLinearString *patstr = ArgToRootedString(cx, args, 0);
1153    if (!patstr)
1154        return false;
1155
1156    jsuint textlen = str->length();
1157    const jschar *text = str->getChars(cx);
1158    if (!text)
1159        return false;
1160
1161    jsuint patlen = patstr->length();
1162    const jschar *pat = patstr->chars();
1163
1164    jsuint start;
1165    if (args.length() > 1) {
1166        if (args[1].isInt32()) {
1167            jsint i = args[1].toInt32();
1168            if (i <= 0) {
1169                start = 0;
1170            } else if (jsuint(i) > textlen) {
1171                start = textlen;
1172                textlen = 0;
1173            } else {
1174                start = i;
1175                text += start;
1176                textlen -= start;
1177            }
1178        } else {
1179            jsdouble d;
1180            if (!ToInteger(cx, args[1], &d))
1181                return false;
1182            if (d <= 0) {
1183                start = 0;
1184            } else if (d > textlen) {
1185                start = textlen;
1186                textlen = 0;
1187            } else {
1188                start = (jsint)d;
1189                text += start;
1190                textlen -= start;
1191            }
1192        }
1193    } else {
1194        start = 0;
1195    }
1196
1197    jsint match = StringMatch(text, textlen, pat, patlen);
1198    args.rval() = Int32Value((match == -1) ? -1 : start + match);
1199    return true;
1200}
1201
1202static JSBool
1203str_lastIndexOf(JSContext *cx, uintN argc, Value *vp)
1204{
1205    CallArgs args = CallArgsFromVp(argc, vp);
1206    JSString *textstr = ThisToStringForStringProto(cx, args);
1207    if (!textstr)
1208        return false;
1209
1210    size_t textlen = textstr->length();
1211    const jschar *text = textstr->getChars(cx);
1212    if (!text)
1213        return false;
1214
1215    JSLinearString *patstr = ArgToRootedString(cx, args, 0);
1216    if (!patstr)
1217        return false;
1218
1219    size_t patlen = patstr->length();
1220    const jschar *pat = patstr->chars();
1221
1222    jsint i = textlen - patlen; // Start searching here
1223    if (i < 0) {
1224        args.rval() = Int32Value(-1);
1225        return true;
1226    }
1227
1228    if (args.length() > 1) {
1229        if (args[1].isInt32()) {
1230            jsint j = args[1].toInt32();
1231            if (j <= 0)
1232                i = 0;
1233            else if (j < i)
1234                i = j;
1235        } else {
1236            double d;
1237            if (!ToNumber(cx, args[1], &d))
1238                return false;
1239            if (!JSDOUBLE_IS_NaN(d)) {
1240                d = js_DoubleToInteger(d);
1241                if (d <= 0)
1242                    i = 0;
1243                else if (d < i)
1244                    i = (jsint)d;
1245            }
1246        }
1247    }
1248
1249    if (patlen == 0) {
1250        args.rval() = Int32Value(i);
1251        return true;
1252    }
1253
1254    const jschar *t = text + i;
1255    const jschar *textend = text - 1;
1256    const jschar p0 = *pat;
1257    const jschar *patNext = pat + 1;
1258    const jschar *patEnd = pat + patlen;
1259
1260    for (; t != textend; --t) {
1261        if (*t == p0) {
1262            const jschar *t1 = t + 1;
1263            for (const jschar *p1 = patNext; p1 != patEnd; ++p1, ++t1) {
1264                if (*t1 != *p1)
1265                    goto break_continue;
1266            }
1267            args.rval() = Int32Value(t - text);
1268            return true;
1269        }
1270      break_continue:;
1271    }
1272
1273    args.rval() = Int32Value(-1);
1274    return true;
1275}
1276
1277static JSBool
1278js_TrimString(JSContext *cx, Value *vp, JSBool trimLeft, JSBool trimRight)
1279{
1280    CallReceiver call = CallReceiverFromVp(vp);
1281    JSString *str = ThisToStringForStringProto(cx, call);
1282    if (!str)
1283        return false;
1284    size_t length = str->length();
1285    const jschar *chars = str->getChars(cx);
1286    if (!chars)
1287        return false;
1288
1289    size_t begin = 0;
1290    size_t end = length;
1291
1292    if (trimLeft) {
1293        while (begin < length && unicode::IsSpace(chars[begin]))
1294            ++begin;
1295    }
1296
1297    if (trimRight) {
1298        while (end > begin && unicode::IsSpace(chars[end - 1]))
1299            --end;
1300    }
1301
1302    str = js_NewDependentString(cx, str, begin, end - begin);
1303    if (!str)
1304        return false;
1305
1306    call.rval() = StringValue(str);
1307    return true;
1308}
1309
1310static JSBool
1311str_trim(JSContext *cx, uintN argc, Value *vp)
1312{
1313    return js_TrimString(cx, vp, JS_TRUE, JS_TRUE);
1314}
1315
1316static JSBool
1317str_trimLeft(JSContext *cx, uintN argc, Value *vp)
1318{
1319    return js_TrimString(cx, vp, JS_TRUE, JS_FALSE);
1320}
1321
1322static JSBool
1323str_trimRight(JSContext *cx, uintN argc, Value *vp)
1324{
1325    return js_TrimString(cx, vp, JS_FALSE, JS_TRUE);
1326}
1327
1328/*
1329 * Perl-inspired string functions.
1330 */
1331
1332/* Result of a successfully performed flat match. */
1333class FlatMatch
1334{
1335    JSAtom       *patstr;
1336    const jschar *pat;
1337    size_t       patlen;
1338    int32_t      match_;
1339
1340    friend class RegExpGuard;
1341
1342  public:
1343    FlatMatch() : patstr(NULL) {} /* Old GCC wants this initialization. */
1344    JSLinearString *pattern() const { return patstr; }
1345    size_t patternLength() const { return patlen; }
1346
1347    /*
1348     * Note: The match is -1 when the match is performed successfully,
1349     * but no match is found.
1350     */
1351    int32_t match() const { return match_; }
1352};
1353
1354static inline bool
1355IsRegExpMetaChar(jschar c)
1356{
1357    switch (c) {
1358      /* Taken from the PatternCharacter production in 15.10.1. */
1359      case '^': case '$': case '\\': case '.': case '*': case '+':
1360      case '?': case '(': case ')': case '[': case ']': case '{':
1361      case '}': case '|':
1362        return true;
1363      default:
1364        return false;
1365    }
1366}
1367
1368static inline bool
1369HasRegExpMetaChars(const jschar *chars, size_t length)
1370{
1371    for (size_t i = 0; i < length; ++i) {
1372        if (IsRegExpMetaChar(chars[i]))
1373            return true;
1374    }
1375    return false;
1376}
1377
1378/*
1379 * RegExpGuard factors logic out of String regexp operations.
1380 *
1381 * |optarg| indicates in which argument position RegExp flags will be found, if
1382 * present. This is a Mozilla extension and not part of any ECMA spec.
1383 */
1384class RegExpGuard
1385{
1386    RegExpGuard(const RegExpGuard &) MOZ_DELETE;
1387    void operator=(const RegExpGuard &) MOZ_DELETE;
1388
1389    RegExpShared::Guard re_;
1390    FlatMatch           fm;
1391
1392    /*
1393     * Upper bound on the number of characters we are willing to potentially
1394     * waste on searching for RegExp meta-characters.
1395     */
1396    static const size_t MAX_FLAT_PAT_LEN = 256;
1397
1398    static JSAtom *
1399    flattenPattern(JSContext *cx, JSAtom *patstr)
1400    {
1401        StringBuffer sb(cx);
1402        if (!sb.reserve(patstr->length()))
1403            return NULL;
1404
1405        static const jschar ESCAPE_CHAR = '\\';
1406        const jschar *chars = patstr->chars();
1407        size_t len = patstr->length();
1408        for (const jschar *it = chars; it != chars + len; ++it) {
1409            if (IsRegExpMetaChar(*it)) {
1410                if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
1411                    return NULL;
1412            } else {
1413                if (!sb.append(*it))
1414                    return NULL;
1415            }
1416        }
1417        return sb.finishAtom();
1418    }
1419
1420  public:
1421    RegExpGuard() {}
1422
1423    /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
1424    bool init(JSContext *cx, CallArgs args, bool convertVoid = false)
1425    {
1426        if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1427            RegExpShared *shared = RegExpToShared(cx, args[0].toObject());
1428            if (!shared)
1429                return false;
1430            re_.init(*shared);
1431        } else {
1432            if (convertVoid && (args.length() == 0 || args[0].isUndefined())) {
1433                fm.patstr = cx->runtime->emptyString;
1434                return true;
1435            }
1436
1437            JSString *arg = ArgToRootedString(cx, args, 0);
1438            if (!arg)
1439                return false;
1440
1441            fm.patstr = js_AtomizeString(cx, arg);
1442            if (!fm.patstr)
1443                return false;
1444        }
1445        return true;
1446    }
1447
1448    /*
1449     * Attempt to match |patstr| to |textstr|. A flags argument, metachars in the
1450     * pattern string, or a lengthy pattern string can thwart this process.
1451     *
1452     * |checkMetaChars| looks for regexp metachars in the pattern string.
1453     *
1454     * Return whether flat matching could be used.
1455     *
1456     * N.B. tryFlatMatch returns NULL on OOM, so the caller must check cx->isExceptionPending().
1457     */
1458    const FlatMatch *
1459    tryFlatMatch(JSContext *cx, JSString *textstr, uintN optarg, uintN argc,
1460                 bool checkMetaChars = true)
1461    {
1462        if (re_.initialized())
1463            return NULL;
1464
1465        fm.pat = fm.patstr->chars();
1466        fm.patlen = fm.patstr->length();
1467
1468        if (optarg < argc)
1469            return NULL;
1470
1471        if (checkMetaChars &&
1472            (fm.patlen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat, fm.patlen))) {
1473            return NULL;
1474        }
1475
1476        /*
1477         * textstr could be a rope, so we want to avoid flattening it for as
1478         * long as possible.
1479         */
1480        if (textstr->isRope()) {
1481            if (!RopeMatch(cx, textstr, fm.pat, fm.patlen, &fm.match_))
1482                return NULL;
1483        } else {
1484            const jschar *text = textstr->asLinear().chars();
1485            size_t textlen = textstr->length();
1486            fm.match_ = StringMatch(text, textlen, fm.pat, fm.patlen);
1487        }
1488        return &fm;
1489    }
1490
1491    /* If the pattern is not already a regular expression, make it so. */
1492    bool normalizeRegExp(JSContext *cx, bool flat, uintN optarg, CallArgs args)
1493    {
1494        if (re_.initialized())
1495            return true;
1496
1497        /* Build RegExp from pattern string. */
1498        JSString *opt;
1499        if (optarg < args.length()) {
1500            opt = ToString(cx, args[optarg]);
1501            if (!opt)
1502                return false;
1503        } else {
1504            opt = NULL;
1505        }
1506
1507        JSAtom *patstr;
1508        if (flat) {
1509            patstr = flattenPattern(cx, fm.patstr);
1510            if (!patstr)
1511                return false;
1512        } else {
1513            patstr = fm.patstr;
1514        }
1515        JS_ASSERT(patstr);
1516
1517        RegExpShared *re = cx->compartment->regExps.get(cx, patstr, opt);
1518        if (!re)
1519            return false;
1520
1521        re_.init(*re);
1522        return true;
1523    }
1524
1525    RegExpShared &regExp() { return *re_; }
1526};
1527
1528/* ExecuteRegExp indicates success in two ways, based on the 'test' flag. */
1529static JS_ALWAYS_INLINE bool
1530Matched(RegExpExecType type, const Value &v)
1531{
1532    return (type == RegExpTest) ? v.isTrue() : !v.isNull();
1533}
1534
1535typedef bool (*DoMatchCallback)(JSContext *cx, RegExpStatics *res, size_t count, void *data);
1536
1537/*
1538 * BitOR-ing these flags allows the DoMatch caller to control when how the
1539 * RegExp engine is called and when callbacks are fired.
1540 */
1541enum MatchControlFlags {
1542   TEST_GLOBAL_BIT         = 0x1, /* use RegExp.test for global regexps */
1543   TEST_SINGLE_BIT         = 0x2, /* use RegExp.test for non-global regexps */
1544   CALLBACK_ON_SINGLE_BIT  = 0x4, /* fire callback on non-global match */
1545
1546   MATCH_ARGS    = TEST_GLOBAL_BIT,
1547   MATCHALL_ARGS = CALLBACK_ON_SINGLE_BIT,
1548   REPLACE_ARGS  = TEST_GLOBAL_BIT | TEST_SINGLE_BIT | CALLBACK_ON_SINGLE_BIT
1549};
1550
1551/* Factor out looping and matching logic. */
1552static bool
1553DoMatch(JSContext *cx, RegExpStatics *res, JSString *str, RegExpShared &re,
1554        DoMatchCallback callback, void *data, MatchControlFlags flags, Value *rval)
1555{
1556    JSLinearString *linearStr = str->ensureLinear(cx);
1557    if (!linearStr)
1558        return false;
1559
1560    const jschar *chars = linearStr->chars();
1561    size_t length = linearStr->length();
1562
1563    if (re.global()) {
1564        RegExpExecType type = (flags & TEST_GLOBAL_BIT) ? RegExpTest : RegExpExec;
1565        for (size_t count = 0, i = 0, length = str->length(); i <= length; ++count) {
1566            if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
1567                return false;
1568            if (!Matched(type, *rval))
1569                break;
1570            if (!callback(cx, res, count, data))
1571                return false;
1572            if (!res->matched())
1573                ++i;
1574        }
1575    } else {
1576        RegExpExecType type = (flags & TEST_SINGLE_BIT) ? RegExpTest : RegExpExec;
1577        bool callbackOnSingle = !!(flags & CALLBACK_ON_SINGLE_BIT);
1578        size_t i = 0;
1579        if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
1580            return false;
1581        if (callbackOnSingle && Matched(type, *rval) && !callback(cx, res, 0, data))
1582            return false;
1583    }
1584    return true;
1585}
1586
1587static bool
1588BuildFlatMatchArray(JSContext *cx, JSString *textstr, const FlatMatch &fm, CallArgs *args)
1589{
1590    if (fm.match() < 0) {
1591        args->rval() = NullValue();
1592        return true;
1593    }
1594
1595    /* For this non-global match, produce a RegExp.exec-style array. */
1596    JSObject *obj = NewSlowEmptyArray(cx);
1597    if (!obj)
1598        return false;
1599
1600    if (!obj->defineElement(cx, 0, StringValue(fm.pattern())) ||
1601        !obj->defineProperty(cx, cx->runtime->atomState.indexAtom, Int32Value(fm.match())) ||
1602        !obj->defineProperty(cx, cx->runtime->atomState.inputAtom, StringValue(textstr)))
1603    {
1604        return false;
1605    }
1606
1607    args->rval() = ObjectValue(*obj);
1608    return true;
1609}
1610
1611typedef JSObject **MatchArgType;
1612
1613/*
1614 * DoMatch will only callback on global matches, hence this function builds
1615 * only the "array of matches" returned by match on global regexps.
1616 */
1617static bool
1618MatchCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
1619{
1620    JS_ASSERT(count <= JSID_INT_MAX);  /* by max string length */
1621
1622    JSObject *&arrayobj = *static_cast<MatchArgType>(p);
1623    if (!arrayobj) {
1624        arrayobj = NewDenseEmptyArray(cx);
1625        if (!arrayobj)
1626            return false;
1627    }
1628
1629    Value v;
1630    return res->createLastMatch(cx, &v) && arrayobj->defineElement(cx, count, v);
1631}
1632
1633JSBool
1634js::str_match(JSContext *cx, uintN argc, Value *vp)
1635{
1636    CallArgs args = CallArgsFromVp(argc, vp);
1637    JSString *str = ThisToStringForStringProto(cx, args);
1638    if (!str)
1639        return false;
1640
1641    RegExpGuard g;
1642    if (!g.init(cx, args, true))
1643        return false;
1644
1645    if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length()))
1646        return BuildFlatMatchArray(cx, str, *fm, &args);
1647
1648    /* Return if there was an error in tryFlatMatch. */
1649    if (cx->isExceptionPending())
1650        return false;
1651
1652    if (!g.normalizeRegExp(cx, false, 1, args))
1653        return false;
1654
1655    JSObject *array = NULL;
1656    MatchArgType arg = &array;
1657    RegExpStatics *res = cx->regExpStatics();
1658    Value rval;
1659    if (!DoMatch(cx, res, str, g.regExp(), MatchCallback, arg, MATCH_ARGS, &rval))
1660        return false;
1661
1662    if (g.regExp().global())
1663        args.rval() = ObjectOrNullValue(array);
1664    else
1665        args.rval() = rval;
1666    return true;
1667}
1668
1669JSBool
1670js::str_search(JSContext *cx, uintN argc, Value *vp)
1671{
1672    CallArgs args = CallArgsFromVp(argc, vp);
1673    JSString *str = ThisToStringForStringProto(cx, args);
1674    if (!str)
1675        return false;
1676
1677    RegExpGuard g;
1678    if (!g.init(cx, args, true))
1679        return false;
1680    if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length())) {
1681        args.rval() = Int32Value(fm->match());
1682        return true;
1683    }
1684
1685    if (cx->isExceptionPending())  /* from tryFlatMatch */
1686        return false;
1687
1688    if (!g.normalizeRegExp(cx, false, 1, args))
1689        return false;
1690
1691    JSLinearString *linearStr = str->ensureLinear(cx);
1692    if (!linearStr)
1693        return false;
1694
1695    const jschar *chars = linearStr->chars();
1696    size_t length = linearStr->length();
1697    RegExpStatics *res = cx->regExpStatics();
1698
1699    /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
1700    size_t i = 0;
1701    Value result;
1702    if (!ExecuteRegExp(cx, res, g.regExp(), linearStr, chars, length, &i, RegExpTest, &result))
1703        return false;
1704
1705    if (result.isTrue())
1706        args.rval() = Int32Value(res->matchStart());
1707    else
1708        args.rval() = Int32Value(-1);
1709    return true;
1710}
1711
1712struct ReplaceData
1713{
1714    ReplaceData(JSContext *cx)
1715     : sb(cx)
1716    {}
1717
1718    JSString           *str;           /* 'this' parameter object as a string */
1719    RegExpGuard        g;              /* regexp parameter object and private data */
1720    JSObject           *lambda;        /* replacement function object or null */
1721    JSObject           *elembase;      /* object for function(a){return b[a]} replace */
1722    JSLinearString     *repstr;        /* replacement string */
1723    const jschar       *dollar;        /* null or pointer to first $ in repstr */
1724    const jschar       *dollarEnd;     /* limit pointer for js_strchr_limit */
1725    jsint              leftIndex;      /* left context index in str->chars */
1726    JSSubString        dollarStr;      /* for "$$" InterpretDollar result */
1727    bool               calledBack;     /* record whether callback has been called */
1728    InvokeArgsGuard    args;           /* arguments for lambda call */
1729    StringBuffer       sb;             /* buffer built during DoMatch */
1730};
1731
1732static bool
1733InterpretDollar(JSContext *cx, RegExpStatics *res, const jschar *dp, const jschar *ep,
1734                ReplaceData &rdata, JSSubString *out, size_t *skip)
1735{
1736    JS_ASSERT(*dp == '$');
1737
1738    /* If there is only a dollar, bail now */
1739    if (dp + 1 >= ep)
1740        return false;
1741
1742    /* Interpret all Perl match-induced dollar variables. */
1743    jschar dc = dp[1];
1744    if (JS7_ISDEC(dc)) {
1745        /* ECMA-262 Edition 3: 1-9 or 01-99 */
1746        uintN num = JS7_UNDEC(dc);
1747        if (num > res->parenCount())
1748            return false;
1749
1750        const jschar *cp = dp + 2;
1751        if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
1752            uintN tmp = 10 * num + JS7_UNDEC(dc);
1753            if (tmp <= res->parenCount()) {
1754                cp++;
1755                num = tmp;
1756            }
1757        }
1758        if (num == 0)
1759            return false;
1760
1761        *skip = cp - dp;
1762
1763        JS_ASSERT(num <= res->parenCount());
1764
1765        /*
1766         * Note: we index to get the paren with the (1-indexed) pair
1767         * number, as opposed to a (0-indexed) paren number.
1768         */
1769        res->getParen(num, out);
1770        return true;
1771    }
1772
1773    *skip = 2;
1774    switch (dc) {
1775      case '$':
1776        rdata.dollarStr.chars = dp;
1777        rdata.dollarStr.length = 1;
1778        *out = rdata.dollarStr;
1779        return true;
1780      case '&':
1781        res->getLastMatch(out);
1782        return true;
1783      case '+':
1784        res->getLastParen(out);
1785        return true;
1786      case '`':
1787        res->getLeftContext(out);
1788        return true;
1789      case '\'':
1790        res->getRightContext(out);
1791        return true;
1792    }
1793    return false;
1794}
1795
1796static bool
1797FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
1798{
1799    JSObject *base = rdata.elembase;
1800    if (base) {
1801        /*
1802         * The base object is used when replace was passed a lambda which looks like
1803         * 'function(a) { return b[a]; }' for the base object b.  b will not change
1804         * in the course of the replace unless we end up making a scripted call due
1805         * to accessing a scripted getter or a value with a scripted toString.
1806         */
1807        JS_ASSERT(rdata.lambda);
1808        JS_ASSERT(!base->getOps()->lookupProperty);
1809        JS_ASSERT(!base->getOps()->getProperty);
1810
1811        Value match;
1812        if (!res->createLastMatch(cx, &match))
1813            return false;
1814        JSString *str = match.toString();
1815
1816        JSAtom *atom;
1817        if (str->isAtom()) {
1818            atom = &str->asAtom();
1819        } else {
1820            atom = js_AtomizeString(cx, str);
1821            if (!atom)
1822                return false;
1823        }
1824        jsid id = ATOM_TO_JSID(atom);
1825
1826        JSObject *holder;
1827        JSProperty *prop = NULL;
1828        if (!LookupPropertyWithFlags(cx, base, id, JSRESOLVE_QUALIFIED, &holder, &prop))
1829            return false;
1830
1831        /* Only handle the case where the property exists and is on this object. */
1832        if (prop && holder == base) {
1833            Shape *shape = (Shape *) prop;
1834            if (shape->hasSlot() && shape->hasDefaultGetter()) {
1835                Value value = base->getSlot(shape->slot());
1836                if (value.isString()) {
1837                    rdata.repstr = value.toString()->ensureLinear(cx);
1838                    if (!rdata.repstr)
1839                        return false;
1840                    *sizep = rdata.repstr->length();
1841                    return true;
1842                }
1843            }
1844        }
1845
1846        /*
1847         * Couldn't handle this property, fall through and despecialize to the
1848         * general lambda case.
1849         */
1850        rdata.elembase = NULL;
1851    }
1852
1853    JSObject *lambda = rdata.lambda;
1854    if (lambda) {
1855        PreserveRegExpStatics staticsGuard(res);
1856        if (!staticsGuard.init(cx))
1857            return false;
1858
1859        /*
1860         * In the lambda case, not only do we find the replacement string's
1861         * length, we compute repstr and return it via rdata for use within
1862         * DoReplace.  The lambda is called with arguments ($&, $1, $2, ...,
1863         * index, input), i.e., all the properties of a regexp match array.
1864         * For $&, etc., we must create string jsvals from cx->regExpStatics.
1865         * We grab up stack space to keep the newborn strings GC-rooted.
1866         */
1867        uintN p = res->parenCount();
1868        uintN argc = 1 + p + 2;
1869
1870        InvokeArgsGuard &args = rdata.args;
1871        if (!args.pushed() && !cx->stack.pushInvokeArgs(cx, argc, &args))
1872            return false;
1873
1874        args.setCallee(ObjectValue(*lambda));
1875        args.thisv() = UndefinedValue();
1876
1877        /* Push $&, $1, $2, ... */
1878        uintN argi = 0;
1879        if (!res->createLastMatch(cx, &args[argi++]))
1880            return false;
1881
1882        for (size_t i = 0; i < res->parenCount(); ++i) {
1883            if (!res->createParen(cx, i + 1, &args[argi++]))
1884                return false;
1885        }
1886
1887        /* Push match index and input string. */
1888        args[argi++].setInt32(res->matchStart());
1889        args[argi].setString(rdata.str);
1890
1891        if (!Invoke(cx, args))
1892            return false;
1893
1894        /* root repstr: rdata is on the stack, so scanned by conservative gc. */
1895        JSString *repstr = ToString(cx, args.rval());
1896        if (!repstr)
1897            return false;
1898        rdata.repstr = repstr->ensureLinear(cx);
1899        if (!rdata.repstr)
1900            return false;
1901        *sizep = rdata.repstr->length();
1902        return true;
1903    }
1904
1905    JSString *repstr = rdata.repstr;
1906    size_t replen = repstr->length();
1907    for (const jschar *dp = rdata.dollar, *ep = rdata.dollarEnd; dp;
1908         dp = js_strchr_limit(dp, '$', ep)) {
1909        JSSubString sub;
1910        size_t skip;
1911        if (InterpretDollar(cx, res, dp, ep, rdata, &sub, &skip)) {
1912            replen += sub.length - skip;
1913            dp += skip;
1914        } else {
1915            dp++;
1916        }
1917    }
1918    *sizep = replen;
1919    return true;
1920}
1921
1922/*
1923 * Precondition: |rdata.sb| already has necessary growth space reserved (as
1924 * derived from FindReplaceLength).
1925 */
1926static void
1927DoReplace(JSContext *cx, RegExpStatics *res, ReplaceData &rdata)
1928{
1929    JSLinearString *repstr = rdata.repstr;
1930    const jschar *cp;
1931    const jschar *bp = cp = repstr->chars();
1932
1933    const jschar *dp = rdata.dollar;
1934    const jschar *ep = rdata.dollarEnd;
1935    for (; dp; dp = js_strchr_limit(dp, '$', ep)) {
1936        /* Move one of the constant portions of the replacement value. */
1937        size_t len = dp - cp;
1938        rdata.sb.infallibleAppend(cp, len);
1939        cp = dp;
1940
1941        JSSubString sub;
1942        size_t skip;
1943        if (InterpretDollar(cx, res, dp, ep, rdata, &sub, &skip)) {
1944            len = sub.length;
1945            rdata.sb.infallibleAppend(sub.chars, len);
1946            cp += skip;
1947            dp += skip;
1948        } else {
1949            dp++;
1950        }
1951    }
1952    rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
1953}
1954
1955static bool
1956ReplaceRegExpCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
1957{
1958    ReplaceData &rdata = *static_cast<ReplaceData *>(p);
1959
1960    rdata.calledBack = true;
1961    JSLinearString &str = rdata.str->asLinear();  /* flattened for regexp */
1962    size_t leftoff = rdata.leftIndex;
1963    const jschar *left = str.chars() + leftoff;
1964    size_t leftlen = res->matchStart() - leftoff;
1965    rdata.leftIndex = res->matchLimit();
1966
1967    size_t replen = 0;  /* silence 'unused' warning */
1968    if (!FindReplaceLength(cx, res, rdata, &replen))
1969        return false;
1970
1971    size_t growth = leftlen + replen;
1972    if (!rdata.sb.reserve(rdata.sb.length() + growth))
1973        return false;
1974    rdata.sb.infallibleAppend(left, leftlen); /* skipped-over portion of the search value */
1975    DoReplace(cx, res, rdata);
1976    return true;
1977}
1978
1979static bool
1980BuildFlatReplacement(JSContext *cx, JSString *textstr, JSString *repstr,
1981                     const FlatMatch &fm, CallArgs *args)
1982{
1983    RopeBuilder builder(cx);
1984    size_t match = fm.match();
1985    size_t matchEnd = match + fm.patternLength();
1986
1987    if (textstr->isRope()) {
1988        /*
1989         * If we are replacing over a rope, avoid flattening it by iterating
1990         * through it, building a new rope.
1991         */
1992        StringSegmentRange r(cx);
1993        if (!r.init(textstr))
1994            return false;
1995        size_t pos = 0;
1996        while (!r.empty()) {
1997            JSString *str = r.front();
1998            size_t len = str->length();
1999            size_t strEnd = pos + len;
2000            if (pos < matchEnd && strEnd > match) {
2001                /*
2002                 * We need to special-case any part of the rope that overlaps
2003                 * with the replacement string.
2004                 */
2005                if (match >= pos) {
2006                    /*
2007                     * If this part of the rope overlaps with the left side of
2008                     * the pattern, then it must be the only one to overlap with
2009                     * the first character in the pattern, so we include the
2010                     * replacement string here.
2011                     */
2012                    JSString *leftSide = js_NewDependentString(cx, str, 0, match - pos);
2013                    if (!leftSide ||
2014                        !builder.append(leftSide) ||
2015                        !builder.append(repstr)) {
2016                        return false;
2017                    }
2018                }
2019
2020                /*
2021                 * If str runs off the end of the matched string, append the
2022                 * last part of str.
2023                 */
2024                if (strEnd > matchEnd) {
2025                    JSString *rightSide = js_NewDependentString(cx, str, matchEnd - pos,
2026                                                                strEnd - matchEnd);
2027                    if (!rightSide || !builder.append(rightSide))
2028                        return false;
2029                }
2030            } else {
2031                if (!builder.append(str))
2032                    return false;
2033            }
2034            pos += str->length();
2035            if (!r.popFront())
2036                return false;
2037        }
2038    } else {
2039        JSString *leftSide = js_NewDependentString(cx, textstr, 0, match);
2040        if (!leftSide)
2041            return false;
2042        JSString *rightSide = js_NewDependentString(cx, textstr, match + fm.patternLength(),
2043                                                    textstr->length() - match - fm.patternLength());
2044        if (!rightSide ||
2045            !builder.append(leftSide) ||
2046            !builder.append(repstr) ||
2047            !builder.append(rightSide)) {
2048            return false;
2049        }
2050    }
2051
2052    args->rval() = StringValue(builder.result());
2053    return true;
2054}
2055
2056/*
2057 * Perform a linear-scan dollar substitution on the replacement text,
2058 * constructing a result string that looks like:
2059 *
2060 *      newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
2061 */
2062static inline bool
2063BuildDollarReplacement(JSContext *cx, JSString *textstrArg, JSLinearString *repstr,
2064                       const jschar *firstDollar, const FlatMatch &fm, CallArgs *args)
2065{
2066    JSLinearString *textstr = textstrArg->ensureLinear(cx);
2067    if (!textstr)
2068        return NULL;
2069
2070    JS_ASSERT(repstr->chars() <= firstDollar && firstDollar < repstr->chars() + repstr->length());
2071    size_t matchStart = fm.match();
2072    size_t matchLimit = matchStart + fm.patternLength();
2073
2074    /*
2075     * Most probably:
2076     *
2077     *      len(newstr) >= len(orig) - len(match) + len(replacement)
2078     *
2079     * Note that dollar vars _could_ make the resulting text smaller than this.
2080     */
2081    StringBuffer newReplaceChars(cx);
2082    if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
2083        return false;
2084
2085    /* Move the pre-dollar chunk in bulk. */
2086    newReplaceChars.infallibleAppend(repstr->chars(), firstDollar);
2087
2088    /* Move the rest char-by-char, interpreting dollars as we encounter them. */
2089#define ENSURE(__cond) if (!(__cond)) return false;
2090    const jschar *repstrLimit = repstr->chars() + repstr->length();
2091    for (const jschar *it = firstDollar; it < repstrLimit; ++it) {
2092        if (*it != '$' || it == repstrLimit - 1) {
2093            ENSURE(newReplaceChars.append(*it));
2094            continue;
2095        }
2096
2097        switch (*(it + 1)) {
2098          case '$': /* Eat one of the dollars. */
2099            ENSURE(newReplaceChars.append(*it));
2100            break;
2101          case '&':
2102            ENSURE(newReplaceChars.append(textstr->chars() + matchStart,
2103                                          textstr->chars() + matchLimit));
2104            break;
2105          case '`':
2106            ENSURE(newReplaceChars.append(textstr->chars(), textstr->chars() + matchStart));
2107            break;
2108          case '\'':
2109            ENSURE(newReplaceChars.append(textstr->chars() + matchLimit,
2110                                          textstr->chars() + textstr->length()));
2111            break;
2112          default: /* The dollar we saw was not special (no matter what its mother told it). */
2113            ENSURE(newReplaceChars.append(*it));
2114            continue;
2115        }
2116        ++it; /* We always eat an extra char in the above switch. */
2117    }
2118
2119    JSString *leftSide = js_NewDependentString(cx, textstr, 0, matchStart);
2120    ENSURE(leftSide);
2121
2122    JSString *newReplace = newReplaceChars.finishString();
2123    ENSURE(newReplace);
2124
2125    JS_ASSERT(textstr->length() >= matchLimit);
2126    JSString *rightSide = js_NewDependentString(cx, textstr, matchLimit,
2127                                                textstr->length() - matchLimit);
2128    ENSURE(rightSide);
2129
2130    RopeBuilder builder(cx);
2131    ENSURE(builder.append(leftSide) &&
2132           builder.append(newReplace) &&
2133           builder.append(rightSide));
2134#undef ENSURE
2135
2136    args->rval() = StringValue(builder.result());
2137    return true;
2138}
2139
2140static inline bool
2141str_replace_regexp(JSContext *cx, CallArgs args, ReplaceData &rdata)
2142{
2143    if (!rdata.g.normalizeRegExp(cx, true, 2, args))
2144        return false;
2145
2146    rdata.leftIndex = 0;
2147    rdata.calledBack = false;
2148
2149    RegExpStatics *res = cx->regExpStatics();
2150    RegExpShared &re = rdata.g.regExp();
2151
2152    Value tmp;
2153    if (!DoMatch(cx, res, rdata.str, re, ReplaceRegExpCallback, &rdata, REPLACE_ARGS, &tmp))
2154        return false;
2155
2156    if (!rdata.calledBack) {
2157        /* Didn't match, so the string is unmodified. */
2158        args.rval() = StringValue(rdata.str);
2159        return true;
2160    }
2161
2162    JSSubString sub;
2163    res->getRightContext(&sub);
2164    if (!rdata.sb.append(sub.chars, sub.length))
2165        return false;
2166
2167    JSString *retstr = rdata.sb.finishString();
2168    if (!retstr)
2169        return false;
2170
2171    args.rval() = StringValue(retstr);
2172    return true;
2173}
2174
2175static inline bool
2176str_replace_flat_lambda(JSContext *cx, CallArgs outerArgs, ReplaceData &rdata, const FlatMatch &fm)
2177{
2178    JS_ASSERT(fm.match() >= 0);
2179
2180    JSString *matchStr = js_NewDependentString(cx, rdata.str, fm.match(), fm.patternLength());
2181    if (!matchStr)
2182        return false;
2183
2184    /* lambda(matchStr, matchStart, textstr) */
2185    static const uint32_t lambdaArgc = 3;
2186    if (!cx->stack.pushInvokeArgs(cx, lambdaArgc, &rdata.args))
2187        return false;
2188
2189    CallArgs &args = rdata.args;
2190    args.calleev().setObject(*rdata.lambda);
2191    args.thisv().setUndefined();
2192
2193    Value *sp = args.array();
2194    sp[0].setString(matchStr);
2195    sp[1].setInt32(fm.match());
2196    sp[2].setString(rdata.str);
2197
2198    if (!Invoke(cx, rdata.args))
2199        return false;
2200
2201    JSString *repstr = ToString(cx, args.rval());
2202    if (!repstr)
2203        return false;
2204
2205    JSString *leftSide = js_NewDependentString(cx, rdata.str, 0, fm.match());
2206    if (!leftSide)
2207        return false;
2208
2209    size_t matchLimit = fm.match() + fm.patternLength();
2210    JSString *rightSide = js_NewDependentString(cx, rdata.str, matchLimit,
2211                                                rdata.str->length() - matchLimit);
2212    if (!rightSide)
2213        return false;
2214
2215    RopeBuilder builder(cx);
2216    if (!(builder.append(leftSide) &&
2217          builder.append(repstr) &&
2218          builder.append(rightSide))) {
2219        return false;
2220    }
2221
2222    outerArgs.rval() = StringValue(builder.result());
2223    return true;
2224}
2225
2226static const uint32_t ReplaceOptArg = 2;
2227
2228JSBool
2229js::str_replace(JSContext *cx, uintN argc, Value *vp)
2230{
2231    CallArgs args = CallArgsFromVp(argc, vp);
2232
2233    ReplaceData rdata(cx);
2234    rdata.str = ThisToStringForStringProto(cx, args);
2235    if (!rdata.str)
2236        return false;
2237
2238    if (!rdata.g.init(cx, args))
2239        return false;
2240
2241    /* Extract replacement string/function. */
2242    if (args.length() >= ReplaceOptArg && js_IsCallable(args[1])) {
2243        rdata.lambda = &args[1].toObject();
2244        rdata.elembase = NULL;
2245        rdata.repstr = NULL;
2246        rdata.dollar = rdata.dollarEnd = NULL;
2247
2248        if (rdata.lambda->isFunction()) {
2249            JSFunction *fun = rdata.lambda->toFunction();
2250            if (fun->isInterpreted()) {
2251                /*
2252                 * Pattern match the script to check if it is is indexing into a
2253                 * particular object, e.g. 'function(a) { return b[a]; }'.  Avoid
2254                 * calling the script in such cases, which are used by javascript
2255                 * packers (particularly the popular Dean Edwards packer) to efficiently
2256                 * encode large scripts.  We only handle the code patterns generated
2257                 * by such packers here.
2258                 */
2259                JSScript *script = fun->script();
2260                jsbytecode *pc = script->code;
2261
2262                Value table = UndefinedValue();
2263                if (JSOp(*pc) == JSOP_GETFCSLOT) {
2264                    table = fun->getFlatClosureUpvar(GET_UINT16(pc));
2265                    pc += JSOP_GETFCSLOT_LENGTH;
2266                }
2267
2268                if (table.isObject() &&
2269                    JSOp(*pc) == JSOP_GETARG && GET_SLOTNO(pc) == 0 &&
2270                    JSOp(pc[JSOP_GETARG_LENGTH]) == JSOP_GETELEM &&
2271                    JSOp(pc[JSOP_GETARG_LENGTH + JSOP_GETELEM_LENGTH]) == JSOP_RETURN) {
2272                    Class *clasp = table.toObject().getClass();
2273                    if (clasp->isNative() &&
2274                        !clasp->ops.lookupProperty &&
2275                        !clasp->ops.getProperty) {
2276                        rdata.elembase = &table.toObject();
2277                    }
2278                }
2279            }
2280        }
2281    } else {
2282        rdata.lambda = NULL;
2283        rdata.elembase = NULL;
2284        rdata.repstr = ArgToRootedString(cx, args, 1);
2285        if (!rdata.repstr)
2286            return false;
2287
2288        /* We're about to store pointers into the middle of our string. */
2289        JSFixedString *fixed = rdata.repstr->ensureFixed(cx);
2290        if (!fixed)
2291            return false;
2292        rdata.dollarEnd = fixed->chars() + fixed->length();
2293        rdata.dollar = js_strchr_limit(fixed->chars(), '$', rdata.dollarEnd);
2294    }
2295
2296    /*
2297     * Unlike its |String.prototype| brethren, |replace| doesn't convert
2298     * its input to a regular expression. (Even if it contains metachars.)
2299     *
2300     * However, if the user invokes our (non-standard) |flags| argument
2301     * extension then we revert to creating a regular expression. Note that
2302     * this is observable behavior through the side-effect mutation of the
2303     * |RegExp| statics.
2304     */
2305
2306    const FlatMatch *fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
2307    if (!fm) {
2308        if (cx->isExceptionPending())  /* oom in RopeMatch in tryFlatMatch */
2309            return false;
2310        return str_replace_regexp(cx, args, rdata);
2311    }
2312
2313    if (fm->match() < 0) {
2314        args.rval() = StringValue(rdata.str);
2315        return true;
2316    }
2317
2318    if (rdata.lambda)
2319        return str_replace_flat_lambda(cx, args, rdata, *fm);
2320
2321    /*
2322     * Note: we could optimize the text.length == pattern.length case if we wanted,
2323     * even in the presence of dollar metachars.
2324     */
2325    if (rdata.dollar)
2326        return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollar, *fm, &args);
2327
2328    return BuildFlatReplacement(cx, rdata.str, rdata.repstr, *fm, &args);
2329}
2330
2331class SplitMatchResult {
2332    size_t endIndex_;
2333    size_t length_;
2334
2335  public:
2336    void setFailure() {
2337        JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
2338        endIndex_ = SIZE_MAX;
2339    }
2340    bool isFailure() const {
2341        return (endIndex_ == SIZE_MAX);
2342    }
2343    size_t endIndex() const {
2344        JS_ASSERT(!isFailure());
2345        return endIndex_;
2346    }
2347    size_t length() const {
2348        JS_ASSERT(!isFailure());
2349        return length_;
2350    }
2351    void setResult(size_t length, size_t endIndex) {
2352        length_ = length;
2353        endIndex_ = endIndex;
2354    }
2355};
2356
2357template<class Matcher>
2358static JSObject *
2359SplitHelper(JSContext *cx, JSLinearString *str, uint32_t limit, Matcher splitMatch, TypeObject *type)
2360{
2361    size_t strLength = str->length();
2362    SplitMatchResult result;
2363
2364    /* Step 11. */
2365    if (strLength == 0) {
2366        if (!splitMatch(cx, str, 0, &result))
2367            return NULL;
2368
2369        /*
2370         * NB: Unlike in the non-empty string case, it's perfectly fine
2371         *     (indeed the spec requires it) if we match at the end of the
2372         *     string.  Thus these cases should hold:
2373         *
2374         *   var a = "".split("");
2375         *   assertEq(a.length, 0);
2376         *   var b = "".split(/.?/);
2377         *   assertEq(b.length, 0);
2378         */
2379        if (!result.isFailure())
2380            return NewDenseEmptyArray(cx);
2381
2382        Value v = StringValue(str);
2383        return NewDenseCopiedArray(cx, 1, &v);
2384    }
2385
2386    /* Step 12. */
2387    size_t lastEndIndex = 0;
2388    size_t index = 0;
2389
2390    /* Step 13. */
2391    AutoValueVector splits(cx);
2392
2393    while (index < strLength) {
2394        /* Step 13(a). */
2395        if (!splitMatch(cx, str, index, &result))
2396            return NULL;
2397
2398        /*
2399         * Step 13(b).
2400         *
2401         * Our match algorithm differs from the spec in that it returns the
2402         * next index at which a match happens.  If no match happens we're
2403         * done.
2404         *
2405         * But what if the match is at the end of the string (and the string is
2406         * not empty)?  Per 13(c)(ii) this shouldn't be a match, so we have to
2407         * specially exclude it.  Thus this case should hold:
2408         *
2409         *   var a = "abc".split(/\b/);
2410         *   assertEq(a.length, 1);
2411         *   assertEq(a[0], "abc");
2412         */
2413        if (result.isFailure())
2414            break;
2415
2416        /* Step 13(c)(i). */
2417        size_t sepLength = result.length();
2418        size_t endIndex = result.endIndex();
2419        if (sepLength == 0 && endIndex == strLength)
2420            break;
2421
2422        /* Step 13(c)(ii). */
2423        if (endIndex == lastEndIndex) {
2424            index++;
2425            continue;
2426        }
2427
2428        /* Step 13(c)(iii). */
2429        JS_ASSERT(lastEndIndex < endIndex);
2430        JS_ASSERT(sepLength <= strLength);
2431        JS_ASSERT(lastEndIndex + sepLength <= endIndex);
2432
2433        /* Steps 13(c)(iii)(1-3). */
2434        size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
2435        JSString *sub = js_NewDependentString(cx, str, lastEndIndex, subLength);
2436        if (!sub || !splits.append(StringValue(sub)))
2437            return NULL;
2438
2439        /* Step 13(c)(iii)(4). */
2440        if (splits.length() == limit)
2441            return NewDenseCopiedArray(cx, splits.length(), splits.begin());
2442
2443        /* Step 13(c)(iii)(5). */
2444        lastEndIndex = endIndex;
2445
2446        /* Step 13(c)(iii)(6-7). */
2447        if (Matcher::returnsCaptures) {
2448            RegExpStatics *res = cx->regExpStatics();
2449            for (size_t i = 0; i < res->parenCount(); i++) {
2450                /* Steps 13(c)(iii)(7)(a-c). */
2451                if (res->pairIsPresent(i + 1)) {
2452                    JSSubString parsub;
2453                    res->getParen(i + 1, &parsub);
2454                    sub = js_NewStringCopyN(cx, parsub.chars, parsub.length);
2455                    if (!sub || !splits.append(StringValue(sub)))
2456                        return NULL;
2457                } else {
2458                    /* Only string entries have been accounted for so far. */
2459                    AddTypeProperty(cx, type, NULL, UndefinedValue());
2460                    if (!splits.append(UndefinedValue()))
2461                        return NULL;
2462                }
2463
2464                /* Step 13(c)(iii)(7)(d). */
2465                if (splits.length() == limit)
2466                    return NewDenseCopiedArray(cx, splits.length(), splits.begin());
2467            }
2468        }
2469
2470        /* Step 13(c)(iii)(8). */
2471        index = lastEndIndex;
2472    }
2473
2474    /* Steps 14-15. */
2475    JSString *sub = js_NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
2476    if (!sub || !splits.append(StringValue(sub)))
2477        return NULL;
2478
2479    /* Step 16. */
2480    return NewDenseCopiedArray(cx, splits.length(), splits.begin());
2481}
2482
2483/*
2484 * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
2485 * paths for regular expression and string separators.
2486 *
2487 * The algorithm differs from the spec in that the we return the next index at
2488 * which a match happens.
2489 */
2490class SplitRegExpMatcher
2491{
2492    RegExpShared &re;
2493    RegExpStatics *res;
2494
2495  public:
2496    SplitRegExpMatcher(RegExpShared &re, RegExpStatics *res) : re(re), res(res) {}
2497
2498    static const bool returnsCaptures = true;
2499
2500    bool operator()(JSContext *cx, JSLinearString *str, size_t index, SplitMatchResult *result)
2501    {
2502        Value rval = UndefinedValue();
2503        const jschar *chars = str->chars();
2504        size_t length = str->length();
2505        if (!ExecuteRegExp(cx, res, re, str, chars, length, &index, RegExpTest, &rval))
2506            return false;
2507        if (!rval.isTrue()) {
2508            result->setFailure();
2509            return true;
2510        }
2511        JSSubString sep;
2512        res->getLastMatch(&sep);
2513
2514        result->setResult(sep.length, index);
2515        return true;
2516    }
2517};
2518
2519class SplitStringMatcher
2520{
2521    const jschar *sepChars;
2522    size_t sepLength;
2523
2524  public:
2525    SplitStringMatcher(JSLinearString *sep) {
2526        sepChars = sep->chars();
2527        sepLength = sep->length();
2528    }
2529
2530    static const bool returnsCaptures = false;
2531
2532    bool operator()(JSContext *cx, JSLinearString *str, size_t index, SplitMatchResult *res)
2533    {
2534        JS_ASSERT(index == 0 || index < str->length());
2535        const jschar *chars = str->chars();
2536        jsint match = StringMatch(chars + index, str->length() - index, sepChars, sepLength);
2537        if (match == -1)
2538            res->setFailure();
2539        else
2540            res->setResult(sepLength, index + match + sepLength);
2541        return true;
2542    }
2543};
2544
2545/* ES5 15.5.4.14 */
2546JSBool
2547js::str_split(JSContext *cx, uintN argc, Value *vp)
2548{
2549    CallArgs args = CallArgsFromVp(argc, vp);
2550
2551    /* Steps 1-2. */
2552    JSString *str = ThisToStringForStringProto(cx, args);
2553    if (!str)
2554        return false;
2555
2556    TypeObject *type = GetTypeCallerInitObject(cx, JSProto_Array);
2557    if (!type)
2558        return false;
2559    AddTypeProperty(cx, type, NULL, Type::StringType());
2560
2561    /* Step 5: Use the second argument as the split limit, if given. */
2562    uint32_t limit;
2563    if (args.length() > 1 && !args[1].isUndefined()) {
2564        jsdouble d;
2565        if (!ToNumber(cx, args[1], &d))
2566            return false;
2567        limit = js_DoubleToECMAUint32(d);
2568    } else {
2569        limit = UINT32_MAX;
2570    }
2571
2572    /* Step 8. */
2573    RegExpShared::Guard re;
2574    JSLinearString *sepstr = NULL;
2575    bool sepUndefined = (args.length() == 0 || args[0].isUndefined());
2576    if (!sepUndefined) {
2577        if (IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
2578            RegExpShared *shared = RegExpToShared(cx, args[0].toObject());
2579            if (!shared)
2580                return false;
2581            re.init(*shared);
2582        } else {
2583            sepstr = ArgToRootedString(cx, args, 0);
2584            if (!sepstr)
2585                return false;
2586        }
2587    }
2588
2589    /* Step 9. */
2590    if (limit == 0) {
2591        JSObject *aobj = NewDenseEmptyArray(cx);
2592        if (!aobj)
2593            return false;
2594        aobj->setType(type);
2595        args.rval() = ObjectValue(*aobj);
2596        return true;
2597    }
2598
2599    /* Step 10. */
2600    if (sepUndefined) {
2601        Value v = StringValue(str);
2602        JSObject *aobj = NewDenseCopiedArray(cx, 1, &v);
2603        if (!aobj)
2604            return false;
2605        aobj->setType(type);
2606        args.rval() = ObjectValue(*aobj);
2607        return true;
2608    }
2609    JSLinearString *strlin = str->ensureLinear(cx);
2610    if (!strlin)
2611        return false;
2612
2613    /* Steps 11-15. */
2614    JSObject *aobj;
2615    if (!re.initialized())
2616        aobj = SplitHelper(cx, strlin, limit, SplitStringMatcher(sepstr), type);
2617    else
2618        aobj = SplitHelper(cx, strlin, limit, SplitRegExpMatcher(*re, cx->regExpStatics()), type);
2619    if (!aobj)
2620        return false;
2621
2622    /* Step 16. */
2623    aobj->setType(type);
2624    args.rval() = ObjectValue(*aobj);
2625    return true;
2626}
2627
#if JS_HAS_PERL_SUBSTR
/*
 * String.prototype.substr(start [, length]).
 *
 * With no arguments the string itself is returned. A negative |start| counts
 * back from the end of the string (clamped at 0); a |start| at or past the
 * end, or a non-positive |length|, yields the empty string. Otherwise the
 * result is a dependent string of at most |length| chars beginning at
 * |start|, truncated at the end of the string.
 */
static JSBool
str_substr(JSContext *cx, uintN argc, Value *vp)
{
    CallArgs args = CallArgsFromVp(argc, vp);
    JSString *str = ThisToStringForStringProto(cx, args);
    if (!str)
        return false;

    int32_t length, len, begin;
    if (args.length() > 0) {
        length = int32_t(str->length());
        if (!ValueToIntegerRange(cx, args[0], &begin))
            return false;

        if (begin >= length) {
            str = cx->runtime->emptyString;
            goto out;
        }
        if (begin < 0) {
            begin += length; /* length + INT_MIN will always be less than 0 */
            if (begin < 0)
                begin = 0;
        }

        if (args.length() == 1 || args[1].isUndefined()) {
            len = length - begin;
        } else {
            if (!ValueToIntegerRange(cx, args[1], &len))
                return false;

            if (len <= 0) {
                str = cx->runtime->emptyString;
                goto out;
            }

            /*
             * Clamp |len| so the substring stays inside the string. Here
             * 0 <= begin < length and len > 0, so |length - begin| cannot
             * overflow. Compare against that instead of forming
             * |begin + len|, which is a signed int32_t addition and would
             * overflow (undefined behaviour) when |len| is close to
             * INT32_MAX.
             */
            if (len > length - begin)
                len = length - begin;
        }

        str = js_NewDependentString(cx, str, size_t(begin), size_t(len));
        if (!str)
            return false;
    }

out:
    args.rval() = StringValue(str);
    return true;
}
#endif /* JS_HAS_PERL_SUBSTR */
2678
2679/*
2680 * Python-esque sequence operations.
2681 */
2682static JSBool
2683str_concat(JSContext *cx, uintN argc, Value *vp)
2684{
2685    CallArgs args = CallArgsFromVp(argc, vp);
2686    JSString *str = ThisToStringForStringProto(cx, args);
2687    if (!str)
2688        return false;
2689
2690    for (uintN i = 0; i < args.length(); i++) {
2691        JSString *argStr = ToString(cx, args[i]);
2692        if (!argStr)
2693            return false;
2694
2695        str = js_ConcatStrings(cx, str, argStr);
2696        if (!str)
2697            return false;
2698    }
2699
2700    args.rval() = StringValue(str);
2701    return true;
2702}
2703
2704static JSBool
2705str_slice(JSContext *cx, uintN argc, Value *vp)
2706{
2707    CallArgs args = CallArgsFromVp(argc, vp);
2708
2709    if (args.length() == 1 && args.thisv().isString() && args[0].isInt32()) {
2710        size_t begin, end, length;
2711
2712        JSString *str = args.thisv().toString();
2713        begin = args[0].toInt32();
2714        end = str->length();
2715        if (begin <= end) {
2716            length = end - begin;
2717            if (length == 0) {
2718                str = cx->runtime->emptyString;
2719            } else {
2720                str = (length == 1)
2721                      ? cx->runtime->staticStrings.getUnitStringForElement(cx, str, begin)
2722                      : js_NewDependentString(cx, str, begin, length);
2723                if (!str)
2724                    return false;
2725            }
2726            args.rval() = StringValue(str);
2727            return true;
2728        }
2729    }
2730
2731    JSString *str = ThisToStringForStringProto(cx, args);
2732    if (!str)
2733        return false;
2734
2735    if (args.length() != 0) {
2736        double begin, end, length;
2737
2738        if (!ToInteger(cx, args[0], &begin))
2739            return false;
2740        length = str->length();
2741        if (begin < 0) {
2742            begin += length;
2743            if (begin < 0)
2744                begin = 0;
2745        } else if (begin > length) {
2746            begin = length;
2747        }
2748
2749        if (args.length() == 1 || args[1].isUndefined()) {
2750            end = length;
2751        } else {
2752            if (!ToInteger(cx, args[1], &end))
2753                return false;
2754            if (end < 0) {
2755                end += length;
2756                if (end < 0)
2757                    end = 0;
2758            } else if (end > length) {
2759                end = length;
2760            }
2761            if (end < begin)
2762                end = begin;
2763        }
2764
2765        str = js_NewDependentString(cx, str,
2766                                    (size_t)begin,
2767                                    (size_t)(end - begin));
2768        if (!str)
2769            return false;
2770    }
2771    args.rval() = StringValue(str);
2772    return true;
2773}
2774
2775#if JS_HAS_STR_HTML_HELPERS
2776/*
2777 * HTML composition aids.
2778 */
2779static bool
2780tagify(JSContext *cx, const char *begin, JSLinearString *param, const char *end,
2781       CallReceiver call)
2782{
2783    JSString *thisstr = ThisToStringForStringProto(cx, call);
2784    if (!thisstr)
2785        return false;
2786
2787    JSLinearString *str = thisstr->ensureLinear(cx);
2788    if (!str)
2789        return false;
2790
2791    if (!end)
2792        end = begin;
2793
2794    size_t beglen = strlen(begin);
2795    size_t taglen = 1 + beglen + 1;                     /* '<begin' + '>' */
2796    size_t parlen = 0; /* Avoid warning. */
2797    if (param) {
2798        parlen = param->length();
2799        taglen += 2 + parlen + 1;                       /* '="param"' */
2800    }
2801    size_t endlen = strlen(end);
2802    taglen += str->length() + 2 + endlen + 1;           /* 'str</end>' */
2803
2804    if (taglen >= ~(size_t)0 / sizeof(jschar)) {
2805        js_ReportAllocationOverflow(cx);
2806        return false;
2807    }
2808
2809    jschar *tagbuf = (jschar *) cx->malloc_((taglen + 1) * sizeof(jschar));
2810    if (!tagbuf)
2811        return false;
2812
2813    size_t j = 0;
2814    tagbuf[j++] = '<';
2815    for (size_t i = 0; i < beglen; i++)
2816        tagbuf[j++] = (jschar)begin[i];
2817    if (param) {
2818        tagbuf[j++] = '=';
2819        tagbuf[j++] = '"';
2820        js_strncpy(&tagbuf[j], param->chars(), parlen);
2821        j += parlen;
2822        tagbuf[j++] = '"';
2823    }
2824    tagbuf[j++] = '>';
2825
2826    js_strncpy(&tagbuf[j], str->chars(), str->length());
2827    j += str->length();
2828    tagbuf[j++] = '<';
2829    tagbuf[j++] = '/';
2830    for (size_t i = 0; i < endlen; i++)
2831        tagbuf[j++] = (jschar)end[i];
2832    tagbuf[j++] = '>';
2833    JS_ASSERT(j == taglen);
2834    tagbuf[j] = 0;
2835
2836    JSString *retstr = js_NewString(cx, tagbuf, taglen);
2837    if (!retstr) {
2838        Foreground::free_((char *)tagbuf);
2839        return false;
2840    }
2841    call.rval() = StringValue(retstr);
2842    return true;
2843}
2844
2845static JSBool
2846tagify_value(JSContext *cx, CallArgs args, const char *begin, const char *end)
2847{
2848    JSLinearString *param = ArgToRootedString(cx, args, 0);
2849    if (!param)
2850        return false;
2851
2852    return tagify(cx, begin, param, end, args);
2853}
2854
2855static JSBool
2856str_bold(JSContext *cx, uintN argc, Value *vp)
2857{
2858    return tagify(cx, "b", NULL, NULL, CallReceiverFromVp(vp));
2859}
2860
2861static JSBool
2862str_italics(JSContext *cx, uintN argc, Value *vp)
2863{
2864    return tagify(cx, "i", NULL, NULL, CallReceiverFromVp(vp));
2865}
2866
2867static JSBool
2868str_fixed(JSContext *cx, uintN argc, Value *vp)
2869{
2870    return tagify(cx, "tt", NULL, NULL, CallReceiverFromVp(vp));
2871}
2872
2873static JSBool
2874str_fontsize(JSContext *cx, uintN argc, Value *vp)
2875{
2876    return tagify_value(cx, CallArgsFromVp(argc, vp), "font size", "font");
2877}
2878
2879static JSBool
2880str_fontcolor(JSContext *cx, uintN argc, Value *vp)
2881{
2882    return tagify_value(cx, CallArgsFromVp(argc, vp), "font color", "font");
2883}
2884
2885static JSBool
2886str_link(JSContext *cx, uintN argc, Value *vp)
2887{
2888    return tagify_value(cx, CallArgsFromVp(argc, vp), "a href", "a");
2889}
2890
2891static JSBool
2892str_anchor(JSContext *cx, uintN argc, Value *vp)
2893{
2894    return tagify_value(cx, CallArgsFromVp(argc, vp), "a name", "a");
2895}
2896
2897static JSBool
2898str_strike(JSContext *cx, uintN argc, Value *vp)
2899{
2900    return tagify(cx, "strike", NULL, NULL, CallReceiverFromVp(vp));
2901}
2902
2903static JSBool
2904str_small(JSContext *cx, uintN argc, Value *vp)
2905{
2906    return tagify(cx, "small", NULL, NULL, CallReceiverFromVp(vp));
2907}
2908
2909static JSBool
2910str_big(JSContext *cx, uintN argc, Value *vp)
2911{
2912    return tagify(cx, "big", NULL, NULL, CallReceiverFromVp(vp));
2913}
2914
2915static JSBool
2916str_blink(JSContext *cx, uintN argc, Value *vp)
2917{
2918    return tagify(cx, "blink", NULL, NULL, CallReceiverFromVp(vp));
2919}
2920
2921static JSBool
2922str_sup(JSContext *cx, uintN argc, Value *vp)
2923{
2924    return tagify(cx, "sup", NULL, NULL, CallReceiverFromVp(vp));
2925}
2926
2927static JSBool
2928str_sub(JSContext *cx, uintN argc, Value *vp)
2929{
2930    return tagify(cx, "sub", NULL, NULL, CallReceiverFromVp(vp));
2931}
2932#endif /* JS_HAS_STR_HTML_HELPERS */
2933
2934static JSFunctionSpec string_methods[] = {
2935#if JS_HAS_TOSOURCE
2936    JS_FN("quote",             str_quote,             0,JSFUN_GENERIC_NATIVE),
2937    JS_FN(js_toSource_str,     str_toSource,          0,0),
2938#endif
2939
2940    /* Java-like methods. */
2941    JS_FN(js_toString_str,     js_str_toString,       0,0),
2942    JS_FN(js_valueOf_str,      js_str_toString,       0,0),
2943    JS_FN("substring",         str_substring,         2,JSFUN_GENERIC_NATIVE),
2944    JS_FN("toLowerCase",       str_toLowerCase,       0,JSFUN_GENERIC_NATIVE),
2945    JS_FN("toUpperCase",       str_toUpperCase,       0,JSFUN_GENERIC_NATIVE),
2946    JS_FN("charAt",            js_str_charAt,         1,JSFUN_GENERIC_NATIVE),
2947    JS_FN("charCodeAt",        js_str_charCodeAt,     1,JSFUN_GENERIC_NATIVE),
2948    JS_FN("indexOf",           str_indexOf,           1,JSFUN_GENERIC_NATIVE),
2949    JS_FN("lastIndexOf",       str_lastIndexOf,       1,JSFUN_GENERIC_NATIVE),
2950    JS_FN("trim",              str_trim,              0,JSFUN_GENERIC_NATIVE),
2951    JS_FN("trimLeft",          str_trimLeft,          0,JSFUN_GENERIC_NATIVE),
2952    JS_FN("trimRight",         str_trimRight,         0,JSFUN_GENERIC_NATIVE),
2953    JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
2954    JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
2955    JS_FN("localeCompare",     str_localeCompare,     1,JSFUN_GENERIC_NATIVE),
2956
2957    /* Perl-ish methods (search is actually Python-esque). */
2958    JS_FN("match",             str_match,             1,JSFUN_GENERIC_NATIVE),
2959    JS_FN("search",            str_search,            1,JSFUN_GENERIC_NATIVE),
2960    JS_FN("replace",           str_replace,           2,JSFUN_GENERIC_NATIVE),
2961    JS_FN("split",             str_split,             2,JSFUN_GENERIC_NATIVE),
2962#if JS_HAS_PERL_SUBSTR
2963    JS_FN("substr",            str_substr,            2,JSFUN_GENERIC_NATIVE),
2964#endif
2965
2966    /* Python-esque sequence methods. */
2967    JS_FN("concat",            str_concat,            1,JSFUN_GENERIC_NATIVE),
2968    JS_FN("slice",             str_slice,             2,JSFUN_GENERIC_NATIVE),
2969
2970    /* HTML string methods. */
2971#if JS_HAS_STR_HTML_HELPERS
2972    JS_FN("bold",              str_bold,              0,0),
2973    JS_FN("italics",           str_italics,           0,0),
2974    JS_FN("fixed",             str_fixed,             0,0),
2975    JS_FN("fontsize",          str_fontsize,          1,0),
2976    JS_FN("fontcolor",         str_fontcolor,         1,0),
2977    JS_FN("link",              str_link,              1,0),
2978    JS_FN("anchor",            str_anchor,            1,0),
2979    JS_FN("strike",            str_strike,            0,0),
2980    JS_FN("small",             str_small,             0,0),
2981    JS_FN("big",               str_big,               0,0),
2982    JS_FN("blink",             str_blink,             0,0),
2983    JS_FN("sup",               str_sup,               0,0),
2984    JS_FN("sub",               str_sub,               0,0),
2985#endif
2986
2987    JS_FS_END
2988};
2989
2990JSBool
2991js_String(JSContext *cx, uintN argc, Value *vp)
2992{
2993    CallArgs args = CallArgsFromVp(argc, vp);
2994
2995    JSString *str;
2996    if (args.length() > 0) {
2997        str = ToString(cx, args[0]);
2998        if (!str)
2999            return false;
3000    } else {
3001        str = cx->runtime->emptyString;
3002    }
3003
3004    if (IsConstructing(args)) {
3005        StringObject *strobj = StringObject::create(cx, str);
3006        if (!strobj)
3007            return false;
3008        args.rval() = ObjectValue(*strobj);
3009        return true;
3010    }
3011
3012    args.rval() = StringValue(str);
3013    return true;
3014}
3015
3016JSBool
3017js::str_fromCharCode(JSContext *cx, uintN argc, Value *vp)
3018{
3019    CallArgs args = CallArgsFromVp(argc, vp);
3020
3021    JS_ASSERT(args.length() <= StackSpace::ARGS_LENGTH_MAX);
3022    if (args.length() == 1) {
3023        uint16_t code;
3024        if (!ValueToUint16(cx, args[0], &code))
3025            return JS_FALSE;
3026        if (StaticStrings::hasUnit(code)) {
3027            args.rval() = StringValue(cx->runtime->staticStrings.getUnit(code));
3028            return JS_TRUE;
3029        }
3030        args[0].setInt32(code);
3031    }
3032    jschar *chars = (jschar *) cx->malloc_((args.length() + 1) * sizeof(jschar));
3033    if (!chars)
3034        return JS_FALSE;
3035    for (uintN i = 0; i < args.length(); i++) {
3036        uint16_t code;
3037        if (!ValueToUint16(cx, args[i], &code)) {
3038            cx->free_(chars);
3039            return JS_FALSE;
3040        }
3041        chars[i] = (jschar)code;
3042    }
3043    chars[args.length()] = 0;
3044    JSString *str = js_NewString(cx, chars, args.length());
3045    if (!str) {
3046        cx->free_(chars);
3047        return JS_FALSE;
3048    }
3049
3050    args.rval() = StringValue(str);
3051    return JS_TRUE;
3052}
3053
3054static JSFunctionSpec string_static_methods[] = {
3055    JS_FN("fromCharCode", js::str_fromCharCode, 1, 0),
3056    JS_FS_END
3057};
3058
3059Shape *
3060StringObject::assignInitialShape(JSContext *cx)
3061{
3062    JS_ASSERT(nativeEmpty());
3063
3064    return addDataProperty(cx, ATOM_TO_JSID(cx->runtime->atomState.lengthAtom),
3065                           LENGTH_SLOT, JSPROP_PERMANENT | JSPROP_READONLY);
3066}
3067
3068JSObject *
3069js_InitStringClass(JSContext *cx, JSObject *obj)
3070{
3071    JS_ASSERT(obj->isNative());
3072
3073    GlobalObject *global = &obj->asGlobal();
3074
3075    JSObject *proto = global->createBlankPrototype(cx, &StringClass);
3076    if (!proto || !proto->asString().init(cx, cx->runtime->emptyString))
3077        return NULL;
3078
3079    /* Now create the String function. */
3080    JSFunction *ctor = global->createConstructor(cx, js_String, &StringClass,
3081                                                 CLASS_ATOM(cx, String), 1);
3082    if (!ctor)
3083        return NULL;
3084
3085    if (!LinkConstructorAndPrototype(cx, ctor, proto))
3086        return NULL;
3087
3088    if (!DefinePropertiesAndBrand(cx, proto, NULL, string_methods) ||
3089        !DefinePropertiesAndBrand(cx, ctor, NULL, string_static_methods))
3090    {
3091        return NULL;
3092    }
3093
3094    /* Capture normal data properties pregenerated for String objects. */
3095    TypeObject *type = proto->getNewType(cx);
3096    if (!type)
3097        return NULL;
3098    AddTypeProperty(cx, type, "length", Type::Int32Type());
3099
3100    if (!DefineConstructorAndPrototype(cx, global, JSProto_String, ctor, proto))
3101        return NULL;
3102
3103    /*
3104     * Define escape/unescape, the URI encode/decode functions, and maybe
3105     * uneval on the global object.
3106     */
3107    if (!JS_DefineFunctions(cx, global, string_functions))
3108        return NULL;
3109
3110    return proto;
3111}
3112
3113JSFixedString *
3114js_NewString(JSContext *cx, jschar *chars, size_t length)
3115{
3116    JSFixedString *s = JSFixedString::new_(cx, chars, length);
3117    if (s)
3118        Probes::createString(cx, s, length);
3119    return s;
3120}
3121
3122static JS_ALWAYS_INLINE JSFixedString *
3123NewShortString(JSContext *cx, const jschar *chars, size_t length)
3124{
3125    /*
3126     * Don't bother trying to find a static atom; measurement shows that not
3127     * many get here (for one, Atomize is catching them).
3128     */
3129
3130    JS_ASSERT(JSShortString::lengthFits(length));
3131    JSInlineString *str = JSInlineString::lengthFits(length)
3132                          ? JSInlineString::new_(cx)
3133                          : JSShortString::new_(cx);
3134    if (!str)
3135        return NULL;
3136
3137    jschar *storage = str->init(length);
3138    PodCopy(storage, chars, length);
3139    storage[length] = 0;
3140    Probes::createString(cx, str, length);
3141    return str;
3142}
3143
3144static JSInlineString *
3145NewShortString(JSContext *cx, const char *chars, size_t length)
3146{
3147    JS_ASSERT(JSShortString::lengthFits(length));
3148    JSInlineString *str = JSInlineString::lengthFits(length)
3149                          ? JSInlineString::new_(cx)
3150                          : JSShortString::new_(cx);
3151    if (!str)
3152        return NULL;
3153
3154    jschar *storage = str->init(length);
3155    if (js_CStringsAreUTF8) {
3156#ifdef DEBUG
3157        size_t oldLength = length;
3158#endif
3159        if (!InflateUTF8StringToBuffer(cx, chars, length, storage, &length))
3160            return NULL;
3161        JS_ASSERT(length <= oldLength);
3162        storage[length] = 0;
3163        str->resetLength(length);
3164    } else {
3165        size_t n = length;
3166        jschar *p = storage;
3167        while (n--)
3168            *p++ = (unsigned char)*chars++;
3169        *p = 0;
3170    }
3171    Probes::createString(cx, str, length);
3172    return str;
3173}
3174
3175jschar *
3176StringBuffer::extractWellSized()
3177{
3178    size_t capacity = cb.capacity();
3179    size_t length = cb.length();
3180
3181    jschar *buf = cb.extractRawBuffer();
3182    if (!buf)
3183        return NULL;
3184
3185    /* For medium/big buffers, avoid wasting more than 1/4 of the memory. */
3186    JS_ASSERT(capacity >= length);
3187    if (length > CharBuffer::sMaxInlineStorage && capacity - length > length / 4) {
3188        size_t bytes = sizeof(jschar) * (length + 1);
3189        JSContext *cx = context();
3190        jschar *tmp = (jschar *)cx->realloc_(buf, bytes);
3191        if (!tmp) {
3192            cx->free_(buf);
3193            return NULL;
3194        }
3195        buf = tmp;
3196    }
3197
3198    return buf;
3199}
3200
3201JSFixedString *
3202StringBuffer::finishString()
3203{
3204    JSContext *cx = context();
3205    if (cb.empty())
3206        return cx->runtime->atomState.emptyAtom;
3207
3208    size_t length = cb.length();
3209    if (!checkLength(length))
3210        return NULL;
3211
3212    JS_STATIC_ASSERT(JSShortString::MAX_SHORT_LENGTH < CharBuffer::InlineLength);
3213    if (JSShortString::lengthFits(length))
3214        return NewShortString(cx, cb.begin(), length);
3215
3216    if (!cb.append('\0'))
3217        return NULL;
3218
3219    jschar *buf = extractWellSized();
3220    if (!buf)
3221        return NULL;
3222
3223    JSFixedString *str = js_NewString(cx, buf, length);
3224    if (!str)
3225        cx->free_(buf);
3226    return str;
3227}
3228
3229JSAtom *
3230StringBuffer::finishAtom()
3231{
3232    JSContext *cx = context();
3233
3234    size_t length = cb.length();
3235    if (length == 0)
3236        return cx->runtime->atomState.emptyAtom;
3237
3238    JSAtom *atom = js_AtomizeChars(cx, cb.begin(), length);
3239    cb.clear();
3240    return atom;
3241}
3242
3243JSLinearString *
3244js_NewDependentString(JSContext *cx, JSString *baseArg, size_t start, size_t length)
3245{
3246    if (length == 0)
3247        return cx->runtime->emptyString;
3248
3249    JSLinearString *base = baseArg->ensureLinear(cx);
3250    if (!base)
3251        return NULL;
3252
3253    if (start == 0 && length == base->length())
3254        return base;
3255
3256    const jschar *chars = base->chars() + start;
3257
3258    if (JSLinearString *staticStr = cx->runtime->staticStrings.lookup(chars, length))
3259        return staticStr;
3260
3261    JSLinearString *s = JSDependentString::new_(cx, base, chars, length);
3262    Probes::createString(cx, s, length);
3263    return s;
3264}
3265
3266JSFixedString *
3267js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n)
3268{
3269    if (JSShortString::lengthFits(n))
3270        return NewShortString(cx, s, n);
3271
3272    jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
3273    if (!news)
3274        return NULL;
3275    js_strncpy(news, s, n);
3276    news[n] = 0;
3277    JSFixedString *str = js_NewString(cx, news, n);
3278    if (!str)
3279        cx->free_(news);
3280    return str;
3281}
3282
3283JSFixedString *
3284js_NewStringCopyN(JSContext *cx, const char *s, size_t n)
3285{
3286    if (JSShortString::lengthFits(n))
3287        return NewShortString(cx, s, n);
3288
3289    jschar *chars = InflateString(cx, s, &n);
3290    if (!chars)
3291        return NULL;
3292    JSFixedString *str = js_NewString(cx, chars, n);
3293    if (!str)
3294        cx->free_(chars);
3295    return str;
3296}
3297
3298JSFixedString *
3299js_NewStringCopyZ(JSContext *cx, const jschar *s)
3300{
3301    size_t n = js_strlen(s);
3302    if (JSShortString::lengthFits(n))
3303        return NewShortString(cx, s, n);
3304
3305    size_t m = (n + 1) * sizeof(jschar);
3306    jschar *news = (jschar *) cx->malloc_(m);
3307    if (!news)
3308        return NULL;
3309    js_memcpy(news, s, m);
3310    JSFixedString *str = js_NewString(cx, news, n);
3311    if (!str)
3312        cx->free_(news);
3313    return str;
3314}
3315
3316JSFixedString *
3317js_NewStringCopyZ(JSContext *cx, const char *s)
3318{
3319    return js_NewStringCopyN(cx, s, strlen(s));
3320}
3321
3322const char *
3323js_ValueToPrintable(JSContext *cx, const Value &v, JSAutoByteString *bytes, bool asSource)
3324{
3325    JSString *str;
3326
3327    str = (asSource ? js_ValueToSource : ToString)(cx, v);
3328    if (!str)
3329        return NULL;
3330    str = js_QuoteString(cx, str, 0);
3331    if (!str)
3332        return NULL;
3333    return bytes->encode(cx, str);
3334}
3335
3336JSString *
3337js::ToStringSlow(JSContext *cx, const Value &arg)
3338{
3339    /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
3340    JS_ASSERT(!arg.isString());
3341
3342    Value v = arg;
3343    if (!ToPrimitive(cx, JSTYPE_STRING, &v))
3344        return NULL;
3345
3346    JSString *str;
3347    if (v.isString()) {
3348        str = v.toString();
3349    } else if (v.isInt32()) {
3350        str = js_IntToString(cx, v.toInt32());
3351    } else if (v.isDouble()) {
3352        str = js_NumberToString(cx, v.toDouble());
3353    } else if (v.isBoolean()) {
3354        str = js_BooleanToString(cx, v.toBoolean());
3355    } else if (v.isNull()) {
3356        str = cx->runtime->atomState.nullAtom;
3357    } else {
3358        str = cx->runtime->atomState.typeAtoms[JSTYPE_VOID];
3359    }
3360    return str;
3361}
3362
3363/* This function implements E-262-3 section 9.8, toString. */
3364bool
3365js::ValueToStringBufferSlow(JSContext *cx, const Value &arg, StringBuffer &sb)
3366{
3367    Value v = arg;
3368    if (!ToPrimitive(cx, JSTYPE_STRING, &v))
3369        return false;
3370
3371    if (v.isString())
3372        return sb.append(v.toString());
3373    if (v.isNumber())
3374        return NumberValueToStringBuffer(cx, v, sb);
3375    if (v.isBoolean())
3376        return BooleanToStringBuffer(cx, v.toBoolean(), sb);
3377    if (v.isNull())
3378        return sb.append(cx->runtime->atomState.nullAtom);
3379    JS_ASSERT(v.isUndefined());
3380    return sb.append(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
3381}
3382
3383JS_FRIEND_API(JSString *)
3384js_ValueToSource(JSContext *cx, const Value &v)
3385{
3386    JS_CHECK_RECURSION(cx, return NULL);
3387
3388    if (v.isUndefined())
3389        return cx->runtime->atomState.void0Atom;
3390    if (v.isString())
3391        return js_QuoteString(cx, v.toString(), '"');
3392    if (v.isPrimitive()) {
3393        /* Special case to preserve negative zero, _contra_ toString. */
3394        if (v.isDouble() && JSDOUBLE_IS_NEGZERO(v.toDouble())) {
3395            /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
3396            static const jschar js_negzero_ucNstr[] = {'-', '0'};
3397
3398            return js_NewStringCopyN(cx, js_negzero_ucNstr, 2);
3399        }
3400        return ToString(cx, v);
3401    }
3402
3403    Value rval = NullValue();
3404    Value fval;
3405    jsid id = ATOM_TO_JSID(cx->runtime->atomState.toSourceAtom);
3406    if (!js_GetMethod(cx, &v.toObject(), id, JSGET_NO_METHOD_BARRIER, &fval))
3407        return NULL;
3408    if (js_IsCallable(fval)) {
3409        if (!Invoke(cx, v, fval, 0, NULL, &rval))
3410            return NULL;
3411    }
3412
3413    return ToString(cx, rval);
3414}
3415
3416namespace js {
3417
3418bool
3419EqualStrings(JSContext *cx, JSString *str1, JSString *str2, bool *result)
3420{
3421    if (str1 == str2) {
3422        *result = true;
3423        return true;
3424    }
3425
3426    size_t length1 = str1->length();
3427    if (length1 != str2->length()) {
3428        *result = false;
3429        return true;
3430    }
3431
3432    JSLinearString *linear1 = str1->ensureLinear(cx);
3433    if (!linear1)
3434        return false;
3435    JSLinearString *linear2 = str2->ensureLinear(cx);
3436    if (!linear2)
3437        return false;
3438
3439    *result = PodEqual(linear1->chars(), linear2->chars(), length1);
3440    return true;
3441}
3442
3443bool
3444EqualStrings(JSLinearString *str1, JSLinearString *str2)
3445{
3446    if (str1 == str2)
3447        return true;
3448
3449    size_t length1 = str1->length();
3450    if (length1 != str2->length())
3451        return false;
3452
3453    return PodEqual(str1->chars(), str2->chars(), length1);
3454}
3455
3456}  /* namespace js */
3457
3458namespace js {
3459
3460static bool
3461CompareStringsImpl(JSContext *cx, JSString *str1, JSString *str2, int32_t *result)
3462{
3463    JS_ASSERT(str1);
3464    JS_ASSERT(str2);
3465
3466    if (str1 == str2) {
3467        *result = 0;
3468        return true;
3469    }
3470
3471    const jschar *s1 = str1->getChars(cx);
3472    if (!s1)
3473        return false;
3474
3475    const jschar *s2 = str2->getChars(cx);
3476    if (!s2)
3477        return false;
3478
3479    return CompareChars(s1, str1->length(), s2, str2->length(), result);
3480}
3481
3482bool
3483CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32_t *result)
3484{
3485    return CompareStringsImpl(cx, str1, str2, result);
3486}
3487
3488}  /* namespace js */
3489
3490namespace js {
3491
3492bool
3493StringEqualsAscii(JSLinearString *str, const char *asciiBytes)
3494{
3495    size_t length = strlen(asciiBytes);
3496#ifdef DEBUG
3497    for (size_t i = 0; i != length; ++i)
3498        JS_ASSERT(unsigned(asciiBytes[i]) <= 127);
3499#endif
3500    if (length != str->length())
3501        return false;
3502    const jschar *chars = str->chars();
3503    for (size_t i = 0; i != length; ++i) {
3504        if (unsigned(asciiBytes[i]) != unsigned(chars[i]))
3505            return false;
3506    }
3507    return true;
3508}
3509
} /* namespace js */
3511
3512size_t
3513js_strlen(const jschar *s)
3514{
3515    const jschar *t;
3516
3517    for (t = s; *t != 0; t++)
3518        continue;
3519    return (size_t)(t - s);
3520}
3521
3522jschar *
3523js_strchr(const jschar *s, jschar c)
3524{
3525    while (*s != 0) {
3526        if (*s == c)
3527            return (jschar *)s;
3528        s++;
3529    }
3530    return NULL;
3531}
3532
3533jschar *
3534js_strchr_limit(const jschar *s, jschar c, const jschar *limit)
3535{
3536    while (s < limit) {
3537        if (*s == c)
3538            return (jschar *)s;
3539        s++;
3540    }
3541    return NULL;
3542}
3543
3544namespace js {
3545
3546jschar *
3547InflateString(JSContext *cx, const char *bytes, size_t *lengthp, FlationCoding fc)
3548{
3549    size_t nchars;
3550    jschar *chars;
3551    size_t nbytes = *lengthp;
3552
3553    if (js_CStringsAreUTF8 || fc == CESU8Encoding) {
3554        if (!InflateUTF8StringToBuffer(cx, bytes, nbytes, NULL, &nchars, fc))
3555            goto bad;
3556        chars = (jschar *) cx->malloc_((nchars + 1) * sizeof (jschar));
3557        if (!chars)
3558            goto bad;
3559        JS_ALWAYS_TRUE(InflateUTF8StringToBuffer(cx, bytes, nbytes, chars, &nchars, fc));
3560    } else {
3561        nchars = nbytes;
3562        chars = (jschar *) cx->malloc_((nchars + 1) * sizeof(jschar));
3563        if (!chars)
3564            goto bad;
3565        for (size_t i = 0; i < nchars; i++)
3566            chars[i] = (unsigned char) bytes[i];
3567    }
3568    *lengthp = nchars;
3569    chars[nchars] = 0;
3570    return chars;
3571
3572  bad:
3573    /*
3574     * For compatibility with callers of JS_DecodeBytes we must zero lengthp
3575     * on errors.
3576     */
3577    *lengthp = 0;
3578    return NULL;
3579}
3580
3581/*
3582 * May be called with null cx.
3583 */
3584char *
3585DeflateString(JSContext *cx, const jschar *chars, size_t nchars)
3586{
3587    size_t nbytes, i;
3588    char *bytes;
3589
3590    if (js_CStringsAreUTF8) {
3591        nbytes = GetDeflatedStringLength(cx, chars, nchars);
3592        if (nbytes == (size_t) -1)
3593            return NULL;
3594        bytes = (char *) (cx ? cx->malloc_(nbytes + 1) : OffTheBooks::malloc_(nbytes + 1));
3595        if (!bytes)
3596            return NULL;
3597        JS_ALWAYS_TRUE(DeflateStringToBuffer(cx, chars, nchars, bytes, &nbytes));
3598    } else {
3599        nbytes = nchars;
3600        bytes = (char *) (cx ? cx->malloc_(nbytes + 1) : OffTheBooks::malloc_(nbytes + 1));
3601        if (!bytes)
3602            return NULL;
3603        for (i = 0; i < nbytes; i++)
3604            bytes[i] = (char) chars[i];
3605    }
3606    bytes[nbytes] = 0;
3607    return bytes;
3608}
3609
3610size_t
3611GetDeflatedStringLength(JSContext *cx, const jschar *chars, size_t nchars)
3612{
3613    if (!js_CStringsAreUTF8)
3614        return nchars;
3615
3616    return GetDeflatedUTF8StringLength(cx, chars, nchars);
3617}
3618
3619/*
3620 * May be called with null cx through public API, see below.
3621 */
3622size_t
3623GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars,
3624                                size_t nchars, FlationCoding fc)
3625{
3626    size_t nbytes;
3627    const jschar *end;
3628    uintN c, c2;
3629    char buffer[10];
3630    bool useCESU8 = fc == CESU8Encoding;
3631
3632    nbytes = nchars;
3633    for (end = chars + nchars; chars != end; chars++) {
3634        c = *chars;
3635        if (c < 0x80)
3636            continue;
3637        if (0xD800 <= c && c <= 0xDFFF && !useCESU8) {
3638            /* Surrogate pair. */
3639            chars++;
3640
3641            /* nbytes sets 1 length since this is surrogate pair. */
3642            nbytes--;
3643            if (c >= 0xDC00 || chars == end)
3644                goto bad_surrogate;
3645            c2 = *chars;
3646            if (c2 < 0xDC00 || c2 > 0xDFFF)
3647                goto bad_surrogate;
3648            c = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
3649        }
3650        c >>= 11;
3651        nbytes++;
3652        while (c) {
3653            c >>= 5;
3654            nbytes++;
3655        }
3656    }
3657    return nbytes;
3658
3659  bad_surrogate:
3660    if (cx) {
3661        JS_snprintf(buffer, 10, "0x%x", c);
3662        JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, js_GetErrorMessage,
3663                                     NULL, JSMSG_BAD_SURROGATE_CHAR, buffer);
3664    }
3665    return (size_t) -1;
3666}
3667
3668bool
3669DeflateStringToBuffer(JSContext *cx, const jschar *src, size_t srclen,
3670                          char *dst, size_t *dstlenp)
3671{
3672    size_t dstlen, i;
3673
3674    dstlen = *dstlenp;
3675    if (!js_CStringsAreUTF8) {
3676        if (srclen > dstlen) {
3677            for (i = 0; i < dstlen; i++)
3678                dst[i] = (char) src[i];
3679            if (cx) {
3680                JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3681                                     JSMSG_BUFFER_TOO_SMALL);
3682            }
3683            return JS_FALSE;
3684        }
3685        for (i = 0; i < srclen; i++)
3686            dst[i] = (char) src[i];
3687        *dstlenp = srclen;
3688        return JS_TRUE;
3689    }
3690
3691    return DeflateStringToUTF8Buffer(cx, src, srclen, dst, dstlenp);
3692}
3693
3694bool
3695DeflateStringToUTF8Buffer(JSContext *cx, const jschar *src, size_t srclen,
3696                              char *dst, size_t *dstlenp, FlationCoding fc)
3697{
3698    size_t i, utf8Len;
3699    jschar c, c2;
3700    uint32_t v;
3701    uint8_t utf8buf[6];
3702
3703    bool useCESU8 = fc == CESU8Encoding;
3704    size_t dstlen = *dstlenp;
3705    size_t origDstlen = dstlen;
3706
3707    while (srclen) {
3708        c = *src++;
3709        srclen--;
3710        if ((c >= 0xDC00) && (c <= 0xDFFF) && !useCESU8)
3711            goto badSurrogate;
3712        if (c < 0xD800 || c > 0xDBFF || useCESU8) {
3713            v = c;
3714        } else {
3715            if (srclen < 1)
3716                goto badSurrogate;
3717            c2 = *src;
3718            if ((c2 < 0xDC00) || (c2 > 0xDFFF))
3719                goto badSurrogate;
3720            src++;
3721            srclen--;
3722            v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
3723        }
3724        if (v < 0x0080) {
3725            /* no encoding necessary - performance hack */
3726            if (dstlen == 0)
3727                goto bufferTooSmall;
3728            *dst++ = (char) v;
3729            utf8Len = 1;
3730        } else {
3731            utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
3732            if (utf8Len > dstlen)
3733                goto bufferTooSmall;
3734            for (i = 0; i < utf8Len; i++)
3735                *dst++ = (char) utf8buf[i];
3736        }
3737        dstlen -= utf8Len;
3738    }
3739    *dstlenp = (origDstlen - dstlen);
3740    return JS_TRUE;
3741
3742badSurrogate:
3743    *dstlenp = (origDstlen - dstlen);
3744    /* Delegate error reporting to the measurement function. */
3745    if (cx)
3746        GetDeflatedStringLength(cx, src - 1, srclen + 1);
3747    return JS_FALSE;
3748
3749bufferTooSmall:
3750    *dstlenp = (origDstlen - dstlen);
3751    if (cx) {
3752        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3753                             JSMSG_BUFFER_TOO_SMALL);
3754    }
3755    return JS_FALSE;
3756}
3757
3758bool
3759InflateStringToBuffer(JSContext *cx, const char *src, size_t srclen,
3760                          jschar *dst, size_t *dstlenp)
3761{
3762    size_t dstlen, i;
3763
3764    if (js_CStringsAreUTF8)
3765        return InflateUTF8StringToBuffer(cx, src, srclen, dst, dstlenp);
3766
3767    if (dst) {
3768        dstlen = *dstlenp;
3769        if (srclen > dstlen) {
3770            for (i = 0; i < dstlen; i++)
3771                dst[i] = (unsigned char) src[i];
3772            if (cx) {
3773                JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3774                                     JSMSG_BUFFER_TOO_SMALL);
3775            }
3776            return JS_FALSE;
3777        }
3778        for (i = 0; i < srclen; i++)
3779            dst[i] = (unsigned char) src[i];
3780    }
3781    *dstlenp = srclen;
3782    return JS_TRUE;
3783}
3784
3785bool
3786InflateUTF8StringToBuffer(JSContext *cx, const char *src, size_t srclen,
3787                              jschar *dst, size_t *dstlenp, FlationCoding fc)
3788{
3789    size_t dstlen, origDstlen, offset, j, n;
3790    uint32_t v;
3791
3792    dstlen = dst ? *dstlenp : (size_t) -1;
3793    origDstlen = dstlen;
3794    offset = 0;
3795    bool useCESU8 = fc == CESU8Encoding;
3796
3797    while (srclen) {
3798        v = (uint8_t) *src;
3799        n = 1;
3800        if (v & 0x80) {
3801            while (v & (0x80 >> n))
3802                n++;
3803            if (n > srclen)
3804                goto bufferTooSmall;
3805            if (n == 1 || n > 4)
3806                goto badCharacter;
3807            for (j = 1; j < n; j++) {
3808                if ((src[j] & 0xC0) != 0x80)
3809                    goto badCharacter;
3810            }
3811            v = Utf8ToOneUcs4Char((uint8_t *)src, n);
3812            if (v >= 0x10000 && !useCESU8) {
3813                v -= 0x10000;
3814                if (v > 0xFFFFF || dstlen < 2) {
3815                    *dstlenp = (origDstlen - dstlen);
3816                    if (cx) {
3817                        char buffer[10];
3818                        JS_snprintf(buffer, 10, "0x%x", v + 0x10000);
3819                        JS_ReportErrorFlagsAndNumber(cx,
3820                                                     JSREPORT_ERROR,
3821                                                     js_GetErrorMessage, NULL,
3822                                                     JSMSG_UTF8_CHAR_TOO_LARGE,
3823                                                     buffer);
3824                    }
3825                    return JS_FALSE;
3826                }
3827                if (dst) {
3828                    *dst++ = (jschar)((v >> 10) + 0xD800);
3829                    v = (jschar)((v & 0x3FF) + 0xDC00);
3830                }
3831                dstlen--;
3832            }
3833        }
3834        if (!dstlen)
3835            goto bufferTooSmall;
3836        if (dst)
3837            *dst++ = (jschar) v;
3838        dstlen--;
3839        offset += n;
3840        src += n;
3841        srclen -= n;
3842    }
3843    *dstlenp = (origDstlen - dstlen);
3844    return JS_TRUE;
3845
3846badCharacter:
3847    *dstlenp = (origDstlen - dstlen);
3848    if (cx) {
3849        char buffer[10];
3850        JS_snprintf(buffer, 10, "%d", offset);
3851        JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
3852                                     js_GetErrorMessage, NULL,
3853                                     JSMSG_MALFORMED_UTF8_CHAR,
3854                                     buffer);
3855    }
3856    return JS_FALSE;
3857
3858bufferTooSmall:
3859    *dstlenp = (origDstlen - dstlen);
3860    if (cx) {
3861        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3862                             JSMSG_BUFFER_TOO_SMALL);
3863    }
3864    return JS_FALSE;
3865}
3866
} /* namespace js */
3868
3869const jschar js_uriReservedPlusPound_ucstr[] =
3870    {';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '#', 0};
3871const jschar js_uriUnescaped_ucstr[] =
3872    {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
3873     'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
3874     'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
3875     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
3876     'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
3877     '-', '_', '.', '!', '~', '*', '\'', '(', ')', 0};
3878
/*
 * ASCII classification tables, one entry per code 0..127, laid out as eight
 * rows of sixteen so that the row label plus the column header gives the
 * character code in hex. F_ and T_ are local shorthands that exist only for
 * the duration of these tables.
 */
#define F_ false
#define T_ true

/*
 * Characters that may start an identifier:
 *   0x24        $
 *   0x41..0x5A  A..Z
 *   0x5F        _
 *   0x61..0x7A  a..z
 */
const bool js_isidstart[] = {
/*          0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F  */
/* 0x00 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x10 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x20 */ F_, F_, F_, F_, T_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x30 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x40 */ F_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_,
/* 0x50 */ T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, F_, F_, F_, F_, T_,
/* 0x60 */ F_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_,
/* 0x70 */ T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, F_, F_, F_, F_, F_
};

/*
 * Characters that may appear anywhere in an identifier: everything in
 * js_isidstart plus the digits 0x30..0x39 (0..9).
 */
const bool js_isident[] = {
/*          0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F  */
/* 0x00 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x10 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x20 */ F_, F_, F_, F_, T_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x30 */ T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, F_, F_, F_, F_, F_, F_,
/* 0x40 */ F_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_,
/* 0x50 */ T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, F_, F_, F_, F_, T_,
/* 0x60 */ F_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_,
/* 0x70 */ T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, T_, F_, F_, F_, F_, F_
};

/* Whitespace: '\t' '\n' '\v' '\f' '\r' (0x09..0x0D) and ' ' (0x20). */
const bool js_isspace[] = {
/*          0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F  */
/* 0x00 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, T_, T_, T_, T_, T_, F_, F_,
/* 0x10 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x20 */ T_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x30 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x40 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x50 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x60 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_,
/* 0x70 */ F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_, F_
};

#undef T_
#undef F_
3949
/*
 * NOTE(review): URI_CHUNK is not referenced by the URI Encode/Decode code
 * that follows this definition; presumably a leftover from an earlier
 * buffer-growing implementation -- confirm there is no other user before
 * removing it.
 */
#define URI_CHUNK 64U
3951
3952static inline bool
3953TransferBufferToString(JSContext *cx, StringBuffer &sb, Value *rval)
3954{
3955    JSString *str = sb.finishString();
3956    if (!str)
3957        return false;
3958    rval->setString(str);
3959    return true;
3960}
3961
3962/*
3963 * ECMA 3, 15.1.3 URI Handling Function Properties
3964 *
3965 * The following are implementations of the algorithms
3966 * given in the ECMA specification for the hidden functions
3967 * 'Encode' and 'Decode'.
3968 */
3969static JSBool
3970Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
3971       const jschar *unescapedSet2, Value *rval)
3972{
3973    static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
3974
3975    size_t length = str->length();
3976    const jschar *chars = str->getChars(cx);
3977    if (!chars)
3978        return JS_FALSE;
3979
3980    if (length == 0) {
3981        rval->setString(cx->runtime->emptyString);
3982        return JS_TRUE;
3983    }
3984
3985    StringBuffer sb(cx);
3986    jschar hexBuf[4];
3987    hexBuf[0] = '%';
3988    hexBuf[3] = 0;
3989    for (size_t k = 0; k < length; k++) {
3990        jschar c = chars[k];
3991        if (js_strchr(unescapedSet, c) ||
3992            (unescapedSet2 && js_strchr(unescapedSet2, c))) {
3993            if (!sb.append(c))
3994                return JS_FALSE;
3995        } else {
3996            if ((c >= 0xDC00) && (c <= 0xDFFF)) {
3997                JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
3998                                 JSMSG_BAD_URI, NULL);
3999                return JS_FALSE;
4000            }
4001            uint32_t v;
4002            if (c < 0xD800 || c > 0xDBFF) {
4003                v = c;
4004            } else {
4005                k++;
4006                if (k == length) {
4007                    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4008                                     JSMSG_BAD_URI, NULL);
4009                    return JS_FALSE;
4010                }
4011                jschar c2 = chars[k];
4012                if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
4013                    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
4014                                     JSMSG_BAD_URI, NULL);
4015                    return JS_FALSE;
4016                }
4017                v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
4018            }
4019            uint8_t utf8buf[4];
4020            size_t L = js_OneUcs4ToUtf8Char(utf8buf, v);
4021            for (size_t j = 0; j < L; j++) {
4022                hexBuf[1] = HexDigits[utf8buf[j] >> 4];
4023                hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
4024                if (!sb.append(hexBuf, 3))
4025                    return JS_FALSE;
4026            }
4027        }
4028    }
4029
4030    return TransferBufferToString(cx, sb, rval);
4031}
4032
4033static JSBool
4034Decode(JSContext *cx, JSString *str, const jschar *reservedSet, Value *rval)
4035{
4036    size_t length = str->length();
4037    const jschar *chars = str->getChars(cx);
4038    if (!chars)
4039        return JS_FALSE;
4040
4041    if (length == 0) {
4042        rval->setString(cx->runtime->emptyString);
4043        return JS_TRUE;
4044    }
4045
4046    StringBuffer sb(cx);
4047    for (size_t k = 0; k < length; k++) {
4048        jschar c = chars[k];
4049        if (c == '%') {
4050            size_t start = k;
4051            if ((k + 2) >= length)
4052                goto report_bad_uri;
4053            if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4054                goto report_bad_uri;
4055            jsuint B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4056            k += 2;
4057            if (!(B & 0x80)) {
4058                c = (jschar)B;
4059            } else {
4060                intN n = 1;
4061                while (B & (0x80 >> n))
4062                    n++;
4063                if (n == 1 || n > 4)
4064                    goto report_bad_uri;
4065                uint8_t octets[4];
4066                octets[0] = (uint8_t)B;
4067                if (k + 3 * (n - 1) >= length)
4068                    goto report_bad_uri;
4069                for (intN j = 1; j < n; j++) {
4070                    k++;
4071                    if (chars[k] != '%')
4072                        goto report_bad_uri;
4073                    if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
4074                        goto report_bad_uri;
4075                    B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
4076                    if ((B & 0xC0) != 0x80)
4077                        goto report_bad_uri;
4078                    k += 2;
4079                    octets[j] = (char)B;
4080                }
4081                uint32_t v = Utf8ToOneUcs4Char(octets, n);
4082                if (v >= 0x10000) {
4083                    v -= 0x10000;
4084                    if (v > 0xFFFFF)
4085                        goto report_bad_uri;
4086                    c = (jschar)((v & 0x3FF) + 0xDC00);
4087                    jschar H = (jschar)((v >> 10) + 0xD800);
4088                    if (!sb.append(H))
4089                        return JS_FALSE;
4090                } else {
4091                    c = (jschar)v;
4092                }
4093            }
4094            if (js_strchr(reservedSet, c)) {
4095                if (!sb.append(chars + start, k - start + 1))
4096                    return JS_FALSE;
4097            } else {
4098                if (!sb.append(c))
4099                    return JS_FALSE;
4100            }
4101        } else {
4102            if (!sb.append(c))
4103                return JS_FALSE;
4104        }
4105    }
4106
4107    return TransferBufferToString(cx, sb, rval);
4108
4109  report_bad_uri:
4110    JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BAD_URI);
4111    /* FALL THROUGH */
4112
4113    return JS_FALSE;
4114}
4115
4116static JSBool
4117str_decodeURI(JSContext *cx, uintN argc, Value *vp)
4118{
4119    CallArgs args = CallArgsFromVp(argc, vp);
4120    JSLinearString *str = ArgToRootedString(cx, args, 0);
4121    if (!str)
4122        return false;
4123
4124    Value result;
4125    if (!Decode(cx, str, js_uriReservedPlusPound_ucstr, &result))
4126        return false;
4127
4128    args.rval() = result;
4129    return true;
4130}
4131
4132static JSBool
4133str_decodeURI_Component(JSContext *cx, uintN argc, Value *vp)
4134{
4135    CallArgs args = CallArgsFromVp(argc, vp);
4136    JSLinearString *str = ArgToRootedString(cx, args, 0);
4137    if (!str)
4138        return false;
4139
4140    Value result;
4141    if (!Decode(cx, str, js_empty_ucstr, &result))
4142        return false;
4143
4144    args.rval() = result;
4145    return true;
4146}
4147
4148static JSBool
4149str_encodeURI(JSContext *cx, uintN argc, Value *vp)
4150{
4151    CallArgs args = CallArgsFromVp(argc, vp);
4152    JSLinearString *str = ArgToRootedString(cx, args, 0);
4153    if (!str)
4154        return false;
4155
4156    Value result;
4157    if (!Encode(cx, str, js_uriReservedPlusPound_ucstr, js_uriUnescaped_ucstr, &result))
4158        return false;
4159
4160    args.rval() = result;
4161    return true;
4162}
4163
4164static JSBool
4165str_encodeURI_Component(JSContext *cx, uintN argc, Value *vp)
4166{
4167    CallArgs args = CallArgsFromVp(argc, vp);
4168    JSLinearString *str = ArgToRootedString(cx, args, 0);
4169    if (!str)
4170        return false;
4171
4172    Value result;
4173    if (!Encode(cx, str, js_uriUnescaped_ucstr, NULL, &result))
4174        return false;
4175
4176    args.rval() = result;
4177    return true;
4178}
4179
4180/*
4181 * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
4182 * least 4 bytes long.  Return the number of UTF-8 bytes of data written.
4183 */
4184int
4185js_OneUcs4ToUtf8Char(uint8_t *utf8Buffer, uint32_t ucs4Char)
4186{
4187    int utf8Length = 1;
4188
4189    JS_ASSERT(ucs4Char <= 0x10FFFF);
4190    if (ucs4Char < 0x80) {
4191        *utf8Buffer = (uint8_t)ucs4Char;
4192    } else {
4193        int i;
4194        uint32_t a = ucs4Char >> 11;
4195        utf8Length = 2;
4196        while (a) {
4197            a >>= 5;
4198            utf8Length++;
4199        }
4200        i = utf8Length;
4201        while (--i) {
4202            utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80);
4203            ucs4Char >>= 6;
4204        }
4205        *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
4206    }
4207    return utf8Length;
4208}
4209
4210/*
4211 * Convert a utf8 character sequence into a UCS-4 character and return that
4212 * character.  It is assumed that the caller already checked that the sequence
4213 * is valid.
4214 */
4215static uint32_t
4216Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length)
4217{
4218    JS_ASSERT(1 <= utf8Length && utf8Length <= 4);
4219
4220    if (utf8Length == 1) {
4221        JS_ASSERT(!(*utf8Buffer & 0x80));
4222        return *utf8Buffer;
4223    }
4224
4225    /* from Unicode 3.1, non-shortest form is illegal */
4226    static const uint32_t minucs4Table[] = { 0x80, 0x800, 0x10000 };
4227
4228    JS_ASSERT((*utf8Buffer & (0x100 - (1 << (7 - utf8Length)))) ==
4229              (0x100 - (1 << (8 - utf8Length))));
4230    uint32_t ucs4Char = *utf8Buffer++ & ((1 << (7 - utf8Length)) - 1);
4231    uint32_t minucs4Char = minucs4Table[utf8Length - 2];
4232    while (--utf8Length) {
4233        JS_ASSERT((*utf8Buffer & 0xC0) == 0x80);
4234        ucs4Char = (ucs4Char << 6) | (*utf8Buffer++ & 0x3F);
4235    }
4236
4237    if (JS_UNLIKELY(ucs4Char < minucs4Char || (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF)))
4238        return INVALID_UTF8;
4239
4240    return ucs4Char;
4241}
4242
namespace js {

/*
 * Produce an escaped, ASCII-only rendering of |str|, optionally wrapped in
 * the |quote| character (0 for no quotes, otherwise '\'' or '"').
 *
 * The output goes to at most one destination: |buffer|, which holds
 * |bufferSize| chars including the terminating NUL, or the stream |fp|.
 * When neither is given the output is only counted.
 *
 * Escaping, as implemented below:
 *  - a code unit below ' ' (other than 0) that strchr finds in js_EscapeMap
 *    is written as a backslash followed by the character that comes after
 *    it in js_EscapeMap;
 *  - any other code unit below ' ', and those in 127..0xFF, are written as
 *    \xXX;
 *  - the quote character and the backslash get a leading backslash;
 *  - code units >= 0x100 are written as \uXXXX;
 *  - the remaining code units (' '..126) are copied through.
 * Hex digits are upper case.
 *
 * Returns the number of chars in the complete escaped form, not counting the
 * terminating NUL. The count keeps running after |buffer| has filled up, in
 * which case the buffer holds a NUL-terminated prefix. Returns size_t(-1)
 * when fputc reports an error.
 */
size_t
PutEscapedStringImpl(char *buffer, size_t bufferSize, FILE *fp, JSLinearString *str, uint32_t quote)
{
    /*
     * The loop below emits at most one output char per iteration; |state|
     * records what has to be produced next.
     */
    enum {
        STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
    } state;

    JS_ASSERT(quote == 0 || quote == '\'' || quote == '"');
    JS_ASSERT_IF(!buffer, bufferSize == 0);
    JS_ASSERT_IF(fp, !buffer);

    /* Reserve one slot of the buffer for the terminating NUL. */
    if (bufferSize == 0)
        buffer = NULL;
    else
        bufferSize--;

    const jschar *chars = str->chars();
    const jschar *charsEnd = chars + str->length();
    size_t n = 0;       /* chars produced so far */
    state = FIRST_QUOTE;
    uintN shift = 0;    /* bits of |hex| still to be written as hex digits */
    uintN hex = 0;      /* code unit being written as a hex escape */
    uintN u = 0;        /* current code unit, or the char after a backslash */
    char c = 0;  /* to quell GCC warnings */

    for (;;) {
        /*
         * Each case either sets |c| and breaks out of the switch so that |c|
         * is emitted below, or continues the loop without emitting anything.
         */
        switch (state) {
          case STOP:
            goto stop;
          case FIRST_QUOTE:
            state = CHARS;
            goto do_quote;
          case LAST_QUOTE:
            state = STOP;
          do_quote:
            /* Without a quote character there is nothing to emit here. */
            if (quote == 0)
                continue;
            c = (char)quote;
            break;
          case CHARS:
            if (chars == charsEnd) {
                state = LAST_QUOTE;
                continue;
            }
            u = *chars++;
            if (u < ' ') {
                /* 0 is excluded because strchr would match the map's NUL. */
                if (u != 0) {
                    const char *escape = strchr(js_EscapeMap, (int)u);
                    if (escape) {
                        u = escape[1];
                        goto do_escape;
                    }
                }
                goto do_hex_escape;
            }
            if (u < 127) {
                if (u == quote || u == '\\')
                    goto do_escape;
                c = (char)u;
            } else if (u < 0x100) {
                goto do_hex_escape;
            } else {
                /* Four hex digits follow the 'u'. */
                shift = 16;
                hex = u;
                u = 'u';
                goto do_escape;
            }
            break;
          do_hex_escape:
            /* Two hex digits follow the 'x'. */
            shift = 8;
            hex = u;
            u = 'x';
          do_escape:
            /* Emit the backslash now; |u| is emitted by ESCAPE_START. */
            c = '\\';
            state = ESCAPE_START;
            break;
          case ESCAPE_START:
            JS_ASSERT(' ' <= u && u < 127);
            c = (char)u;
            state = ESCAPE_MORE;
            break;
          case ESCAPE_MORE:
            /*
             * |shift| is zero for escapes without hex digits and drops back
             * to zero once all digits of a hex escape have been written.
             */
            if (shift == 0) {
                state = CHARS;
                continue;
            }
            /* Next hex digit, most significant nibble first. */
            shift -= 4;
            u = 0xF & (hex >> shift);
            c = (char)(u + (u < 10 ? '0' : 'A' - 10));
            break;
        }
        if (buffer) {
            JS_ASSERT(n <= bufferSize);
            if (n != bufferSize) {
                buffer[n] = c;
            } else {
                /*
                 * Buffer is full: terminate it and stop writing, but keep
                 * counting so that the full length is returned.
                 */
                buffer[n] = '\0';
                buffer = NULL;
            }
        } else if (fp) {
            if (fputc(c, fp) < 0)
                return size_t(-1);
        }
        n++;
    }
  stop:
    /* |buffer| is still non-NULL only if everything fit. */
    if (buffer)
        buffer[n] = '\0';
    return n;
}

} /* namespace js */