PageRenderTime 181ms CodeModel.GetById 21ms app.highlight 143ms RepoModel.GetById 1ms app.codeStats 1ms

/js/src/jsstr.cpp

http://github.com/zpao/v8monkey
C++ | 4356 lines | 3507 code | 476 blank | 373 comment | 684 complexity | fbf53656a39e95829f89c223c92027e4 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2 * vim: set ts=8 sw=4 et tw=99:
   3 *
   4 * ***** BEGIN LICENSE BLOCK *****
   5 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6 *
   7 * The contents of this file are subject to the Mozilla Public License Version
   8 * 1.1 (the "License"); you may not use this file except in compliance with
   9 * the License. You may obtain a copy of the License at
  10 * http://www.mozilla.org/MPL/
  11 *
  12 * Software distributed under the License is distributed on an "AS IS" basis,
  13 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14 * for the specific language governing rights and limitations under the
  15 * License.
  16 *
  17 * The Original Code is Mozilla Communicator client code, released
  18 * March 31, 1998.
  19 *
  20 * The Initial Developer of the Original Code is
  21 * Netscape Communications Corporation.
  22 * Portions created by the Initial Developer are Copyright (C) 1998
  23 * the Initial Developer. All Rights Reserved.
  24 *
  25 * Contributor(s):
  26 *
  27 * Alternatively, the contents of this file may be used under the terms of
  28 * either of the GNU General Public License Version 2 or later (the "GPL"),
  29 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30 * in which case the provisions of the GPL or the LGPL are applicable instead
  31 * of those above. If you wish to allow use of your version of this file only
  32 * under the terms of either the GPL or the LGPL, and not to allow others to
  33 * use your version of this file under the terms of the MPL, indicate your
  34 * decision by deleting the provisions above and replace them with the notice
  35 * and other provisions required by the GPL or the LGPL. If you do not delete
  36 * the provisions above, a recipient may use your version of this file under
  37 * the terms of any one of the MPL, the GPL or the LGPL.
  38 *
  39 * ***** END LICENSE BLOCK ***** */
  40
  41/*
  42 * JS string type implementation.
  43 *
  44 * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
  45 * native methods store strings (possibly newborn) converted from their 'this'
  46 * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
  47 * conversions at their index (argv[0], argv[1]).  This is a legitimate method
  48 * of rooting things that might lose their newborn root due to subsequent GC
  49 * allocations in the same native method.
  50 */
  51
  52#include "mozilla/Attributes.h"
  53
  54#include <stdlib.h>
  55#include <string.h>
  56#include "jstypes.h"
  57#include "jsutil.h"
  58#include "jshash.h"
  59#include "jsprf.h"
  60#include "jsapi.h"
  61#include "jsarray.h"
  62#include "jsatom.h"
  63#include "jsbool.h"
  64#include "jscntxt.h"
  65#include "jsgc.h"
  66#include "jsinterp.h"
  67#include "jslock.h"
  68#include "jsnum.h"
  69#include "jsobj.h"
  70#include "jsopcode.h"
  71#include "jsprobes.h"
  72#include "jsscope.h"
  73#include "jsstr.h"
  74#include "jsversion.h"
  75
  76#include "builtin/RegExp.h"
  77#include "vm/GlobalObject.h"
  78#include "vm/RegExpObject.h"
  79
  80#include "jsinferinlines.h"
  81#include "jsobjinlines.h"
  82#include "jsautooplen.h"        // generated headers last
  83
  84#include "vm/RegExpObject-inl.h"
  85#include "vm/RegExpStatics-inl.h"
  86#include "vm/StringObject-inl.h"
  87#include "vm/String-inl.h"
  88
  89using namespace js;
  90using namespace js::gc;
  91using namespace js::types;
  92using namespace js::unicode;
  93
  94static JSLinearString *
  95ArgToRootedString(JSContext *cx, CallArgs &args, uintN argno)
  96{
  97    if (argno >= args.length())
  98        return cx->runtime->atomState.typeAtoms[JSTYPE_VOID];
  99
 100    Value &arg = args[argno];
 101    JSString *str = ToString(cx, arg);
 102    if (!str)
 103        return NULL;
 104
 105    arg = StringValue(str);
 106    return str->ensureLinear(cx);
 107}
 108
 109/*
 110 * Forward declarations for URI encode/decode and helper routines
 111 */
 112static JSBool
 113str_decodeURI(JSContext *cx, uintN argc, Value *vp);
 114
 115static JSBool
 116str_decodeURI_Component(JSContext *cx, uintN argc, Value *vp);
 117
 118static JSBool
 119str_encodeURI(JSContext *cx, uintN argc, Value *vp);
 120
 121static JSBool
 122str_encodeURI_Component(JSContext *cx, uintN argc, Value *vp);
 123
 124static const uint32_t INVALID_UTF8 = UINT32_MAX;
 125
 126static uint32_t
 127Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length);
 128
 129/*
 130 * Global string methods
 131 */
 132
 133
 134/* ES5 B.2.1 */
 135static JSBool
 136str_escape(JSContext *cx, uintN argc, Value *vp)
 137{
 138    CallArgs args = CallArgsFromVp(argc, vp);
 139
 140    const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
 141                           '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 142
 143    JSLinearString *str = ArgToRootedString(cx, args, 0);
 144    if (!str)
 145        return false;
 146
 147    size_t length = str->length();
 148    const jschar *chars = str->chars();
 149
 150    static const uint8_t shouldPassThrough[256] = {
 151         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 152         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 153         0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
 154         1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
 155         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
 156         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
 157         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
 158         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,     /*   pqrstuvwxyz{\}~  DEL */
 159    };
 160
 161    /* In step 7, exactly 69 characters should pass through unencoded. */
 162#ifdef DEBUG
 163    size_t count = 0;
 164    for (size_t i = 0; i < sizeof(shouldPassThrough); i++) {
 165        if (shouldPassThrough[i]) {
 166            count++;
 167        }
 168    }
 169    JS_ASSERT(count == 69);
 170#endif
 171
 172
 173    /* Take a first pass and see how big the result string will need to be. */
 174    size_t newlength = length;
 175    for (size_t i = 0; i < length; i++) {
 176        jschar ch = chars[i];
 177        if (ch < 128 && shouldPassThrough[ch])
 178            continue;
 179
 180        /* The character will be encoded as %XX or %uXXXX. */
 181        newlength += (ch < 256) ? 2 : 5;
 182
 183        /*
 184         * This overflow test works because newlength is incremented by at
 185         * most 5 on each iteration.
 186         */
 187        if (newlength < length) {
 188            js_ReportAllocationOverflow(cx);
 189            return false;
 190        }
 191    }
 192
 193    if (newlength >= ~(size_t)0 / sizeof(jschar)) {
 194        js_ReportAllocationOverflow(cx);
 195        return false;
 196    }
 197
 198    jschar *newchars = (jschar *) cx->malloc_((newlength + 1) * sizeof(jschar));
 199    if (!newchars)
 200        return false;
 201    size_t i, ni;
 202    for (i = 0, ni = 0; i < length; i++) {
 203        jschar ch = chars[i];
 204        if (ch < 128 && shouldPassThrough[ch]) {
 205            newchars[ni++] = ch;
 206        } else if (ch < 256) {
 207            newchars[ni++] = '%';
 208            newchars[ni++] = digits[ch >> 4];
 209            newchars[ni++] = digits[ch & 0xF];
 210        } else {
 211            newchars[ni++] = '%';
 212            newchars[ni++] = 'u';
 213            newchars[ni++] = digits[ch >> 12];
 214            newchars[ni++] = digits[(ch & 0xF00) >> 8];
 215            newchars[ni++] = digits[(ch & 0xF0) >> 4];
 216            newchars[ni++] = digits[ch & 0xF];
 217        }
 218    }
 219    JS_ASSERT(ni == newlength);
 220    newchars[newlength] = 0;
 221
 222    JSString *retstr = js_NewString(cx, newchars, newlength);
 223    if (!retstr) {
 224        cx->free_(newchars);
 225        return false;
 226    }
 227
 228    args.rval() = StringValue(retstr);
 229    return true;
 230}
 231
 232static inline bool
 233Unhex4(const jschar *chars, jschar *result)
 234{
 235    jschar a = chars[0],
 236           b = chars[1],
 237           c = chars[2],
 238           d = chars[3];
 239
 240    if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
 241        return false;
 242
 243    *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
 244    return true;
 245}
 246
 247static inline bool
 248Unhex2(const jschar *chars, jschar *result)
 249{
 250    jschar a = chars[0],
 251           b = chars[1];
 252
 253    if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
 254        return false;
 255
 256    *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
 257    return true;
 258}
 259
 260/* ES5 B.2.2 */
 261static JSBool
 262str_unescape(JSContext *cx, uintN argc, Value *vp)
 263{
 264    CallArgs args = CallArgsFromVp(argc, vp);
 265
 266    /* Step 1. */
 267    JSLinearString *str = ArgToRootedString(cx, args, 0);
 268    if (!str)
 269        return false;
 270
 271    /* Step 2. */
 272    size_t length = str->length();
 273    const jschar *chars = str->chars();
 274
 275    /* Step 3. */
 276    StringBuffer sb(cx);
 277
 278    /*
 279     * Note that the spec algorithm has been optimized to avoid building
 280     * a string in the case where no escapes are present.
 281     */
 282
 283    /* Step 4. */
 284    size_t k = 0;
 285    bool building = false;
 286
 287    while (true) {
 288        /* Step 5. */
 289        if (k == length) {
 290            JSLinearString *result;
 291            if (building) {
 292                result = sb.finishString();
 293                if (!result)
 294                    return false;
 295            } else {
 296                result = str;
 297            }
 298
 299            args.rval() = StringValue(result);
 300            return true;
 301        }
 302
 303        /* Step 6. */
 304        jschar c = chars[k];
 305
 306        /* Step 7. */
 307        if (c != '%')
 308            goto step_18;
 309
 310        /* Step 8. */
 311        if (k > length - 6)
 312            goto step_14;
 313
 314        /* Step 9. */
 315        if (chars[k + 1] != 'u')
 316            goto step_14;
 317
 318#define ENSURE_BUILDING                             \
 319    JS_BEGIN_MACRO                                  \
 320        if (!building) {                            \
 321            building = true;                        \
 322            if (!sb.reserve(length))                \
 323                return false;                       \
 324            sb.infallibleAppend(chars, chars + k);  \
 325        }                                           \
 326    JS_END_MACRO
 327
 328        /* Step 10-13. */
 329        if (Unhex4(&chars[k + 2], &c)) {
 330            ENSURE_BUILDING;
 331            k += 5;
 332            goto step_18;
 333        }
 334
 335      step_14:
 336        /* Step 14. */
 337        if (k > length - 3)
 338            goto step_18;
 339
 340        /* Step 15-17. */
 341        if (Unhex2(&chars[k + 1], &c)) {
 342            ENSURE_BUILDING;
 343            k += 2;
 344        }
 345
 346      step_18:
 347        if (building)
 348            sb.infallibleAppend(c);
 349
 350        /* Step 19. */
 351        k += 1;
 352    }
 353#undef ENSURE_BUILDING
 354}
 355
 356#if JS_HAS_UNEVAL
 357static JSBool
 358str_uneval(JSContext *cx, uintN argc, Value *vp)
 359{
 360    CallArgs args = CallArgsFromVp(argc, vp);
 361    JSString *str = js_ValueToSource(cx, args.length() != 0 ? args[0] : UndefinedValue());
 362    if (!str)
 363        return false;
 364
 365    args.rval() = StringValue(str);
 366    return true;
 367}
 368#endif
 369
 370const char js_escape_str[] = "escape";
 371const char js_unescape_str[] = "unescape";
 372#if JS_HAS_UNEVAL
 373const char js_uneval_str[] = "uneval";
 374#endif
 375const char js_decodeURI_str[] = "decodeURI";
 376const char js_encodeURI_str[] = "encodeURI";
 377const char js_decodeURIComponent_str[] = "decodeURIComponent";
 378const char js_encodeURIComponent_str[] = "encodeURIComponent";
 379
 380static JSFunctionSpec string_functions[] = {
 381    JS_FN(js_escape_str,             str_escape,                1,0),
 382    JS_FN(js_unescape_str,           str_unescape,              1,0),
 383#if JS_HAS_UNEVAL
 384    JS_FN(js_uneval_str,             str_uneval,                1,0),
 385#endif
 386    JS_FN(js_decodeURI_str,          str_decodeURI,             1,0),
 387    JS_FN(js_encodeURI_str,          str_encodeURI,             1,0),
 388    JS_FN(js_decodeURIComponent_str, str_decodeURI_Component,   1,0),
 389    JS_FN(js_encodeURIComponent_str, str_encodeURI_Component,   1,0),
 390
 391    JS_FS_END
 392};
 393
 394jschar      js_empty_ucstr[]  = {0};
 395JSSubString js_EmptySubString = {0, js_empty_ucstr};
 396
 397static const uintN STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
 398
 399static JSBool
 400str_enumerate(JSContext *cx, JSObject *obj)
 401{
 402    JSString *str = obj->getPrimitiveThis().toString();
 403    for (size_t i = 0, length = str->length(); i < length; i++) {
 404        JSString *str1 = js_NewDependentString(cx, str, i, 1);
 405        if (!str1)
 406            return false;
 407        if (!obj->defineElement(cx, i, StringValue(str1),
 408                                JS_PropertyStub, JS_StrictPropertyStub,
 409                                STRING_ELEMENT_ATTRS)) {
 410            return false;
 411        }
 412    }
 413
 414    return true;
 415}
 416
 417static JSBool
 418str_resolve(JSContext *cx, JSObject *obj, jsid id, uintN flags,
 419            JSObject **objp)
 420{
 421    if (!JSID_IS_INT(id))
 422        return JS_TRUE;
 423
 424    JSString *str = obj->getPrimitiveThis().toString();
 425
 426    jsint slot = JSID_TO_INT(id);
 427    if ((size_t)slot < str->length()) {
 428        JSString *str1 = cx->runtime->staticStrings.getUnitStringForElement(cx, str, size_t(slot));
 429        if (!str1)
 430            return JS_FALSE;
 431        if (!obj->defineElement(cx, uint32_t(slot), StringValue(str1), NULL, NULL,
 432                                STRING_ELEMENT_ATTRS)) {
 433            return JS_FALSE;
 434        }
 435        *objp = obj;
 436    }
 437    return JS_TRUE;
 438}
 439
 440Class js::StringClass = {
 441    js_String_str,
 442    JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
 443    JSCLASS_NEW_RESOLVE | JSCLASS_HAS_CACHED_PROTO(JSProto_String),
 444    JS_PropertyStub,         /* addProperty */
 445    JS_PropertyStub,         /* delProperty */
 446    JS_PropertyStub,         /* getProperty */
 447    JS_StrictPropertyStub,   /* setProperty */
 448    str_enumerate,
 449    (JSResolveOp)str_resolve,
 450    JS_ConvertStub
 451};
 452
 453/*
 454 * Returns a JSString * for the |this| value associated with 'call', or throws
 455 * a TypeError if |this| is null or undefined.  This algorithm is the same as
 456 * calling CheckObjectCoercible(this), then returning ToString(this), as all
 457 * String.prototype.* methods do (other than toString and valueOf).
 458 */
 459static JS_ALWAYS_INLINE JSString *
 460ThisToStringForStringProto(JSContext *cx, CallReceiver call)
 461{
 462    JS_CHECK_RECURSION(cx, return NULL);
 463
 464    if (call.thisv().isString())
 465        return call.thisv().toString();
 466
 467    if (call.thisv().isObject()) {
 468        JSObject *obj = &call.thisv().toObject();
 469        if (obj->isString() &&
 470            ClassMethodIsNative(cx, obj,
 471                                &StringClass,
 472                                ATOM_TO_JSID(cx->runtime->atomState.toStringAtom),
 473                                js_str_toString))
 474        {
 475            call.thisv() = obj->getPrimitiveThis();
 476            return call.thisv().toString();
 477        }
 478    } else if (call.thisv().isNullOrUndefined()) {
 479        JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_CONVERT_TO,
 480                             call.thisv().isNull() ? "null" : "undefined", "object");
 481        return NULL;
 482    }
 483
 484    JSString *str = ToStringSlow(cx, call.thisv());
 485    if (!str)
 486        return NULL;
 487
 488    call.thisv().setString(str);
 489    return str;
 490}
 491
 492#if JS_HAS_TOSOURCE
 493
 494/*
 495 * String.prototype.quote is generic (as are most string methods), unlike
 496 * toSource, toString, and valueOf.
 497 */
 498static JSBool
 499str_quote(JSContext *cx, uintN argc, Value *vp)
 500{
 501    CallArgs args = CallArgsFromVp(argc, vp);
 502    JSString *str = ThisToStringForStringProto(cx, args);
 503    if (!str)
 504        return false;
 505    str = js_QuoteString(cx, str, '"');
 506    if (!str)
 507        return false;
 508    args.rval() = StringValue(str);
 509    return true;
 510}
 511
 512static JSBool
 513str_toSource(JSContext *cx, uintN argc, Value *vp)
 514{
 515    CallArgs args = CallArgsFromVp(argc, vp);
 516
 517    JSString *str;
 518    bool ok;
 519    if (!BoxedPrimitiveMethodGuard(cx, args, str_toSource, &str, &ok))
 520        return ok;
 521
 522    str = js_QuoteString(cx, str, '"');
 523    if (!str)
 524        return false;
 525
 526    StringBuffer sb(cx);
 527    if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
 528        return false;
 529
 530    str = sb.finishString();
 531    if (!str)
 532        return false;
 533    args.rval() = StringValue(str);
 534    return true;
 535}
 536
 537#endif /* JS_HAS_TOSOURCE */
 538
 539JSBool
 540js_str_toString(JSContext *cx, uintN argc, Value *vp)
 541{
 542    CallArgs args = CallArgsFromVp(argc, vp);
 543
 544    JSString *str;
 545    bool ok;
 546    if (!BoxedPrimitiveMethodGuard(cx, args, js_str_toString, &str, &ok))
 547        return ok;
 548
 549    args.rval() = StringValue(str);
 550    return true;
 551}
 552
 553/*
 554 * Java-like string native methods.
 555 */
 556
 557JS_ALWAYS_INLINE bool
 558ValueToIntegerRange(JSContext *cx, const Value &v, int32_t *out)
 559{
 560    if (v.isInt32()) {
 561        *out = v.toInt32();
 562    } else {
 563        double d;
 564        if (!ToInteger(cx, v, &d))
 565            return false;
 566        if (d > INT32_MAX)
 567            *out = INT32_MAX;
 568        else if (d < INT32_MIN)
 569            *out = INT32_MIN;
 570        else
 571            *out = int32_t(d);
 572    }
 573
 574    return true;
 575}
 576
 577static JSBool
 578str_substring(JSContext *cx, uintN argc, Value *vp)
 579{
 580    CallArgs args = CallArgsFromVp(argc, vp);
 581
 582    JSString *str = ThisToStringForStringProto(cx, args);
 583    if (!str)
 584        return false;
 585
 586    int32_t length, begin, end;
 587    if (args.length() > 0) {
 588        end = length = int32_t(str->length());
 589
 590        if (!ValueToIntegerRange(cx, args[0], &begin))
 591            return false;
 592
 593        if (begin < 0)
 594            begin = 0;
 595        else if (begin > length)
 596            begin = length;
 597
 598        if (args.length() > 1 && !args[1].isUndefined()) {
 599            if (!ValueToIntegerRange(cx, args[1], &end))
 600                return false;
 601
 602            if (end > length) {
 603                end = length;
 604            } else {
 605                if (end < 0)
 606                    end = 0;
 607                if (end < begin) {
 608                    int32_t tmp = begin;
 609                    begin = end;
 610                    end = tmp;
 611                }
 612            }
 613        }
 614
 615        str = js_NewDependentString(cx, str, size_t(begin), size_t(end - begin));
 616        if (!str)
 617            return false;
 618    }
 619
 620    args.rval() = StringValue(str);
 621    return true;
 622}
 623
 624JSString* JS_FASTCALL
 625js_toLowerCase(JSContext *cx, JSString *str)
 626{
 627    size_t n = str->length();
 628    const jschar *s = str->getChars(cx);
 629    if (!s)
 630        return NULL;
 631
 632    jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
 633    if (!news)
 634        return NULL;
 635    for (size_t i = 0; i < n; i++)
 636        news[i] = unicode::ToLowerCase(s[i]);
 637    news[n] = 0;
 638    str = js_NewString(cx, news, n);
 639    if (!str) {
 640        cx->free_(news);
 641        return NULL;
 642    }
 643    return str;
 644}
 645
 646static inline bool
 647ToLowerCaseHelper(JSContext *cx, CallReceiver call)
 648{
 649    JSString *str = ThisToStringForStringProto(cx, call);
 650    if (!str)
 651        return false;
 652
 653    str = js_toLowerCase(cx, str);
 654    if (!str)
 655        return false;
 656
 657    call.rval() = StringValue(str);
 658    return true;
 659}
 660
 661static JSBool
 662str_toLowerCase(JSContext *cx, uintN argc, Value *vp)
 663{
 664    return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
 665}
 666
 667static JSBool
 668str_toLocaleLowerCase(JSContext *cx, uintN argc, Value *vp)
 669{
 670    CallArgs args = CallArgsFromVp(argc, vp);
 671
 672    /*
 673     * Forcefully ignore the first (or any) argument and return toLowerCase(),
 674     * ECMA has reserved that argument, presumably for defining the locale.
 675     */
 676    if (cx->localeCallbacks && cx->localeCallbacks->localeToLowerCase) {
 677        JSString *str = ThisToStringForStringProto(cx, args);
 678        if (!str)
 679            return false;
 680
 681        Value result;
 682        if (!cx->localeCallbacks->localeToLowerCase(cx, str, &result))
 683            return false;
 684
 685        args.rval() = result;
 686        return true;
 687    }
 688
 689    return ToLowerCaseHelper(cx, args);
 690}
 691
 692JSString* JS_FASTCALL
 693js_toUpperCase(JSContext *cx, JSString *str)
 694{
 695    size_t n = str->length();
 696    const jschar *s = str->getChars(cx);
 697    if (!s)
 698        return NULL;
 699    jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
 700    if (!news)
 701        return NULL;
 702    for (size_t i = 0; i < n; i++)
 703        news[i] = unicode::ToUpperCase(s[i]);
 704    news[n] = 0;
 705    str = js_NewString(cx, news, n);
 706    if (!str) {
 707        cx->free_(news);
 708        return NULL;
 709    }
 710    return str;
 711}
 712
 713static JSBool
 714ToUpperCaseHelper(JSContext *cx, CallReceiver call)
 715{
 716    JSString *str = ThisToStringForStringProto(cx, call);
 717    if (!str)
 718        return false;
 719
 720    str = js_toUpperCase(cx, str);
 721    if (!str)
 722        return false;
 723
 724    call.rval() = StringValue(str);
 725    return true;
 726}
 727
 728static JSBool
 729str_toUpperCase(JSContext *cx, uintN argc, Value *vp)
 730{
 731    return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
 732}
 733
 734static JSBool
 735str_toLocaleUpperCase(JSContext *cx, uintN argc, Value *vp)
 736{
 737    CallArgs args = CallArgsFromVp(argc, vp);
 738
 739    /*
 740     * Forcefully ignore the first (or any) argument and return toUpperCase(),
 741     * ECMA has reserved that argument, presumably for defining the locale.
 742     */
 743    if (cx->localeCallbacks && cx->localeCallbacks->localeToUpperCase) {
 744        JSString *str = ThisToStringForStringProto(cx, args);
 745        if (!str)
 746            return false;
 747
 748        Value result;
 749        if (!cx->localeCallbacks->localeToUpperCase(cx, str, &result))
 750            return false;
 751
 752        args.rval() = result;
 753        return true;
 754    }
 755
 756    return ToUpperCaseHelper(cx, args);
 757}
 758
 759static JSBool
 760str_localeCompare(JSContext *cx, uintN argc, Value *vp)
 761{
 762    CallArgs args = CallArgsFromVp(argc, vp);
 763    JSString *str = ThisToStringForStringProto(cx, args);
 764    if (!str)
 765        return false;
 766
 767    if (args.length() == 0) {
 768        args.rval() = Int32Value(0);
 769    } else {
 770        JSString *thatStr = ToString(cx, args[0]);
 771        if (!thatStr)
 772            return false;
 773
 774        if (cx->localeCallbacks && cx->localeCallbacks->localeCompare) {
 775            args[0].setString(thatStr);
 776
 777            Value result;
 778            if (!cx->localeCallbacks->localeCompare(cx, str, thatStr, &result))
 779                return true;
 780
 781            args.rval() = result;
 782            return true;
 783        }
 784
 785        int32_t result;
 786        if (!CompareStrings(cx, str, thatStr, &result))
 787            return false;
 788
 789        args.rval() = Int32Value(result);
 790    }
 791    return true;
 792}
 793
 794JSBool
 795js_str_charAt(JSContext *cx, uintN argc, Value *vp)
 796{
 797    CallArgs args = CallArgsFromVp(argc, vp);
 798
 799    JSString *str;
 800    size_t i;
 801    if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
 802        str = args.thisv().toString();
 803        i = size_t(args[0].toInt32());
 804        if (i >= str->length())
 805            goto out_of_range;
 806    } else {
 807        str = ThisToStringForStringProto(cx, args);
 808        if (!str)
 809            return false;
 810
 811        double d = 0.0;
 812        if (args.length() > 0 && !ToInteger(cx, args[0], &d))
 813            return false;
 814
 815        if (d < 0 || str->length() <= d)
 816            goto out_of_range;
 817        i = size_t(d);
 818    }
 819
 820    str = cx->runtime->staticStrings.getUnitStringForElement(cx, str, i);
 821    if (!str)
 822        return false;
 823    args.rval() = StringValue(str);
 824    return true;
 825
 826  out_of_range:
 827    args.rval() = StringValue(cx->runtime->emptyString);
 828    return true;
 829}
 830
 831JSBool
 832js_str_charCodeAt(JSContext *cx, uintN argc, Value *vp)
 833{
 834    CallArgs args = CallArgsFromVp(argc, vp);
 835
 836    JSString *str;
 837    size_t i;
 838    if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
 839        str = args.thisv().toString();
 840        i = size_t(args[0].toInt32());
 841        if (i >= str->length())
 842            goto out_of_range;
 843    } else {
 844        str = ThisToStringForStringProto(cx, args);
 845        if (!str)
 846            return false;
 847
 848        double d = 0.0;
 849        if (args.length() > 0 && !ToInteger(cx, args[0], &d))
 850            return false;
 851
 852        if (d < 0 || str->length() <= d)
 853            goto out_of_range;
 854        i = size_t(d);
 855    }
 856
 857    const jschar *chars;
 858    chars = str->getChars(cx);
 859    if (!chars)
 860        return false;
 861
 862    args.rval() = Int32Value(chars[i]);
 863    return true;
 864
 865out_of_range:
 866    args.rval() = DoubleValue(js_NaN);
 867    return true;
 868}
 869
 870/*
 871 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
 872 * The patlen argument must be positive and no greater than sBMHPatLenMax.
 873 *
 874 * Return the index of pat in text, or -1 if not found.
 875 */
 876static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
 877static const jsuint sBMHPatLenMax   = 255; /* skip table element is uint8_t */
 878static const jsint  sBMHBadPattern  = -2;  /* return value if pat is not ISO-Latin-1 */
 879
 880jsint
 881js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
 882                      const jschar *pat, jsuint patlen)
 883{
 884    uint8_t skip[sBMHCharSetSize];
 885
 886    JS_ASSERT(0 < patlen && patlen <= sBMHPatLenMax);
 887    for (jsuint i = 0; i < sBMHCharSetSize; i++)
 888        skip[i] = (uint8_t)patlen;
 889    jsuint m = patlen - 1;
 890    for (jsuint i = 0; i < m; i++) {
 891        jschar c = pat[i];
 892        if (c >= sBMHCharSetSize)
 893            return sBMHBadPattern;
 894        skip[c] = (uint8_t)(m - i);
 895    }
 896    jschar c;
 897    for (jsuint k = m;
 898         k < textlen;
 899         k += ((c = text[k]) >= sBMHCharSetSize) ? patlen : skip[c]) {
 900        for (jsuint i = k, j = m; ; i--, j--) {
 901            if (text[i] != pat[j])
 902                break;
 903            if (j == 0)
 904                return static_cast<jsint>(i);  /* safe: max string size */
 905        }
 906    }
 907    return -1;
 908}
 909
 910struct MemCmp {
 911    typedef jsuint Extent;
 912    static JS_ALWAYS_INLINE Extent computeExtent(const jschar *, jsuint patlen) {
 913        return (patlen - 1) * sizeof(jschar);
 914    }
 915    static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
 916        return memcmp(p, t, extent) == 0;
 917    }
 918};
 919
 920struct ManualCmp {
 921    typedef const jschar *Extent;
 922    static JS_ALWAYS_INLINE Extent computeExtent(const jschar *pat, jsuint patlen) {
 923        return pat + patlen;
 924    }
 925    static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
 926        for (; p != extent; ++p, ++t) {
 927            if (*p != *t)
 928                return false;
 929        }
 930        return true;
 931    }
 932};
 933
 934template <class InnerMatch>
 935static jsint
 936UnrolledMatch(const jschar *text, jsuint textlen, const jschar *pat, jsuint patlen)
 937{
 938    JS_ASSERT(patlen > 0 && textlen > 0);
 939    const jschar *textend = text + textlen - (patlen - 1);
 940    const jschar p0 = *pat;
 941    const jschar *const patNext = pat + 1;
 942    const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
 943    uint8_t fixup;
 944
 945    const jschar *t = text;
 946    switch ((textend - t) & 7) {
 947      case 0: if (*t++ == p0) { fixup = 8; goto match; }
 948      case 7: if (*t++ == p0) { fixup = 7; goto match; }
 949      case 6: if (*t++ == p0) { fixup = 6; goto match; }
 950      case 5: if (*t++ == p0) { fixup = 5; goto match; }
 951      case 4: if (*t++ == p0) { fixup = 4; goto match; }
 952      case 3: if (*t++ == p0) { fixup = 3; goto match; }
 953      case 2: if (*t++ == p0) { fixup = 2; goto match; }
 954      case 1: if (*t++ == p0) { fixup = 1; goto match; }
 955    }
 956    while (t != textend) {
 957      if (t[0] == p0) { t += 1; fixup = 8; goto match; }
 958      if (t[1] == p0) { t += 2; fixup = 7; goto match; }
 959      if (t[2] == p0) { t += 3; fixup = 6; goto match; }
 960      if (t[3] == p0) { t += 4; fixup = 5; goto match; }
 961      if (t[4] == p0) { t += 5; fixup = 4; goto match; }
 962      if (t[5] == p0) { t += 6; fixup = 3; goto match; }
 963      if (t[6] == p0) { t += 7; fixup = 2; goto match; }
 964      if (t[7] == p0) { t += 8; fixup = 1; goto match; }
 965        t += 8;
 966        continue;
 967        do {
 968            if (*t++ == p0) {
 969              match:
 970                if (!InnerMatch::match(patNext, t, extent))
 971                    goto failed_match;
 972                return t - text - 1;
 973            }
 974          failed_match:;
 975        } while (--fixup > 0);
 976    }
 977    return -1;
 978}
 979
 980static JS_ALWAYS_INLINE jsint
 981StringMatch(const jschar *text, jsuint textlen,
 982            const jschar *pat, jsuint patlen)
 983{
 984    if (patlen == 0)
 985        return 0;
 986    if (textlen < patlen)
 987        return -1;
 988
 989#if defined(__i386__) || defined(_M_IX86) || defined(__i386)
 990    /*
 991     * Given enough registers, the unrolled loop below is faster than the
 992     * following loop. 32-bit x86 does not have enough registers.
 993     */
 994    if (patlen == 1) {
 995        const jschar p0 = *pat;
 996        for (const jschar *c = text, *end = text + textlen; c != end; ++c) {
 997            if (*c == p0)
 998                return c - text;
 999        }
1000        return -1;
1001    }
1002#endif
1003
1004    /*
1005     * If the text or pattern string is short, BMH will be more expensive than
1006     * the basic linear scan due to initialization cost and a more complex loop
1007     * body. While the correct threshold is input-dependent, we can make a few
1008     * conservative observations:
1009     *  - When |textlen| is "big enough", the initialization time will be
1010     *    proportionally small, so the worst-case slowdown is minimized.
1011     *  - When |patlen| is "too small", even the best case for BMH will be
1012     *    slower than a simple scan for large |textlen| due to the more complex
1013     *    loop body of BMH.
1014     * From this, the values for "big enough" and "too small" are determined
1015     * empirically. See bug 526348.
1016     */
1017    if (textlen >= 512 && patlen >= 11 && patlen <= sBMHPatLenMax) {
1018        jsint index = js_BoyerMooreHorspool(text, textlen, pat, patlen);
1019        if (index != sBMHBadPattern)
1020            return index;
1021    }
1022
1023    /*
1024     * For big patterns with large potential overlap we want the SIMD-optimized
1025     * speed of memcmp. For small patterns, a simple loop is faster.
1026     *
1027     * FIXME: Linux memcmp performance is sad and the manual loop is faster.
1028     */
1029    return
1030#if !defined(__linux__)
1031           patlen > 128 ? UnrolledMatch<MemCmp>(text, textlen, pat, patlen)
1032                        :
1033#endif
1034                          UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
1035}
1036
/*
 * RopeMatch gives up on leaf-by-leaf matching, and searches the rope's
 * flattened chars instead, once it has visited more than
 * (length >> sRopeMatchThresholdRatioLog2) leaf nodes.
 */
1037static const size_t sRopeMatchThresholdRatioLog2 = 5;
1038
1039/*
1040 * RopeMatch takes the text to search and the pattern to search for in the text.
1041 * RopeMatch returns false on OOM and otherwise returns the match index through
1042 * the 'match' outparam (-1 for not found).
1043 */
1044static bool
1045RopeMatch(JSContext *cx, JSString *textstr, const jschar *pat, jsuint patlen, jsint *match)
1046{
1047    JS_ASSERT(textstr->isRope());
1048
1049    if (patlen == 0) {
1050        *match = 0;
1051        return true;
1052    }
1053    if (textstr->length() < patlen) {
1054        *match = -1;
1055        return true;
1056    }
1057
1058    /*
1059     * List of leaf nodes in the rope. If we run out of memory when trying to
1060     * append to this list, we can still fall back to StringMatch, so use the
1061     * system allocator so we don't report OOM in that case.
1062     */
1063    Vector<JSLinearString *, 16, SystemAllocPolicy> strs;
1064
1065    /*
1066     * We don't want to do rope matching if there is a poor node-to-char ratio,
1067     * since this means spending a lot of time in the match loop below. We also
1068     * need to build the list of leaf nodes. Do both here: iterate over the
1069     * nodes so long as there are not too many.
1070     */
1071    {
1072        size_t textstrlen = textstr->length();
1073        size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2;
1074        StringSegmentRange r(cx);
1075        if (!r.init(textstr))
1076            return false;
1077        while (!r.empty()) {
1078            if (threshold-- == 0 || !strs.append(r.front())) {
1079                const jschar *chars = textstr->getChars(cx);
1080                if (!chars)
1081                    return false;
1082                *match = StringMatch(chars, textstrlen, pat, patlen);
1083                return true;
1084            }
1085            if (!r.popFront())
1086                return false;
1087        }
1088    }
1089
1090    /* Absolute offset from the beginning of the logical string textstr. */
1091    jsint pos = 0;
1092
1093    for (JSLinearString **outerp = strs.begin(); outerp != strs.end(); ++outerp) {
1094        /* Try to find a match within 'outer'. */
1095        JSLinearString *outer = *outerp;
1096        const jschar *chars = outer->chars();
1097        size_t len = outer->length();
1098        jsint matchResult = StringMatch(chars, len, pat, patlen);
1099        if (matchResult != -1) {
1100            /* Matched! */
1101            *match = pos + matchResult;
1102            return true;
1103        }
1104
1105        /* Try to find a match starting in 'outer' and running into other nodes. */
1106        const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1);
1107        const jschar *const textend = chars + len;
1108        const jschar p0 = *pat;
1109        const jschar *const p1 = pat + 1;
1110        const jschar *const patend = pat + patlen;
1111        for (const jschar *t = text; t != textend; ) {
1112            if (*t++ != p0)
1113                continue;
1114            JSLinearString **innerp = outerp;
1115            const jschar *ttend = textend;
1116            for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) {
1117                while (tt == ttend) {
1118                    if (++innerp == strs.end()) {
1119                        *match = -1;
1120                        return true;
1121                    }
1122                    JSLinearString *inner = *innerp;
1123                    tt = inner->chars();
1124                    ttend = tt + inner->length();
1125                }
1126                if (*pp != *tt)
1127                    goto break_continue;
1128            }
1129
1130            /* Matched! */
1131            *match = pos + (t - chars) - 1;  /* -1 because of *t++ above */
1132            return true;
1133
1134          break_continue:;
1135        }
1136
1137        pos += len;
1138    }
1139
1140    *match = -1;
1141    return true;
1142}
1143
1144static JSBool
1145str_indexOf(JSContext *cx, uintN argc, Value *vp)
1146{
1147    CallArgs args = CallArgsFromVp(argc, vp);
1148    JSString *str = ThisToStringForStringProto(cx, args);
1149    if (!str)
1150        return false;
1151
1152    JSLinearString *patstr = ArgToRootedString(cx, args, 0);
1153    if (!patstr)
1154        return false;
1155
1156    jsuint textlen = str->length();
1157    const jschar *text = str->getChars(cx);
1158    if (!text)
1159        return false;
1160
1161    jsuint patlen = patstr->length();
1162    const jschar *pat = patstr->chars();
1163
1164    jsuint start;
1165    if (args.length() > 1) {
1166        if (args[1].isInt32()) {
1167            jsint i = args[1].toInt32();
1168            if (i <= 0) {
1169                start = 0;
1170            } else if (jsuint(i) > textlen) {
1171                start = textlen;
1172                textlen = 0;
1173            } else {
1174                start = i;
1175                text += start;
1176                textlen -= start;
1177            }
1178        } else {
1179            jsdouble d;
1180            if (!ToInteger(cx, args[1], &d))
1181                return false;
1182            if (d <= 0) {
1183                start = 0;
1184            } else if (d > textlen) {
1185                start = textlen;
1186                textlen = 0;
1187            } else {
1188                start = (jsint)d;
1189                text += start;
1190                textlen -= start;
1191            }
1192        }
1193    } else {
1194        start = 0;
1195    }
1196
1197    jsint match = StringMatch(text, textlen, pat, patlen);
1198    args.rval() = Int32Value((match == -1) ? -1 : start + match);
1199    return true;
1200}
1201
1202static JSBool
1203str_lastIndexOf(JSContext *cx, uintN argc, Value *vp)
1204{
1205    CallArgs args = CallArgsFromVp(argc, vp);
1206    JSString *textstr = ThisToStringForStringProto(cx, args);
1207    if (!textstr)
1208        return false;
1209
1210    size_t textlen = textstr->length();
1211    const jschar *text = textstr->getChars(cx);
1212    if (!text)
1213        return false;
1214
1215    JSLinearString *patstr = ArgToRootedString(cx, args, 0);
1216    if (!patstr)
1217        return false;
1218
1219    size_t patlen = patstr->length();
1220    const jschar *pat = patstr->chars();
1221
1222    jsint i = textlen - patlen; // Start searching here
1223    if (i < 0) {
1224        args.rval() = Int32Value(-1);
1225        return true;
1226    }
1227
1228    if (args.length() > 1) {
1229        if (args[1].isInt32()) {
1230            jsint j = args[1].toInt32();
1231            if (j <= 0)
1232                i = 0;
1233            else if (j < i)
1234                i = j;
1235        } else {
1236            double d;
1237            if (!ToNumber(cx, args[1], &d))
1238                return false;
1239            if (!JSDOUBLE_IS_NaN(d)) {
1240                d = js_DoubleToInteger(d);
1241                if (d <= 0)
1242                    i = 0;
1243                else if (d < i)
1244                    i = (jsint)d;
1245            }
1246        }
1247    }
1248
1249    if (patlen == 0) {
1250        args.rval() = Int32Value(i);
1251        return true;
1252    }
1253
1254    const jschar *t = text + i;
1255    const jschar *textend = text - 1;
1256    const jschar p0 = *pat;
1257    const jschar *patNext = pat + 1;
1258    const jschar *patEnd = pat + patlen;
1259
1260    for (; t != textend; --t) {
1261        if (*t == p0) {
1262            const jschar *t1 = t + 1;
1263            for (const jschar *p1 = patNext; p1 != patEnd; ++p1, ++t1) {
1264                if (*t1 != *p1)
1265                    goto break_continue;
1266            }
1267            args.rval() = Int32Value(t - text);
1268            return true;
1269        }
1270      break_continue:;
1271    }
1272
1273    args.rval() = Int32Value(-1);
1274    return true;
1275}
1276
1277static JSBool
1278js_TrimString(JSContext *cx, Value *vp, JSBool trimLeft, JSBool trimRight)
1279{
1280    CallReceiver call = CallReceiverFromVp(vp);
1281    JSString *str = ThisToStringForStringProto(cx, call);
1282    if (!str)
1283        return false;
1284    size_t length = str->length();
1285    const jschar *chars = str->getChars(cx);
1286    if (!chars)
1287        return false;
1288
1289    size_t begin = 0;
1290    size_t end = length;
1291
1292    if (trimLeft) {
1293        while (begin < length && unicode::IsSpace(chars[begin]))
1294            ++begin;
1295    }
1296
1297    if (trimRight) {
1298        while (end > begin && unicode::IsSpace(chars[end - 1]))
1299            --end;
1300    }
1301
1302    str = js_NewDependentString(cx, str, begin, end - begin);
1303    if (!str)
1304        return false;
1305
1306    call.rval() = StringValue(str);
1307    return true;
1308}
1309
/* Native wrapper: trims whitespace from both ends of |this| via js_TrimString. */
1310static JSBool
1311str_trim(JSContext *cx, uintN argc, Value *vp)
1312{
1313    return js_TrimString(cx, vp, JS_TRUE, JS_TRUE);
1314}
1315
/* Native wrapper: trims leading whitespace only from |this| via js_TrimString. */
1316static JSBool
1317str_trimLeft(JSContext *cx, uintN argc, Value *vp)
1318{
1319    return js_TrimString(cx, vp, JS_TRUE, JS_FALSE);
1320}
1321
/* Native wrapper: trims trailing whitespace only from |this| via js_TrimString. */
1322static JSBool
1323str_trimRight(JSContext *cx, uintN argc, Value *vp)
1324{
1325    return js_TrimString(cx, vp, JS_FALSE, JS_TRUE);
1326}
1327
1328/*
1329 * Perl-inspired string functions.
1330 */
1331
1332/* Result of a successfully performed flat match. */
1333class FlatMatch
1334{
1335    JSAtom       *patstr;
1336    const jschar *pat;
1337    size_t       patlen;
1338    int32_t      match_;
1339
1340    friend class RegExpGuard;
1341
1342  public:
1343    FlatMatch() : patstr(NULL) {} /* Old GCC wants this initialization. */
1344    JSLinearString *pattern() const { return patstr; }
1345    size_t patternLength() const { return patlen; }
1346
1347    /*
1348     * Note: The match is -1 when the match is performed successfully,
1349     * but no match is found.
1350     */
1351    int32_t match() const { return match_; }
1352};
1353
1354static inline bool
1355IsRegExpMetaChar(jschar c)
1356{
1357    switch (c) {
1358      /* Taken from the PatternCharacter production in 15.10.1. */
1359      case '^': case '$': case '\\': case '.': case '*': case '+':
1360      case '?': case '(': case ')': case '[': case ']': case '{':
1361      case '}': case '|':
1362        return true;
1363      default:
1364        return false;
1365    }
1366}
1367
1368static inline bool
1369HasRegExpMetaChars(const jschar *chars, size_t length)
1370{
1371    for (size_t i = 0; i < length; ++i) {
1372        if (IsRegExpMetaChar(chars[i]))
1373            return true;
1374    }
1375    return false;
1376}
1377
1378/*
1379 * RegExpGuard factors logic out of String regexp operations.
1380 *
1381 * |optarg| indicates in which argument position RegExp flags will be found, if
1382 * present. This is a Mozilla extension and not part of any ECMA spec.
1383 */
1384class RegExpGuard
1385{
1386    RegExpGuard(const RegExpGuard &) MOZ_DELETE;
1387    void operator=(const RegExpGuard &) MOZ_DELETE;
1388
1389    RegExpShared::Guard re_;
1390    FlatMatch           fm;
1391
1392    /*
1393     * Upper bound on the number of characters we are willing to potentially
1394     * waste on searching for RegExp meta-characters.
1395     */
1396    static const size_t MAX_FLAT_PAT_LEN = 256;
1397
1398    static JSAtom *
1399    flattenPattern(JSContext *cx, JSAtom *patstr)
1400    {
1401        StringBuffer sb(cx);
1402        if (!sb.reserve(patstr->length()))
1403            return NULL;
1404
1405        static const jschar ESCAPE_CHAR = '\\';
1406        const jschar *chars = patstr->chars();
1407        size_t len = patstr->length();
1408        for (const jschar *it = chars; it != chars + len; ++it) {
1409            if (IsRegExpMetaChar(*it)) {
1410                if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
1411                    return NULL;
1412            } else {
1413                if (!sb.append(*it))
1414                    return NULL;
1415            }
1416        }
1417        return sb.finishAtom();
1418    }
1419
1420  public:
1421    RegExpGuard() {}
1422
1423    /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
1424    bool init(JSContext *cx, CallArgs args, bool convertVoid = false)
1425    {
1426        if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
1427            RegExpShared *shared = RegExpToShared(cx, args[0].toObject());
1428            if (!shared)
1429                return false;
1430            re_.init(*shared);
1431        } else {
1432            if (convertVoid && (args.length() == 0 || args[0].isUndefined())) {
1433                fm.patstr = cx->runtime->emptyString;
1434                return true;
1435            }
1436
1437            JSString *arg = ArgToRootedString(cx, args, 0);
1438            if (!arg)
1439                return false;
1440
1441            fm.patstr = js_AtomizeString(cx, arg);
1442            if (!fm.patstr)
1443                return false;
1444        }
1445        return true;
1446    }
1447
1448    /*
1449     * Attempt to match |patstr| to |textstr|. A flags argument, metachars in the
1450     * pattern string, or a lengthy pattern string can thwart this process.
1451     *
1452     * |checkMetaChars| looks for regexp metachars in the pattern string.
1453     *
1454     * Return whether flat matching could be used.
1455     *
1456     * N.B. tryFlatMatch returns NULL on OOM, so the caller must check cx->isExceptionPending().
1457     */
1458    const FlatMatch *
1459    tryFlatMatch(JSContext *cx, JSString *textstr, uintN optarg, uintN argc,
1460                 bool checkMetaChars = true)
1461    {
1462        if (re_.initialized())
1463            return NULL;
1464
1465        fm.pat = fm.patstr->chars();
1466        fm.patlen = fm.patstr->length();
1467
1468        if (optarg < argc)
1469            return NULL;
1470
1471        if (checkMetaChars &&
1472            (fm.patlen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat, fm.patlen))) {
1473            return NULL;
1474        }
1475
1476        /*
1477         * textstr could be a rope, so we want to avoid flattening it for as
1478         * long as possible.
1479         */
1480        if (textstr->isRope()) {
1481            if (!RopeMatch(cx, textstr, fm.pat, fm.patlen, &fm.match_))
1482                return NULL;
1483        } else {
1484            const jschar *text = textstr->asLinear().chars();
1485            size_t textlen = textstr->length();
1486            fm.match_ = StringMatch(text, textlen, fm.pat, fm.patlen);
1487        }
1488        return &fm;
1489    }
1490
1491    /* If the pattern is not already a regular expression, make it so. */
1492    bool normalizeRegExp(JSContext *cx, bool flat, uintN optarg, CallArgs args)
1493    {
1494        if (re_.initialized())
1495            return true;
1496
1497        /* Build RegExp from pattern string. */
1498        JSString *opt;
1499        if (optarg < args.length()) {
1500            opt = ToString(cx, args[optarg]);
1501            if (!opt)
1502                return false;
1503        } else {
1504            opt = NULL;
1505        }
1506
1507        JSAtom *patstr;
1508        if (flat) {
1509            patstr = flattenPattern(cx, fm.patstr);
1510            if (!patstr)
1511                return false;
1512        } else {
1513            patstr = fm.patstr;
1514        }
1515        JS_ASSERT(patstr);
1516
1517        RegExpShared *re = cx->compartment->regExps.get(cx, patstr, opt);
1518        if (!re)
1519            return false;
1520
1521        re_.init(*re);
1522        return true;
1523    }
1524
1525    RegExpShared &regExp() { return *re_; }
1526};
1527
1528/* ExecuteRegExp indicates success in two ways, based on the 'test' flag. */
1529static JS_ALWAYS_INLINE bool
1530Matched(RegExpExecType type, const Value &v)
1531{
1532    return (type == RegExpTest) ? v.isTrue() : !v.isNull();
1533}
1534
/*
 * Per-match callback fired by DoMatch. |count| is the number of callbacks
 * already fired during this DoMatch call (always 0 for a non-global regexp)
 * and |data| is the opaque pointer that was handed to DoMatch. Returning false
 * makes DoMatch return false.
 */
1535typedef bool (*DoMatchCallback)(JSContext *cx, RegExpStatics *res, size_t count, void *data);
1536
1537/*
1538 * BitOR-ing these flags allows the DoMatch caller to control how the
1539 * RegExp engine is called and when callbacks are fired.
1540 */
enum MatchControlFlags {
    /* Individual control bits. */
    TEST_GLOBAL_BIT        = 1 << 0,  /* use RegExp.test for global regexps */
    TEST_SINGLE_BIT        = 1 << 1,  /* use RegExp.test for non-global regexps */
    CALLBACK_ON_SINGLE_BIT = 1 << 2,  /* fire callback on non-global match */

    /* Ready-made combinations. */
    MATCH_ARGS    = TEST_GLOBAL_BIT,
    MATCHALL_ARGS = CALLBACK_ON_SINGLE_BIT,
    REPLACE_ARGS  = TEST_GLOBAL_BIT | TEST_SINGLE_BIT | CALLBACK_ON_SINGLE_BIT
};
1550
1551/* Factor out looping and matching logic. */
1552static bool
1553DoMatch(JSContext *cx, RegExpStatics *res, JSString *str, RegExpShared &re,
1554        DoMatchCallback callback, void *data, MatchControlFlags flags, Value *rval)
1555{
1556    JSLinearString *linearStr = str->ensureLinear(cx);
1557    if (!linearStr)
1558        return false;
1559
1560    const jschar *chars = linearStr->chars();
1561    size_t length = linearStr->length();
1562
1563    if (re.global()) {
1564        RegExpExecType type = (flags & TEST_GLOBAL_BIT) ? RegExpTest : RegExpExec;
1565        for (size_t count = 0, i = 0, length = str->length(); i <= length; ++count) {
1566            if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
1567                return false;
1568            if (!Matched(type, *rval))
1569                break;
1570            if (!callback(cx, res, count, data))
1571                return false;
1572            if (!res->matched())
1573                ++i;
1574        }
1575    } else {
1576        RegExpExecType type = (flags & TEST_SINGLE_BIT) ? RegExpTest : RegExpExec;
1577        bool callbackOnSingle = !!(flags & CALLBACK_ON_SINGLE_BIT);
1578        size_t i = 0;
1579        if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
1580            return false;
1581        if (callbackOnSingle && Matched(type, *rval) && !callback(cx, res, 0, data))
1582            return false;
1583    }
1584    return true;
1585}
1586
1587static bool
1588BuildFlatMatchArray(JSContext *cx, JSString *textstr, const FlatMatch &fm, CallArgs *args)
1589{
1590    if (fm.match() < 0) {
1591        args->rval() = NullValue();
1592        return true;
1593    }
1594
1595    /* For this non-global match, produce a RegExp.exec-style array. */
1596    JSObject *obj = NewSlowEmptyArray(cx);
1597    if (!obj)
1598        return false;
1599
1600    if (!obj->defineElement(cx, 0, StringValue(fm.pattern())) ||
1601        !obj->defineProperty(cx, cx->runtime->atomState.indexAtom, Int32Value(fm.match())) ||
1602        !obj->defineProperty(cx, cx->runtime->atomState.inputAtom, StringValue(textstr)))
1603    {
1604        return false;
1605    }
1606
1607    args->rval() = ObjectValue(*obj);
1608    return true;
1609}
1610
/*
 * Type of the |data| pointer that str_match threads through DoMatch to
 * MatchCallback: the address of the slot holding the result array, which
 * MatchCallback creates on first use.
 */
1611typedef JSObject **MatchArgType;
1612
1613/*
1614 * DoMatch will only callback on global matches, hence this function builds
1615 * only the "array of matches" returned by match on global regexps.
1616 */
1617static bool
1618MatchCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
1619{
1620    JS_ASSERT(count <= JSID_INT_MAX);  /* by max string length */
1621
1622    JSObject *&arrayobj = *static_cast<MatchArgType>(p);
1623    if (!arrayobj) {
1624        arrayobj = NewDenseEmptyArray(cx);
1625        if (!arrayobj)
1626            return false;
1627    }
1628
1629    Value v;
1630    return res->createLastMatch(cx, &v) && arrayobj->defineElement(cx, count, v);
1631}
1632
1633JSBool
1634js::str_match(JSContext *cx, uintN argc, Value *vp)
1635{
1636    CallArgs args = CallArgsFromVp(argc, vp);
1637    JSString *str = ThisToStringForStringProto(cx, args);
1638    if (!str)
1639        return false;
1640
1641    RegExpGuard g;
1642    if (!g.init(cx, args, true))
1643        return false;
1644
1645    if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length()))
1646        return BuildFlatMatchArray(cx, str, *fm, &args);
1647
1648    /* Return if there was an error in tryFlatMatch. */
1649    if (cx->isExceptionPending())
1650        return false;
1651
1652    if (!g.normalizeRegExp(cx, false, 1, args))
1653        return false;
1654
1655    JSObject *array = NULL;
1656    MatchArgType arg = &array;
1657    RegExpStatics *res = cx->regExpStatics();
1658    Value rval;
1659    if (!DoMatch(cx, res, str, g.regExp(), MatchCallback, arg, MATCH_ARGS, &rval))
1660        return false;
1661
1662    if (g.regExp().global())
1663        args.rval() = ObjectOrNullValue(array);
1664    else
1665        args.rval() = rval;
1666    return true;
1667}
1668
1669JSBool
1670js::str_search(JSContext *cx, uintN argc, Value *vp)
1671{
1672    CallArgs args = CallArgsFromVp(argc, vp);
1673    JSString *str = ThisToStringForStringProto(cx, args);
1674    if (!str)
1675        return false;
1676
1677    RegExpGuard g;
1678    if (!g.init(cx, args, true))
1679        return false;
1680    if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length())) {
1681        args.rval() = Int32Value(fm->match());
1682        return true;
1683    }
1684
1685    if (cx->isExceptionPending())  /* from tryFlatMatch */
1686        return false;
1687
1688    if (!g.normalizeRegExp(cx, false, 1, args))
1689        return false;
1690
1691    JSLinearString *linearStr = str->ensureLinear(cx);
1692    if (!linearStr)
1693        return false;
1694
1695    const jschar *chars = linearStr->chars();
1696    size_t length = linearStr->length();
1697    RegExpStatics *res = cx->regExpStatics();
1698
1699    /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
1700    size_t i = 0;
1701    Value result;
1702    if (!ExecuteRegExp(cx, res, g.regExp(), linearStr, chars, length, &i, RegExpTest, &result))
1703        return false;
1704
1705    if (result.isTrue())
1706        args.rval() = Int32Value(res->matchStart());
1707    else
1708        args.rval() = Int32Value(-1);
1709    return true;
1710}
1711
1712struct ReplaceData
1713{
1714    ReplaceData(JSContext *cx)
1715     : sb(cx)
1716    {}
1717
1718    JSString           *str;           /* 'this' parameter object as a string */
1719    RegExpGuard        g;              /* regexp parameter object and private data */
1720    JSObject           *lambda;        /* replacement function object or null */
1721    JSObject           *elembase;      /* object for function(a){return b[a]} replace */
1722    JSLinearString     *repstr;        /* replacement string */
1723    const jschar       *dollar;        /* null or pointer to first $ in repstr */
1724    const jschar       *dollarEnd;     /* limit pointer for js_strchr_limit */
1725    jsint              leftIndex;      /* left context index in str->chars */
1726    JSSubString        dollarStr;      /* for "$$" InterpretDollar result */
1727    bool               calledBack;     /* record whether callback has been called */
1728    InvokeArgsGuard    args;           /* arguments for lambda call */
1729    StringBuffer       sb;             /* buffer built during DoMatch */
1730};
1731
1732static bool
1733InterpretDollar(JSContext *cx, RegExpStatics *res, const jschar *dp, const jschar *ep,
1734                ReplaceData &rdata, JSSubString *out, size_t *skip)
1735{
1736   

Large files are truncated, but you can click here to view the full file