/js/src/jsstr.cpp

http://github.com/zpao/v8monkey · C++ · 4356 lines · 3507 code · 476 blank · 373 comment · 684 complexity · fbf53656a39e95829f89c223c92027e4 MD5 · raw file

Large files are truncated click here to view the full file

  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  2. * vim: set ts=8 sw=4 et tw=99:
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is Mozilla Communicator client code, released
  18. * March 31, 1998.
  19. *
  20. * The Initial Developer of the Original Code is
  21. * Netscape Communications Corporation.
  22. * Portions created by the Initial Developer are Copyright (C) 1998
  23. * the Initial Developer. All Rights Reserved.
  24. *
  25. * Contributor(s):
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the MPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the MPL, the GPL or the LGPL.
  38. *
  39. * ***** END LICENSE BLOCK ***** */
  40. /*
  41. * JS string type implementation.
  42. *
  43. * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
  44. * native methods store strings (possibly newborn) converted from their 'this'
  45. * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
  46. * conversions at their index (argv[0], argv[1]). This is a legitimate method
  47. * of rooting things that might lose their newborn root due to subsequent GC
  48. * allocations in the same native method.
  49. */
  50. #include "mozilla/Attributes.h"
  51. #include <stdlib.h>
  52. #include <string.h>
  53. #include "jstypes.h"
  54. #include "jsutil.h"
  55. #include "jshash.h"
  56. #include "jsprf.h"
  57. #include "jsapi.h"
  58. #include "jsarray.h"
  59. #include "jsatom.h"
  60. #include "jsbool.h"
  61. #include "jscntxt.h"
  62. #include "jsgc.h"
  63. #include "jsinterp.h"
  64. #include "jslock.h"
  65. #include "jsnum.h"
  66. #include "jsobj.h"
  67. #include "jsopcode.h"
  68. #include "jsprobes.h"
  69. #include "jsscope.h"
  70. #include "jsstr.h"
  71. #include "jsversion.h"
  72. #include "builtin/RegExp.h"
  73. #include "vm/GlobalObject.h"
  74. #include "vm/RegExpObject.h"
  75. #include "jsinferinlines.h"
  76. #include "jsobjinlines.h"
  77. #include "jsautooplen.h" // generated headers last
  78. #include "vm/RegExpObject-inl.h"
  79. #include "vm/RegExpStatics-inl.h"
  80. #include "vm/StringObject-inl.h"
  81. #include "vm/String-inl.h"
  82. using namespace js;
  83. using namespace js::gc;
  84. using namespace js::types;
  85. using namespace js::unicode;
  86. static JSLinearString *
  87. ArgToRootedString(JSContext *cx, CallArgs &args, uintN argno)
  88. {
  89. if (argno >= args.length())
  90. return cx->runtime->atomState.typeAtoms[JSTYPE_VOID];
  91. Value &arg = args[argno];
  92. JSString *str = ToString(cx, arg);
  93. if (!str)
  94. return NULL;
  95. arg = StringValue(str);
  96. return str->ensureLinear(cx);
  97. }
  98. /*
  99. * Forward declarations for URI encode/decode and helper routines
  100. */
  101. static JSBool
  102. str_decodeURI(JSContext *cx, uintN argc, Value *vp);
  103. static JSBool
  104. str_decodeURI_Component(JSContext *cx, uintN argc, Value *vp);
  105. static JSBool
  106. str_encodeURI(JSContext *cx, uintN argc, Value *vp);
  107. static JSBool
  108. str_encodeURI_Component(JSContext *cx, uintN argc, Value *vp);
  109. static const uint32_t INVALID_UTF8 = UINT32_MAX;
  110. static uint32_t
  111. Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length);
  112. /*
  113. * Global string methods
  114. */
  115. /* ES5 B.2.1 */
  116. static JSBool
  117. str_escape(JSContext *cx, uintN argc, Value *vp)
  118. {
  119. CallArgs args = CallArgsFromVp(argc, vp);
  120. const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
  121. '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
  122. JSLinearString *str = ArgToRootedString(cx, args, 0);
  123. if (!str)
  124. return false;
  125. size_t length = str->length();
  126. const jschar *chars = str->chars();
  127. static const uint8_t shouldPassThrough[256] = {
  128. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  129. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  130. 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
  131. 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
  132. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
  133. 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
  134. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
  135. 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
  136. };
  137. /* In step 7, exactly 69 characters should pass through unencoded. */
  138. #ifdef DEBUG
  139. size_t count = 0;
  140. for (size_t i = 0; i < sizeof(shouldPassThrough); i++) {
  141. if (shouldPassThrough[i]) {
  142. count++;
  143. }
  144. }
  145. JS_ASSERT(count == 69);
  146. #endif
  147. /* Take a first pass and see how big the result string will need to be. */
  148. size_t newlength = length;
  149. for (size_t i = 0; i < length; i++) {
  150. jschar ch = chars[i];
  151. if (ch < 128 && shouldPassThrough[ch])
  152. continue;
  153. /* The character will be encoded as %XX or %uXXXX. */
  154. newlength += (ch < 256) ? 2 : 5;
  155. /*
  156. * This overflow test works because newlength is incremented by at
  157. * most 5 on each iteration.
  158. */
  159. if (newlength < length) {
  160. js_ReportAllocationOverflow(cx);
  161. return false;
  162. }
  163. }
  164. if (newlength >= ~(size_t)0 / sizeof(jschar)) {
  165. js_ReportAllocationOverflow(cx);
  166. return false;
  167. }
  168. jschar *newchars = (jschar *) cx->malloc_((newlength + 1) * sizeof(jschar));
  169. if (!newchars)
  170. return false;
  171. size_t i, ni;
  172. for (i = 0, ni = 0; i < length; i++) {
  173. jschar ch = chars[i];
  174. if (ch < 128 && shouldPassThrough[ch]) {
  175. newchars[ni++] = ch;
  176. } else if (ch < 256) {
  177. newchars[ni++] = '%';
  178. newchars[ni++] = digits[ch >> 4];
  179. newchars[ni++] = digits[ch & 0xF];
  180. } else {
  181. newchars[ni++] = '%';
  182. newchars[ni++] = 'u';
  183. newchars[ni++] = digits[ch >> 12];
  184. newchars[ni++] = digits[(ch & 0xF00) >> 8];
  185. newchars[ni++] = digits[(ch & 0xF0) >> 4];
  186. newchars[ni++] = digits[ch & 0xF];
  187. }
  188. }
  189. JS_ASSERT(ni == newlength);
  190. newchars[newlength] = 0;
  191. JSString *retstr = js_NewString(cx, newchars, newlength);
  192. if (!retstr) {
  193. cx->free_(newchars);
  194. return false;
  195. }
  196. args.rval() = StringValue(retstr);
  197. return true;
  198. }
  199. static inline bool
  200. Unhex4(const jschar *chars, jschar *result)
  201. {
  202. jschar a = chars[0],
  203. b = chars[1],
  204. c = chars[2],
  205. d = chars[3];
  206. if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
  207. return false;
  208. *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
  209. return true;
  210. }
  211. static inline bool
  212. Unhex2(const jschar *chars, jschar *result)
  213. {
  214. jschar a = chars[0],
  215. b = chars[1];
  216. if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
  217. return false;
  218. *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
  219. return true;
  220. }
  221. /* ES5 B.2.2 */
  222. static JSBool
  223. str_unescape(JSContext *cx, uintN argc, Value *vp)
  224. {
  225. CallArgs args = CallArgsFromVp(argc, vp);
  226. /* Step 1. */
  227. JSLinearString *str = ArgToRootedString(cx, args, 0);
  228. if (!str)
  229. return false;
  230. /* Step 2. */
  231. size_t length = str->length();
  232. const jschar *chars = str->chars();
  233. /* Step 3. */
  234. StringBuffer sb(cx);
  235. /*
  236. * Note that the spec algorithm has been optimized to avoid building
  237. * a string in the case where no escapes are present.
  238. */
  239. /* Step 4. */
  240. size_t k = 0;
  241. bool building = false;
  242. while (true) {
  243. /* Step 5. */
  244. if (k == length) {
  245. JSLinearString *result;
  246. if (building) {
  247. result = sb.finishString();
  248. if (!result)
  249. return false;
  250. } else {
  251. result = str;
  252. }
  253. args.rval() = StringValue(result);
  254. return true;
  255. }
  256. /* Step 6. */
  257. jschar c = chars[k];
  258. /* Step 7. */
  259. if (c != '%')
  260. goto step_18;
  261. /* Step 8. */
  262. if (k > length - 6)
  263. goto step_14;
  264. /* Step 9. */
  265. if (chars[k + 1] != 'u')
  266. goto step_14;
  267. #define ENSURE_BUILDING \
  268. JS_BEGIN_MACRO \
  269. if (!building) { \
  270. building = true; \
  271. if (!sb.reserve(length)) \
  272. return false; \
  273. sb.infallibleAppend(chars, chars + k); \
  274. } \
  275. JS_END_MACRO
  276. /* Step 10-13. */
  277. if (Unhex4(&chars[k + 2], &c)) {
  278. ENSURE_BUILDING;
  279. k += 5;
  280. goto step_18;
  281. }
  282. step_14:
  283. /* Step 14. */
  284. if (k > length - 3)
  285. goto step_18;
  286. /* Step 15-17. */
  287. if (Unhex2(&chars[k + 1], &c)) {
  288. ENSURE_BUILDING;
  289. k += 2;
  290. }
  291. step_18:
  292. if (building)
  293. sb.infallibleAppend(c);
  294. /* Step 19. */
  295. k += 1;
  296. }
  297. #undef ENSURE_BUILDING
  298. }
  299. #if JS_HAS_UNEVAL
  300. static JSBool
  301. str_uneval(JSContext *cx, uintN argc, Value *vp)
  302. {
  303. CallArgs args = CallArgsFromVp(argc, vp);
  304. JSString *str = js_ValueToSource(cx, args.length() != 0 ? args[0] : UndefinedValue());
  305. if (!str)
  306. return false;
  307. args.rval() = StringValue(str);
  308. return true;
  309. }
  310. #endif
  311. const char js_escape_str[] = "escape";
  312. const char js_unescape_str[] = "unescape";
  313. #if JS_HAS_UNEVAL
  314. const char js_uneval_str[] = "uneval";
  315. #endif
  316. const char js_decodeURI_str[] = "decodeURI";
  317. const char js_encodeURI_str[] = "encodeURI";
  318. const char js_decodeURIComponent_str[] = "decodeURIComponent";
  319. const char js_encodeURIComponent_str[] = "encodeURIComponent";
  320. static JSFunctionSpec string_functions[] = {
  321. JS_FN(js_escape_str, str_escape, 1,0),
  322. JS_FN(js_unescape_str, str_unescape, 1,0),
  323. #if JS_HAS_UNEVAL
  324. JS_FN(js_uneval_str, str_uneval, 1,0),
  325. #endif
  326. JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
  327. JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
  328. JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
  329. JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
  330. JS_FS_END
  331. };
  332. jschar js_empty_ucstr[] = {0};
  333. JSSubString js_EmptySubString = {0, js_empty_ucstr};
  334. static const uintN STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
  335. static JSBool
  336. str_enumerate(JSContext *cx, JSObject *obj)
  337. {
  338. JSString *str = obj->getPrimitiveThis().toString();
  339. for (size_t i = 0, length = str->length(); i < length; i++) {
  340. JSString *str1 = js_NewDependentString(cx, str, i, 1);
  341. if (!str1)
  342. return false;
  343. if (!obj->defineElement(cx, i, StringValue(str1),
  344. JS_PropertyStub, JS_StrictPropertyStub,
  345. STRING_ELEMENT_ATTRS)) {
  346. return false;
  347. }
  348. }
  349. return true;
  350. }
  351. static JSBool
  352. str_resolve(JSContext *cx, JSObject *obj, jsid id, uintN flags,
  353. JSObject **objp)
  354. {
  355. if (!JSID_IS_INT(id))
  356. return JS_TRUE;
  357. JSString *str = obj->getPrimitiveThis().toString();
  358. jsint slot = JSID_TO_INT(id);
  359. if ((size_t)slot < str->length()) {
  360. JSString *str1 = cx->runtime->staticStrings.getUnitStringForElement(cx, str, size_t(slot));
  361. if (!str1)
  362. return JS_FALSE;
  363. if (!obj->defineElement(cx, uint32_t(slot), StringValue(str1), NULL, NULL,
  364. STRING_ELEMENT_ATTRS)) {
  365. return JS_FALSE;
  366. }
  367. *objp = obj;
  368. }
  369. return JS_TRUE;
  370. }
  371. Class js::StringClass = {
  372. js_String_str,
  373. JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
  374. JSCLASS_NEW_RESOLVE | JSCLASS_HAS_CACHED_PROTO(JSProto_String),
  375. JS_PropertyStub, /* addProperty */
  376. JS_PropertyStub, /* delProperty */
  377. JS_PropertyStub, /* getProperty */
  378. JS_StrictPropertyStub, /* setProperty */
  379. str_enumerate,
  380. (JSResolveOp)str_resolve,
  381. JS_ConvertStub
  382. };
  383. /*
  384. * Returns a JSString * for the |this| value associated with 'call', or throws
  385. * a TypeError if |this| is null or undefined. This algorithm is the same as
  386. * calling CheckObjectCoercible(this), then returning ToString(this), as all
  387. * String.prototype.* methods do (other than toString and valueOf).
  388. */
  389. static JS_ALWAYS_INLINE JSString *
  390. ThisToStringForStringProto(JSContext *cx, CallReceiver call)
  391. {
  392. JS_CHECK_RECURSION(cx, return NULL);
  393. if (call.thisv().isString())
  394. return call.thisv().toString();
  395. if (call.thisv().isObject()) {
  396. JSObject *obj = &call.thisv().toObject();
  397. if (obj->isString() &&
  398. ClassMethodIsNative(cx, obj,
  399. &StringClass,
  400. ATOM_TO_JSID(cx->runtime->atomState.toStringAtom),
  401. js_str_toString))
  402. {
  403. call.thisv() = obj->getPrimitiveThis();
  404. return call.thisv().toString();
  405. }
  406. } else if (call.thisv().isNullOrUndefined()) {
  407. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_CONVERT_TO,
  408. call.thisv().isNull() ? "null" : "undefined", "object");
  409. return NULL;
  410. }
  411. JSString *str = ToStringSlow(cx, call.thisv());
  412. if (!str)
  413. return NULL;
  414. call.thisv().setString(str);
  415. return str;
  416. }
  417. #if JS_HAS_TOSOURCE
  418. /*
  419. * String.prototype.quote is generic (as are most string methods), unlike
  420. * toSource, toString, and valueOf.
  421. */
  422. static JSBool
  423. str_quote(JSContext *cx, uintN argc, Value *vp)
  424. {
  425. CallArgs args = CallArgsFromVp(argc, vp);
  426. JSString *str = ThisToStringForStringProto(cx, args);
  427. if (!str)
  428. return false;
  429. str = js_QuoteString(cx, str, '"');
  430. if (!str)
  431. return false;
  432. args.rval() = StringValue(str);
  433. return true;
  434. }
  435. static JSBool
  436. str_toSource(JSContext *cx, uintN argc, Value *vp)
  437. {
  438. CallArgs args = CallArgsFromVp(argc, vp);
  439. JSString *str;
  440. bool ok;
  441. if (!BoxedPrimitiveMethodGuard(cx, args, str_toSource, &str, &ok))
  442. return ok;
  443. str = js_QuoteString(cx, str, '"');
  444. if (!str)
  445. return false;
  446. StringBuffer sb(cx);
  447. if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
  448. return false;
  449. str = sb.finishString();
  450. if (!str)
  451. return false;
  452. args.rval() = StringValue(str);
  453. return true;
  454. }
  455. #endif /* JS_HAS_TOSOURCE */
  456. JSBool
  457. js_str_toString(JSContext *cx, uintN argc, Value *vp)
  458. {
  459. CallArgs args = CallArgsFromVp(argc, vp);
  460. JSString *str;
  461. bool ok;
  462. if (!BoxedPrimitiveMethodGuard(cx, args, js_str_toString, &str, &ok))
  463. return ok;
  464. args.rval() = StringValue(str);
  465. return true;
  466. }
  467. /*
  468. * Java-like string native methods.
  469. */
  470. JS_ALWAYS_INLINE bool
  471. ValueToIntegerRange(JSContext *cx, const Value &v, int32_t *out)
  472. {
  473. if (v.isInt32()) {
  474. *out = v.toInt32();
  475. } else {
  476. double d;
  477. if (!ToInteger(cx, v, &d))
  478. return false;
  479. if (d > INT32_MAX)
  480. *out = INT32_MAX;
  481. else if (d < INT32_MIN)
  482. *out = INT32_MIN;
  483. else
  484. *out = int32_t(d);
  485. }
  486. return true;
  487. }
  488. static JSBool
  489. str_substring(JSContext *cx, uintN argc, Value *vp)
  490. {
  491. CallArgs args = CallArgsFromVp(argc, vp);
  492. JSString *str = ThisToStringForStringProto(cx, args);
  493. if (!str)
  494. return false;
  495. int32_t length, begin, end;
  496. if (args.length() > 0) {
  497. end = length = int32_t(str->length());
  498. if (!ValueToIntegerRange(cx, args[0], &begin))
  499. return false;
  500. if (begin < 0)
  501. begin = 0;
  502. else if (begin > length)
  503. begin = length;
  504. if (args.length() > 1 && !args[1].isUndefined()) {
  505. if (!ValueToIntegerRange(cx, args[1], &end))
  506. return false;
  507. if (end > length) {
  508. end = length;
  509. } else {
  510. if (end < 0)
  511. end = 0;
  512. if (end < begin) {
  513. int32_t tmp = begin;
  514. begin = end;
  515. end = tmp;
  516. }
  517. }
  518. }
  519. str = js_NewDependentString(cx, str, size_t(begin), size_t(end - begin));
  520. if (!str)
  521. return false;
  522. }
  523. args.rval() = StringValue(str);
  524. return true;
  525. }
  526. JSString* JS_FASTCALL
  527. js_toLowerCase(JSContext *cx, JSString *str)
  528. {
  529. size_t n = str->length();
  530. const jschar *s = str->getChars(cx);
  531. if (!s)
  532. return NULL;
  533. jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
  534. if (!news)
  535. return NULL;
  536. for (size_t i = 0; i < n; i++)
  537. news[i] = unicode::ToLowerCase(s[i]);
  538. news[n] = 0;
  539. str = js_NewString(cx, news, n);
  540. if (!str) {
  541. cx->free_(news);
  542. return NULL;
  543. }
  544. return str;
  545. }
  546. static inline bool
  547. ToLowerCaseHelper(JSContext *cx, CallReceiver call)
  548. {
  549. JSString *str = ThisToStringForStringProto(cx, call);
  550. if (!str)
  551. return false;
  552. str = js_toLowerCase(cx, str);
  553. if (!str)
  554. return false;
  555. call.rval() = StringValue(str);
  556. return true;
  557. }
  558. static JSBool
  559. str_toLowerCase(JSContext *cx, uintN argc, Value *vp)
  560. {
  561. return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
  562. }
  563. static JSBool
  564. str_toLocaleLowerCase(JSContext *cx, uintN argc, Value *vp)
  565. {
  566. CallArgs args = CallArgsFromVp(argc, vp);
  567. /*
  568. * Forcefully ignore the first (or any) argument and return toLowerCase(),
  569. * ECMA has reserved that argument, presumably for defining the locale.
  570. */
  571. if (cx->localeCallbacks && cx->localeCallbacks->localeToLowerCase) {
  572. JSString *str = ThisToStringForStringProto(cx, args);
  573. if (!str)
  574. return false;
  575. Value result;
  576. if (!cx->localeCallbacks->localeToLowerCase(cx, str, &result))
  577. return false;
  578. args.rval() = result;
  579. return true;
  580. }
  581. return ToLowerCaseHelper(cx, args);
  582. }
  583. JSString* JS_FASTCALL
  584. js_toUpperCase(JSContext *cx, JSString *str)
  585. {
  586. size_t n = str->length();
  587. const jschar *s = str->getChars(cx);
  588. if (!s)
  589. return NULL;
  590. jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
  591. if (!news)
  592. return NULL;
  593. for (size_t i = 0; i < n; i++)
  594. news[i] = unicode::ToUpperCase(s[i]);
  595. news[n] = 0;
  596. str = js_NewString(cx, news, n);
  597. if (!str) {
  598. cx->free_(news);
  599. return NULL;
  600. }
  601. return str;
  602. }
  603. static JSBool
  604. ToUpperCaseHelper(JSContext *cx, CallReceiver call)
  605. {
  606. JSString *str = ThisToStringForStringProto(cx, call);
  607. if (!str)
  608. return false;
  609. str = js_toUpperCase(cx, str);
  610. if (!str)
  611. return false;
  612. call.rval() = StringValue(str);
  613. return true;
  614. }
  615. static JSBool
  616. str_toUpperCase(JSContext *cx, uintN argc, Value *vp)
  617. {
  618. return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
  619. }
  620. static JSBool
  621. str_toLocaleUpperCase(JSContext *cx, uintN argc, Value *vp)
  622. {
  623. CallArgs args = CallArgsFromVp(argc, vp);
  624. /*
  625. * Forcefully ignore the first (or any) argument and return toUpperCase(),
  626. * ECMA has reserved that argument, presumably for defining the locale.
  627. */
  628. if (cx->localeCallbacks && cx->localeCallbacks->localeToUpperCase) {
  629. JSString *str = ThisToStringForStringProto(cx, args);
  630. if (!str)
  631. return false;
  632. Value result;
  633. if (!cx->localeCallbacks->localeToUpperCase(cx, str, &result))
  634. return false;
  635. args.rval() = result;
  636. return true;
  637. }
  638. return ToUpperCaseHelper(cx, args);
  639. }
  640. static JSBool
  641. str_localeCompare(JSContext *cx, uintN argc, Value *vp)
  642. {
  643. CallArgs args = CallArgsFromVp(argc, vp);
  644. JSString *str = ThisToStringForStringProto(cx, args);
  645. if (!str)
  646. return false;
  647. if (args.length() == 0) {
  648. args.rval() = Int32Value(0);
  649. } else {
  650. JSString *thatStr = ToString(cx, args[0]);
  651. if (!thatStr)
  652. return false;
  653. if (cx->localeCallbacks && cx->localeCallbacks->localeCompare) {
  654. args[0].setString(thatStr);
  655. Value result;
  656. if (!cx->localeCallbacks->localeCompare(cx, str, thatStr, &result))
  657. return true;
  658. args.rval() = result;
  659. return true;
  660. }
  661. int32_t result;
  662. if (!CompareStrings(cx, str, thatStr, &result))
  663. return false;
  664. args.rval() = Int32Value(result);
  665. }
  666. return true;
  667. }
  668. JSBool
  669. js_str_charAt(JSContext *cx, uintN argc, Value *vp)
  670. {
  671. CallArgs args = CallArgsFromVp(argc, vp);
  672. JSString *str;
  673. size_t i;
  674. if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
  675. str = args.thisv().toString();
  676. i = size_t(args[0].toInt32());
  677. if (i >= str->length())
  678. goto out_of_range;
  679. } else {
  680. str = ThisToStringForStringProto(cx, args);
  681. if (!str)
  682. return false;
  683. double d = 0.0;
  684. if (args.length() > 0 && !ToInteger(cx, args[0], &d))
  685. return false;
  686. if (d < 0 || str->length() <= d)
  687. goto out_of_range;
  688. i = size_t(d);
  689. }
  690. str = cx->runtime->staticStrings.getUnitStringForElement(cx, str, i);
  691. if (!str)
  692. return false;
  693. args.rval() = StringValue(str);
  694. return true;
  695. out_of_range:
  696. args.rval() = StringValue(cx->runtime->emptyString);
  697. return true;
  698. }
  699. JSBool
  700. js_str_charCodeAt(JSContext *cx, uintN argc, Value *vp)
  701. {
  702. CallArgs args = CallArgsFromVp(argc, vp);
  703. JSString *str;
  704. size_t i;
  705. if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
  706. str = args.thisv().toString();
  707. i = size_t(args[0].toInt32());
  708. if (i >= str->length())
  709. goto out_of_range;
  710. } else {
  711. str = ThisToStringForStringProto(cx, args);
  712. if (!str)
  713. return false;
  714. double d = 0.0;
  715. if (args.length() > 0 && !ToInteger(cx, args[0], &d))
  716. return false;
  717. if (d < 0 || str->length() <= d)
  718. goto out_of_range;
  719. i = size_t(d);
  720. }
  721. const jschar *chars;
  722. chars = str->getChars(cx);
  723. if (!chars)
  724. return false;
  725. args.rval() = Int32Value(chars[i]);
  726. return true;
  727. out_of_range:
  728. args.rval() = DoubleValue(js_NaN);
  729. return true;
  730. }
  731. /*
  732. * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
  733. * The patlen argument must be positive and no greater than sBMHPatLenMax.
  734. *
  735. * Return the index of pat in text, or -1 if not found.
  736. */
  737. static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
  738. static const jsuint sBMHPatLenMax = 255; /* skip table element is uint8_t */
  739. static const jsint sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
  740. jsint
  741. js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
  742. const jschar *pat, jsuint patlen)
  743. {
  744. uint8_t skip[sBMHCharSetSize];
  745. JS_ASSERT(0 < patlen && patlen <= sBMHPatLenMax);
  746. for (jsuint i = 0; i < sBMHCharSetSize; i++)
  747. skip[i] = (uint8_t)patlen;
  748. jsuint m = patlen - 1;
  749. for (jsuint i = 0; i < m; i++) {
  750. jschar c = pat[i];
  751. if (c >= sBMHCharSetSize)
  752. return sBMHBadPattern;
  753. skip[c] = (uint8_t)(m - i);
  754. }
  755. jschar c;
  756. for (jsuint k = m;
  757. k < textlen;
  758. k += ((c = text[k]) >= sBMHCharSetSize) ? patlen : skip[c]) {
  759. for (jsuint i = k, j = m; ; i--, j--) {
  760. if (text[i] != pat[j])
  761. break;
  762. if (j == 0)
  763. return static_cast<jsint>(i); /* safe: max string size */
  764. }
  765. }
  766. return -1;
  767. }
  768. struct MemCmp {
  769. typedef jsuint Extent;
  770. static JS_ALWAYS_INLINE Extent computeExtent(const jschar *, jsuint patlen) {
  771. return (patlen - 1) * sizeof(jschar);
  772. }
  773. static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
  774. return memcmp(p, t, extent) == 0;
  775. }
  776. };
  777. struct ManualCmp {
  778. typedef const jschar *Extent;
  779. static JS_ALWAYS_INLINE Extent computeExtent(const jschar *pat, jsuint patlen) {
  780. return pat + patlen;
  781. }
  782. static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
  783. for (; p != extent; ++p, ++t) {
  784. if (*p != *t)
  785. return false;
  786. }
  787. return true;
  788. }
  789. };
  790. template <class InnerMatch>
  791. static jsint
  792. UnrolledMatch(const jschar *text, jsuint textlen, const jschar *pat, jsuint patlen)
  793. {
  794. JS_ASSERT(patlen > 0 && textlen > 0);
  795. const jschar *textend = text + textlen - (patlen - 1);
  796. const jschar p0 = *pat;
  797. const jschar *const patNext = pat + 1;
  798. const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
  799. uint8_t fixup;
  800. const jschar *t = text;
  801. switch ((textend - t) & 7) {
  802. case 0: if (*t++ == p0) { fixup = 8; goto match; }
  803. case 7: if (*t++ == p0) { fixup = 7; goto match; }
  804. case 6: if (*t++ == p0) { fixup = 6; goto match; }
  805. case 5: if (*t++ == p0) { fixup = 5; goto match; }
  806. case 4: if (*t++ == p0) { fixup = 4; goto match; }
  807. case 3: if (*t++ == p0) { fixup = 3; goto match; }
  808. case 2: if (*t++ == p0) { fixup = 2; goto match; }
  809. case 1: if (*t++ == p0) { fixup = 1; goto match; }
  810. }
  811. while (t != textend) {
  812. if (t[0] == p0) { t += 1; fixup = 8; goto match; }
  813. if (t[1] == p0) { t += 2; fixup = 7; goto match; }
  814. if (t[2] == p0) { t += 3; fixup = 6; goto match; }
  815. if (t[3] == p0) { t += 4; fixup = 5; goto match; }
  816. if (t[4] == p0) { t += 5; fixup = 4; goto match; }
  817. if (t[5] == p0) { t += 6; fixup = 3; goto match; }
  818. if (t[6] == p0) { t += 7; fixup = 2; goto match; }
  819. if (t[7] == p0) { t += 8; fixup = 1; goto match; }
  820. t += 8;
  821. continue;
  822. do {
  823. if (*t++ == p0) {
  824. match:
  825. if (!InnerMatch::match(patNext, t, extent))
  826. goto failed_match;
  827. return t - text - 1;
  828. }
  829. failed_match:;
  830. } while (--fixup > 0);
  831. }
  832. return -1;
  833. }
  834. static JS_ALWAYS_INLINE jsint
  835. StringMatch(const jschar *text, jsuint textlen,
  836. const jschar *pat, jsuint patlen)
  837. {
  838. if (patlen == 0)
  839. return 0;
  840. if (textlen < patlen)
  841. return -1;
  842. #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
  843. /*
  844. * Given enough registers, the unrolled loop below is faster than the
  845. * following loop. 32-bit x86 does not have enough registers.
  846. */
  847. if (patlen == 1) {
  848. const jschar p0 = *pat;
  849. for (const jschar *c = text, *end = text + textlen; c != end; ++c) {
  850. if (*c == p0)
  851. return c - text;
  852. }
  853. return -1;
  854. }
  855. #endif
  856. /*
  857. * If the text or pattern string is short, BMH will be more expensive than
  858. * the basic linear scan due to initialization cost and a more complex loop
  859. * body. While the correct threshold is input-dependent, we can make a few
  860. * conservative observations:
  861. * - When |textlen| is "big enough", the initialization time will be
  862. * proportionally small, so the worst-case slowdown is minimized.
  863. * - When |patlen| is "too small", even the best case for BMH will be
  864. * slower than a simple scan for large |textlen| due to the more complex
  865. * loop body of BMH.
  866. * From this, the values for "big enough" and "too small" are determined
  867. * empirically. See bug 526348.
  868. */
  869. if (textlen >= 512 && patlen >= 11 && patlen <= sBMHPatLenMax) {
  870. jsint index = js_BoyerMooreHorspool(text, textlen, pat, patlen);
  871. if (index != sBMHBadPattern)
  872. return index;
  873. }
  874. /*
  875. * For big patterns with large potential overlap we want the SIMD-optimized
  876. * speed of memcmp. For small patterns, a simple loop is faster.
  877. *
  878. * FIXME: Linux memcmp performance is sad and the manual loop is faster.
  879. */
  880. return
  881. #if !defined(__linux__)
  882. patlen > 128 ? UnrolledMatch<MemCmp>(text, textlen, pat, patlen)
  883. :
  884. #endif
  885. UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
  886. }
  887. static const size_t sRopeMatchThresholdRatioLog2 = 5;
  888. /*
  889. * RopeMatch takes the text to search, the patern to search for in the text.
  890. * RopeMatch returns false on OOM and otherwise returns the match index through
  891. * the 'match' outparam (-1 for not found).
  892. */
  893. static bool
  894. RopeMatch(JSContext *cx, JSString *textstr, const jschar *pat, jsuint patlen, jsint *match)
  895. {
  896. JS_ASSERT(textstr->isRope());
  897. if (patlen == 0) {
  898. *match = 0;
  899. return true;
  900. }
  901. if (textstr->length() < patlen) {
  902. *match = -1;
  903. return true;
  904. }
  905. /*
  906. * List of leaf nodes in the rope. If we run out of memory when trying to
  907. * append to this list, we can still fall back to StringMatch, so use the
  908. * system allocator so we don't report OOM in that case.
  909. */
  910. Vector<JSLinearString *, 16, SystemAllocPolicy> strs;
  911. /*
  912. * We don't want to do rope matching if there is a poor node-to-char ratio,
  913. * since this means spending a lot of time in the match loop below. We also
  914. * need to build the list of leaf nodes. Do both here: iterate over the
  915. * nodes so long as there are not too many.
  916. */
  917. {
  918. size_t textstrlen = textstr->length();
  919. size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2;
  920. StringSegmentRange r(cx);
  921. if (!r.init(textstr))
  922. return false;
  923. while (!r.empty()) {
  924. if (threshold-- == 0 || !strs.append(r.front())) {
  925. const jschar *chars = textstr->getChars(cx);
  926. if (!chars)
  927. return false;
  928. *match = StringMatch(chars, textstrlen, pat, patlen);
  929. return true;
  930. }
  931. if (!r.popFront())
  932. return false;
  933. }
  934. }
  935. /* Absolute offset from the beginning of the logical string textstr. */
  936. jsint pos = 0;
  937. for (JSLinearString **outerp = strs.begin(); outerp != strs.end(); ++outerp) {
  938. /* Try to find a match within 'outer'. */
  939. JSLinearString *outer = *outerp;
  940. const jschar *chars = outer->chars();
  941. size_t len = outer->length();
  942. jsint matchResult = StringMatch(chars, len, pat, patlen);
  943. if (matchResult != -1) {
  944. /* Matched! */
  945. *match = pos + matchResult;
  946. return true;
  947. }
  948. /* Try to find a match starting in 'outer' and running into other nodes. */
  949. const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1);
  950. const jschar *const textend = chars + len;
  951. const jschar p0 = *pat;
  952. const jschar *const p1 = pat + 1;
  953. const jschar *const patend = pat + patlen;
  954. for (const jschar *t = text; t != textend; ) {
  955. if (*t++ != p0)
  956. continue;
  957. JSLinearString **innerp = outerp;
  958. const jschar *ttend = textend;
  959. for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) {
  960. while (tt == ttend) {
  961. if (++innerp == strs.end()) {
  962. *match = -1;
  963. return true;
  964. }
  965. JSLinearString *inner = *innerp;
  966. tt = inner->chars();
  967. ttend = tt + inner->length();
  968. }
  969. if (*pp != *tt)
  970. goto break_continue;
  971. }
  972. /* Matched! */
  973. *match = pos + (t - chars) - 1; /* -1 because of *t++ above */
  974. return true;
  975. break_continue:;
  976. }
  977. pos += len;
  978. }
  979. *match = -1;
  980. return true;
  981. }
  982. static JSBool
  983. str_indexOf(JSContext *cx, uintN argc, Value *vp)
  984. {
  985. CallArgs args = CallArgsFromVp(argc, vp);
  986. JSString *str = ThisToStringForStringProto(cx, args);
  987. if (!str)
  988. return false;
  989. JSLinearString *patstr = ArgToRootedString(cx, args, 0);
  990. if (!patstr)
  991. return false;
  992. jsuint textlen = str->length();
  993. const jschar *text = str->getChars(cx);
  994. if (!text)
  995. return false;
  996. jsuint patlen = patstr->length();
  997. const jschar *pat = patstr->chars();
  998. jsuint start;
  999. if (args.length() > 1) {
  1000. if (args[1].isInt32()) {
  1001. jsint i = args[1].toInt32();
  1002. if (i <= 0) {
  1003. start = 0;
  1004. } else if (jsuint(i) > textlen) {
  1005. start = textlen;
  1006. textlen = 0;
  1007. } else {
  1008. start = i;
  1009. text += start;
  1010. textlen -= start;
  1011. }
  1012. } else {
  1013. jsdouble d;
  1014. if (!ToInteger(cx, args[1], &d))
  1015. return false;
  1016. if (d <= 0) {
  1017. start = 0;
  1018. } else if (d > textlen) {
  1019. start = textlen;
  1020. textlen = 0;
  1021. } else {
  1022. start = (jsint)d;
  1023. text += start;
  1024. textlen -= start;
  1025. }
  1026. }
  1027. } else {
  1028. start = 0;
  1029. }
  1030. jsint match = StringMatch(text, textlen, pat, patlen);
  1031. args.rval() = Int32Value((match == -1) ? -1 : start + match);
  1032. return true;
  1033. }
  1034. static JSBool
  1035. str_lastIndexOf(JSContext *cx, uintN argc, Value *vp)
  1036. {
  1037. CallArgs args = CallArgsFromVp(argc, vp);
  1038. JSString *textstr = ThisToStringForStringProto(cx, args);
  1039. if (!textstr)
  1040. return false;
  1041. size_t textlen = textstr->length();
  1042. const jschar *text = textstr->getChars(cx);
  1043. if (!text)
  1044. return false;
  1045. JSLinearString *patstr = ArgToRootedString(cx, args, 0);
  1046. if (!patstr)
  1047. return false;
  1048. size_t patlen = patstr->length();
  1049. const jschar *pat = patstr->chars();
  1050. jsint i = textlen - patlen; // Start searching here
  1051. if (i < 0) {
  1052. args.rval() = Int32Value(-1);
  1053. return true;
  1054. }
  1055. if (args.length() > 1) {
  1056. if (args[1].isInt32()) {
  1057. jsint j = args[1].toInt32();
  1058. if (j <= 0)
  1059. i = 0;
  1060. else if (j < i)
  1061. i = j;
  1062. } else {
  1063. double d;
  1064. if (!ToNumber(cx, args[1], &d))
  1065. return false;
  1066. if (!JSDOUBLE_IS_NaN(d)) {
  1067. d = js_DoubleToInteger(d);
  1068. if (d <= 0)
  1069. i = 0;
  1070. else if (d < i)
  1071. i = (jsint)d;
  1072. }
  1073. }
  1074. }
  1075. if (patlen == 0) {
  1076. args.rval() = Int32Value(i);
  1077. return true;
  1078. }
  1079. const jschar *t = text + i;
  1080. const jschar *textend = text - 1;
  1081. const jschar p0 = *pat;
  1082. const jschar *patNext = pat + 1;
  1083. const jschar *patEnd = pat + patlen;
  1084. for (; t != textend; --t) {
  1085. if (*t == p0) {
  1086. const jschar *t1 = t + 1;
  1087. for (const jschar *p1 = patNext; p1 != patEnd; ++p1, ++t1) {
  1088. if (*t1 != *p1)
  1089. goto break_continue;
  1090. }
  1091. args.rval() = Int32Value(t - text);
  1092. return true;
  1093. }
  1094. break_continue:;
  1095. }
  1096. args.rval() = Int32Value(-1);
  1097. return true;
  1098. }
  1099. static JSBool
  1100. js_TrimString(JSContext *cx, Value *vp, JSBool trimLeft, JSBool trimRight)
  1101. {
  1102. CallReceiver call = CallReceiverFromVp(vp);
  1103. JSString *str = ThisToStringForStringProto(cx, call);
  1104. if (!str)
  1105. return false;
  1106. size_t length = str->length();
  1107. const jschar *chars = str->getChars(cx);
  1108. if (!chars)
  1109. return false;
  1110. size_t begin = 0;
  1111. size_t end = length;
  1112. if (trimLeft) {
  1113. while (begin < length && unicode::IsSpace(chars[begin]))
  1114. ++begin;
  1115. }
  1116. if (trimRight) {
  1117. while (end > begin && unicode::IsSpace(chars[end - 1]))
  1118. --end;
  1119. }
  1120. str = js_NewDependentString(cx, str, begin, end - begin);
  1121. if (!str)
  1122. return false;
  1123. call.rval() = StringValue(str);
  1124. return true;
  1125. }
  1126. static JSBool
  1127. str_trim(JSContext *cx, uintN argc, Value *vp)
  1128. {
  1129. return js_TrimString(cx, vp, JS_TRUE, JS_TRUE);
  1130. }
  1131. static JSBool
  1132. str_trimLeft(JSContext *cx, uintN argc, Value *vp)
  1133. {
  1134. return js_TrimString(cx, vp, JS_TRUE, JS_FALSE);
  1135. }
  1136. static JSBool
  1137. str_trimRight(JSContext *cx, uintN argc, Value *vp)
  1138. {
  1139. return js_TrimString(cx, vp, JS_FALSE, JS_TRUE);
  1140. }
  1141. /*
  1142. * Perl-inspired string functions.
  1143. */
  1144. /* Result of a successfully performed flat match. */
  1145. class FlatMatch
  1146. {
  1147. JSAtom *patstr;
  1148. const jschar *pat;
  1149. size_t patlen;
  1150. int32_t match_;
  1151. friend class RegExpGuard;
  1152. public:
  1153. FlatMatch() : patstr(NULL) {} /* Old GCC wants this initialization. */
  1154. JSLinearString *pattern() const { return patstr; }
  1155. size_t patternLength() const { return patlen; }
  1156. /*
  1157. * Note: The match is -1 when the match is performed successfully,
  1158. * but no match is found.
  1159. */
  1160. int32_t match() const { return match_; }
  1161. };
  1162. static inline bool
  1163. IsRegExpMetaChar(jschar c)
  1164. {
  1165. switch (c) {
  1166. /* Taken from the PatternCharacter production in 15.10.1. */
  1167. case '^': case '$': case '\\': case '.': case '*': case '+':
  1168. case '?': case '(': case ')': case '[': case ']': case '{':
  1169. case '}': case '|':
  1170. return true;
  1171. default:
  1172. return false;
  1173. }
  1174. }
  1175. static inline bool
  1176. HasRegExpMetaChars(const jschar *chars, size_t length)
  1177. {
  1178. for (size_t i = 0; i < length; ++i) {
  1179. if (IsRegExpMetaChar(chars[i]))
  1180. return true;
  1181. }
  1182. return false;
  1183. }
  1184. /*
  1185. * RegExpGuard factors logic out of String regexp operations.
  1186. *
  1187. * |optarg| indicates in which argument position RegExp flags will be found, if
  1188. * present. This is a Mozilla extension and not part of any ECMA spec.
  1189. */
  1190. class RegExpGuard
  1191. {
  1192. RegExpGuard(const RegExpGuard &) MOZ_DELETE;
  1193. void operator=(const RegExpGuard &) MOZ_DELETE;
  1194. RegExpShared::Guard re_;
  1195. FlatMatch fm;
  1196. /*
  1197. * Upper bound on the number of characters we are willing to potentially
  1198. * waste on searching for RegExp meta-characters.
  1199. */
  1200. static const size_t MAX_FLAT_PAT_LEN = 256;
  1201. static JSAtom *
  1202. flattenPattern(JSContext *cx, JSAtom *patstr)
  1203. {
  1204. StringBuffer sb(cx);
  1205. if (!sb.reserve(patstr->length()))
  1206. return NULL;
  1207. static const jschar ESCAPE_CHAR = '\\';
  1208. const jschar *chars = patstr->chars();
  1209. size_t len = patstr->length();
  1210. for (const jschar *it = chars; it != chars + len; ++it) {
  1211. if (IsRegExpMetaChar(*it)) {
  1212. if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
  1213. return NULL;
  1214. } else {
  1215. if (!sb.append(*it))
  1216. return NULL;
  1217. }
  1218. }
  1219. return sb.finishAtom();
  1220. }
  1221. public:
  1222. RegExpGuard() {}
  1223. /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
  1224. bool init(JSContext *cx, CallArgs args, bool convertVoid = false)
  1225. {
  1226. if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
  1227. RegExpShared *shared = RegExpToShared(cx, args[0].toObject());
  1228. if (!shared)
  1229. return false;
  1230. re_.init(*shared);
  1231. } else {
  1232. if (convertVoid && (args.length() == 0 || args[0].isUndefined())) {
  1233. fm.patstr = cx->runtime->emptyString;
  1234. return true;
  1235. }
  1236. JSString *arg = ArgToRootedString(cx, args, 0);
  1237. if (!arg)
  1238. return false;
  1239. fm.patstr = js_AtomizeString(cx, arg);
  1240. if (!fm.patstr)
  1241. return false;
  1242. }
  1243. return true;
  1244. }
  1245. /*
  1246. * Attempt to match |patstr| to |textstr|. A flags argument, metachars in the
  1247. * pattern string, or a lengthy pattern string can thwart this process.
  1248. *
  1249. * |checkMetaChars| looks for regexp metachars in the pattern string.
  1250. *
  1251. * Return whether flat matching could be used.
  1252. *
  1253. * N.B. tryFlatMatch returns NULL on OOM, so the caller must check cx->isExceptionPending().
  1254. */
  1255. const FlatMatch *
  1256. tryFlatMatch(JSContext *cx, JSString *textstr, uintN optarg, uintN argc,
  1257. bool checkMetaChars = true)
  1258. {
  1259. if (re_.initialized())
  1260. return NULL;
  1261. fm.pat = fm.patstr->chars();
  1262. fm.patlen = fm.patstr->length();
  1263. if (optarg < argc)
  1264. return NULL;
  1265. if (checkMetaChars &&
  1266. (fm.patlen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat, fm.patlen))) {
  1267. return NULL;
  1268. }
  1269. /*
  1270. * textstr could be a rope, so we want to avoid flattening it for as
  1271. * long as possible.
  1272. */
  1273. if (textstr->isRope()) {
  1274. if (!RopeMatch(cx, textstr, fm.pat, fm.patlen, &fm.match_))
  1275. return NULL;
  1276. } else {
  1277. const jschar *text = textstr->asLinear().chars();
  1278. size_t textlen = textstr->length();
  1279. fm.match_ = StringMatch(text, textlen, fm.pat, fm.patlen);
  1280. }
  1281. return &fm;
  1282. }
  1283. /* If the pattern is not already a regular expression, make it so. */
  1284. bool normalizeRegExp(JSContext *cx, bool flat, uintN optarg, CallArgs args)
  1285. {
  1286. if (re_.initialized())
  1287. return true;
  1288. /* Build RegExp from pattern string. */
  1289. JSString *opt;
  1290. if (optarg < args.length()) {
  1291. opt = ToString(cx, args[optarg]);
  1292. if (!opt)
  1293. return false;
  1294. } else {
  1295. opt = NULL;
  1296. }
  1297. JSAtom *patstr;
  1298. if (flat) {
  1299. patstr = flattenPattern(cx, fm.patstr);
  1300. if (!patstr)
  1301. return false;
  1302. } else {
  1303. patstr = fm.patstr;
  1304. }
  1305. JS_ASSERT(patstr);
  1306. RegExpShared *re = cx->compartment->regExps.get(cx, patstr, opt);
  1307. if (!re)
  1308. return false;
  1309. re_.init(*re);
  1310. return true;
  1311. }
  1312. RegExpShared &regExp() { return *re_; }
  1313. };
  1314. /* ExecuteRegExp indicates success in two ways, based on the 'test' flag. */
  1315. static JS_ALWAYS_INLINE bool
  1316. Matched(RegExpExecType type, const Value &v)
  1317. {
  1318. return (type == RegExpTest) ? v.isTrue() : !v.isNull();
  1319. }
  1320. typedef bool (*DoMatchCallback)(JSContext *cx, RegExpStatics *res, size_t count, void *data);
  1321. /*
  1322. * BitOR-ing these flags allows the DoMatch caller to control when how the
  1323. * RegExp engine is called and when callbacks are fired.
  1324. */
  1325. enum MatchControlFlags {
  1326. TEST_GLOBAL_BIT = 0x1, /* use RegExp.test for global regexps */
  1327. TEST_SINGLE_BIT = 0x2, /* use RegExp.test for non-global regexps */
  1328. CALLBACK_ON_SINGLE_BIT = 0x4, /* fire callback on non-global match */
  1329. MATCH_ARGS = TEST_GLOBAL_BIT,
  1330. MATCHALL_ARGS = CALLBACK_ON_SINGLE_BIT,
  1331. REPLACE_ARGS = TEST_GLOBAL_BIT | TEST_SINGLE_BIT | CALLBACK_ON_SINGLE_BIT
  1332. };
  1333. /* Factor out looping and matching logic. */
  1334. static bool
  1335. DoMatch(JSContext *cx, RegExpStatics *res, JSString *str, RegExpShared &re,
  1336. DoMatchCallback callback, void *data, MatchControlFlags flags, Value *rval)
  1337. {
  1338. JSLinearString *linearStr = str->ensureLinear(cx);
  1339. if (!linearStr)
  1340. return false;
  1341. const jschar *chars = linearStr->chars();
  1342. size_t length = linearStr->length();
  1343. if (re.global()) {
  1344. RegExpExecType type = (flags & TEST_GLOBAL_BIT) ? RegExpTest : RegExpExec;
  1345. for (size_t count = 0, i = 0, length = str->length(); i <= length; ++count) {
  1346. if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
  1347. return false;
  1348. if (!Matched(type, *rval))
  1349. break;
  1350. if (!callback(cx, res, count, data))
  1351. return false;
  1352. if (!res->matched())
  1353. ++i;
  1354. }
  1355. } else {
  1356. RegExpExecType type = (flags & TEST_SINGLE_BIT) ? RegExpTest : RegExpExec;
  1357. bool callbackOnSingle = !!(flags & CALLBACK_ON_SINGLE_BIT);
  1358. size_t i = 0;
  1359. if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
  1360. return false;
  1361. if (callbackOnSingle && Matched(type, *rval) && !callback(cx, res, 0, data))
  1362. return false;
  1363. }
  1364. return true;
  1365. }
  1366. static bool
  1367. BuildFlatMatchArray(JSContext *cx, JSString *textstr, const FlatMatch &fm, CallArgs *args)
  1368. {
  1369. if (fm.match() < 0) {
  1370. args->rval() = NullValue();
  1371. return true;
  1372. }
  1373. /* For this non-global match, produce a RegExp.exec-style array. */
  1374. JSObject *obj = NewSlowEmptyArray(cx);
  1375. if (!obj)
  1376. return false;
  1377. if (!obj->defineElement(cx, 0, StringValue(fm.pattern())) ||
  1378. !obj->defineProperty(cx, cx->runtime->atomState.indexAtom, Int32Value(fm.match())) ||
  1379. !obj->defineProperty(cx, cx->runtime->atomState.inputAtom, StringValue(textstr)))
  1380. {
  1381. return false;
  1382. }
  1383. args->rval() = ObjectValue(*obj);
  1384. return true;
  1385. }
  1386. typedef JSObject **MatchArgType;
  1387. /*
  1388. * DoMatch will only callback on global matches, hence this function builds
  1389. * only the "array of matches" returned by match on global regexps.
  1390. */
  1391. static bool
  1392. MatchCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
  1393. {
  1394. JS_ASSERT(count <= JSID_INT_MAX); /* by max string length */
  1395. JSObject *&arrayobj = *static_cast<MatchArgType>(p);
  1396. if (!arrayobj) {
  1397. arrayobj = NewDenseEmptyArray(cx);
  1398. if (!arrayobj)
  1399. return false;
  1400. }
  1401. Value v;
  1402. return res->createLastMatch(cx, &v) && arrayobj->defineElement(cx, count, v);
  1403. }
  1404. JSBool
  1405. js::str_match(JSContext *cx, uintN argc, Value *vp)
  1406. {
  1407. CallArgs args = CallArgsFromVp(argc, vp);
  1408. JSString *str = ThisToStringForStringProto(cx, args);
  1409. if (!str)
  1410. return false;
  1411. RegExpGuard g;
  1412. if (!g.init(cx, args, true))
  1413. return false;
  1414. if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length()))
  1415. return BuildFlatMatchArray(cx, str, *fm, &args);
  1416. /* Return if there was an error in tryFlatMatch. */
  1417. if (cx->isExceptionPending())
  1418. return false;
  1419. if (!g.normalizeRegExp(cx, false, 1, args))
  1420. return false;
  1421. JSObject *array = NULL;
  1422. MatchArgType arg = &array;
  1423. RegExpStatics *res = cx->regExpStatics();
  1424. Value rval;
  1425. if (!DoMatch(cx, res, str, g.regExp(), MatchCallback, arg, MATCH_ARGS, &rval))
  1426. return false;
  1427. if (g.regExp().global())
  1428. args.rval() = ObjectOrNullValue(array);
  1429. else
  1430. args.rval() = rval;
  1431. return true;
  1432. }
  1433. JSBool
  1434. js::str_search(JSContext *cx, uintN argc, Value *vp)
  1435. {
  1436. CallArgs args = CallArgsFromVp(argc, vp);
  1437. JSString *str = ThisToStringForStringProto(cx, args);
  1438. if (!str)
  1439. return false;
  1440. RegExpGuard g;
  1441. if (!g.init(cx, args, true))
  1442. return false;
  1443. if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length())) {
  1444. args.rval() = Int32Value(fm->match());
  1445. return true;
  1446. }
  1447. if (cx->isExceptionPending()) /* from tryFlatMatch */
  1448. return false;
  1449. if (!g.normalizeRegExp(cx, false, 1, args))
  1450. return false;
  1451. JSLinearString *linearStr = str->ensureLinear(cx);
  1452. if (!linearStr)
  1453. return false;
  1454. const jschar *chars = linearStr->chars();
  1455. size_t length = linearStr->length();
  1456. RegExpStatics *res = cx->regExpStatics();
  1457. /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
  1458. size_t i = 0;
  1459. Value result;
  1460. if (!ExecuteRegExp(cx, res, g.regExp(), linearStr, chars, length, &i, RegExpTest, &result))
  1461. return false;
  1462. if (result.isTrue())
  1463. args.rval() = Int32Value(res->matchStart());
  1464. else
  1465. args.rval() = Int32Value(-1);
  1466. return true;
  1467. }
  1468. struct ReplaceData
  1469. {
  1470. ReplaceData(JSContext *cx)
  1471. : sb(cx)
  1472. {}
  1473. JSString *str; /* 'this' parameter object as a string */
  1474. RegExpGuard g; /* regexp parameter object and private data */
  1475. JSObject *lambda; /* replacement function object or null */
  1476. JSObject *elembase; /* object for function(a){return b[a]} replace */
  1477. JSLinearString *repstr; /* replacement string */
  1478. const jschar *dollar; /* null or pointer to first $ in repstr */
  1479. const jschar *dollarEnd; /* limit pointer for js_strchr_limit */
  1480. jsint leftIndex; /* left context index in str->chars */
  1481. JSSubString dollarStr; /* for "$$" InterpretDollar result */
  1482. bool calledBack; /* record whether callback has been called */
  1483. InvokeArgsGuard args; /* arguments for lambda call */
  1484. StringBuffer sb; /* buffer built during DoMatch */
  1485. };
  1486. static bool
  1487. InterpretDollar(JSContext *cx, RegExpStatics *res, const jschar *dp, const jschar *ep,
  1488. ReplaceData &rdata, JSSubString *out, size_t *skip)
  1489. {