PageRenderTime 72ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/js/src/jsstr.cpp

http://github.com/zpao/v8monkey
C++ | 4356 lines | 3507 code | 476 blank | 373 comment | 684 complexity | fbf53656a39e95829f89c223c92027e4 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, LGPL-3.0, AGPL-1.0, LGPL-2.1, BSD-3-Clause, GPL-2.0, JSON, Apache-2.0, 0BSD
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
  2. * vim: set ts=8 sw=4 et tw=99:
  3. *
  4. * ***** BEGIN LICENSE BLOCK *****
  5. * Version: MPL 1.1/GPL 2.0/LGPL 2.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is Mozilla Communicator client code, released
  18. * March 31, 1998.
  19. *
  20. * The Initial Developer of the Original Code is
  21. * Netscape Communications Corporation.
  22. * Portions created by the Initial Developer are Copyright (C) 1998
  23. * the Initial Developer. All Rights Reserved.
  24. *
  25. * Contributor(s):
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the MPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the MPL, the GPL or the LGPL.
  38. *
  39. * ***** END LICENSE BLOCK ***** */
  40. /*
  41. * JS string type implementation.
  42. *
  43. * In order to avoid unnecessary js_LockGCThing/js_UnlockGCThing calls, these
  44. * native methods store strings (possibly newborn) converted from their 'this'
  45. * parameter and arguments on the stack: 'this' conversions at argv[-1], arg
  46. * conversions at their index (argv[0], argv[1]). This is a legitimate method
  47. * of rooting things that might lose their newborn root due to subsequent GC
  48. * allocations in the same native method.
  49. */
  50. #include "mozilla/Attributes.h"
  51. #include <stdlib.h>
  52. #include <string.h>
  53. #include "jstypes.h"
  54. #include "jsutil.h"
  55. #include "jshash.h"
  56. #include "jsprf.h"
  57. #include "jsapi.h"
  58. #include "jsarray.h"
  59. #include "jsatom.h"
  60. #include "jsbool.h"
  61. #include "jscntxt.h"
  62. #include "jsgc.h"
  63. #include "jsinterp.h"
  64. #include "jslock.h"
  65. #include "jsnum.h"
  66. #include "jsobj.h"
  67. #include "jsopcode.h"
  68. #include "jsprobes.h"
  69. #include "jsscope.h"
  70. #include "jsstr.h"
  71. #include "jsversion.h"
  72. #include "builtin/RegExp.h"
  73. #include "vm/GlobalObject.h"
  74. #include "vm/RegExpObject.h"
  75. #include "jsinferinlines.h"
  76. #include "jsobjinlines.h"
  77. #include "jsautooplen.h" // generated headers last
  78. #include "vm/RegExpObject-inl.h"
  79. #include "vm/RegExpStatics-inl.h"
  80. #include "vm/StringObject-inl.h"
  81. #include "vm/String-inl.h"
  82. using namespace js;
  83. using namespace js::gc;
  84. using namespace js::types;
  85. using namespace js::unicode;
  86. static JSLinearString *
  87. ArgToRootedString(JSContext *cx, CallArgs &args, uintN argno)
  88. {
  89. if (argno >= args.length())
  90. return cx->runtime->atomState.typeAtoms[JSTYPE_VOID];
  91. Value &arg = args[argno];
  92. JSString *str = ToString(cx, arg);
  93. if (!str)
  94. return NULL;
  95. arg = StringValue(str);
  96. return str->ensureLinear(cx);
  97. }
  98. /*
  99. * Forward declarations for URI encode/decode and helper routines
  100. */
  101. static JSBool
  102. str_decodeURI(JSContext *cx, uintN argc, Value *vp);
  103. static JSBool
  104. str_decodeURI_Component(JSContext *cx, uintN argc, Value *vp);
  105. static JSBool
  106. str_encodeURI(JSContext *cx, uintN argc, Value *vp);
  107. static JSBool
  108. str_encodeURI_Component(JSContext *cx, uintN argc, Value *vp);
  109. static const uint32_t INVALID_UTF8 = UINT32_MAX;
  110. static uint32_t
  111. Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length);
  112. /*
  113. * Global string methods
  114. */
  115. /* ES5 B.2.1 */
  116. static JSBool
  117. str_escape(JSContext *cx, uintN argc, Value *vp)
  118. {
  119. CallArgs args = CallArgsFromVp(argc, vp);
  120. const char digits[] = {'0', '1', '2', '3', '4', '5', '6', '7',
  121. '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
  122. JSLinearString *str = ArgToRootedString(cx, args, 0);
  123. if (!str)
  124. return false;
  125. size_t length = str->length();
  126. const jschar *chars = str->chars();
  127. static const uint8_t shouldPassThrough[256] = {
  128. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  129. 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  130. 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
  131. 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
  132. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
  133. 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
  134. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
  135. 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
  136. };
  137. /* In step 7, exactly 69 characters should pass through unencoded. */
  138. #ifdef DEBUG
  139. size_t count = 0;
  140. for (size_t i = 0; i < sizeof(shouldPassThrough); i++) {
  141. if (shouldPassThrough[i]) {
  142. count++;
  143. }
  144. }
  145. JS_ASSERT(count == 69);
  146. #endif
  147. /* Take a first pass and see how big the result string will need to be. */
  148. size_t newlength = length;
  149. for (size_t i = 0; i < length; i++) {
  150. jschar ch = chars[i];
  151. if (ch < 128 && shouldPassThrough[ch])
  152. continue;
  153. /* The character will be encoded as %XX or %uXXXX. */
  154. newlength += (ch < 256) ? 2 : 5;
  155. /*
  156. * This overflow test works because newlength is incremented by at
  157. * most 5 on each iteration.
  158. */
  159. if (newlength < length) {
  160. js_ReportAllocationOverflow(cx);
  161. return false;
  162. }
  163. }
  164. if (newlength >= ~(size_t)0 / sizeof(jschar)) {
  165. js_ReportAllocationOverflow(cx);
  166. return false;
  167. }
  168. jschar *newchars = (jschar *) cx->malloc_((newlength + 1) * sizeof(jschar));
  169. if (!newchars)
  170. return false;
  171. size_t i, ni;
  172. for (i = 0, ni = 0; i < length; i++) {
  173. jschar ch = chars[i];
  174. if (ch < 128 && shouldPassThrough[ch]) {
  175. newchars[ni++] = ch;
  176. } else if (ch < 256) {
  177. newchars[ni++] = '%';
  178. newchars[ni++] = digits[ch >> 4];
  179. newchars[ni++] = digits[ch & 0xF];
  180. } else {
  181. newchars[ni++] = '%';
  182. newchars[ni++] = 'u';
  183. newchars[ni++] = digits[ch >> 12];
  184. newchars[ni++] = digits[(ch & 0xF00) >> 8];
  185. newchars[ni++] = digits[(ch & 0xF0) >> 4];
  186. newchars[ni++] = digits[ch & 0xF];
  187. }
  188. }
  189. JS_ASSERT(ni == newlength);
  190. newchars[newlength] = 0;
  191. JSString *retstr = js_NewString(cx, newchars, newlength);
  192. if (!retstr) {
  193. cx->free_(newchars);
  194. return false;
  195. }
  196. args.rval() = StringValue(retstr);
  197. return true;
  198. }
  199. static inline bool
  200. Unhex4(const jschar *chars, jschar *result)
  201. {
  202. jschar a = chars[0],
  203. b = chars[1],
  204. c = chars[2],
  205. d = chars[3];
  206. if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
  207. return false;
  208. *result = (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) + JS7_UNHEX(d);
  209. return true;
  210. }
  211. static inline bool
  212. Unhex2(const jschar *chars, jschar *result)
  213. {
  214. jschar a = chars[0],
  215. b = chars[1];
  216. if (!(JS7_ISHEX(a) && JS7_ISHEX(b)))
  217. return false;
  218. *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
  219. return true;
  220. }
  221. /* ES5 B.2.2 */
  222. static JSBool
  223. str_unescape(JSContext *cx, uintN argc, Value *vp)
  224. {
  225. CallArgs args = CallArgsFromVp(argc, vp);
  226. /* Step 1. */
  227. JSLinearString *str = ArgToRootedString(cx, args, 0);
  228. if (!str)
  229. return false;
  230. /* Step 2. */
  231. size_t length = str->length();
  232. const jschar *chars = str->chars();
  233. /* Step 3. */
  234. StringBuffer sb(cx);
  235. /*
  236. * Note that the spec algorithm has been optimized to avoid building
  237. * a string in the case where no escapes are present.
  238. */
  239. /* Step 4. */
  240. size_t k = 0;
  241. bool building = false;
  242. while (true) {
  243. /* Step 5. */
  244. if (k == length) {
  245. JSLinearString *result;
  246. if (building) {
  247. result = sb.finishString();
  248. if (!result)
  249. return false;
  250. } else {
  251. result = str;
  252. }
  253. args.rval() = StringValue(result);
  254. return true;
  255. }
  256. /* Step 6. */
  257. jschar c = chars[k];
  258. /* Step 7. */
  259. if (c != '%')
  260. goto step_18;
  261. /* Step 8. */
  262. if (k > length - 6)
  263. goto step_14;
  264. /* Step 9. */
  265. if (chars[k + 1] != 'u')
  266. goto step_14;
  267. #define ENSURE_BUILDING \
  268. JS_BEGIN_MACRO \
  269. if (!building) { \
  270. building = true; \
  271. if (!sb.reserve(length)) \
  272. return false; \
  273. sb.infallibleAppend(chars, chars + k); \
  274. } \
  275. JS_END_MACRO
  276. /* Step 10-13. */
  277. if (Unhex4(&chars[k + 2], &c)) {
  278. ENSURE_BUILDING;
  279. k += 5;
  280. goto step_18;
  281. }
  282. step_14:
  283. /* Step 14. */
  284. if (k > length - 3)
  285. goto step_18;
  286. /* Step 15-17. */
  287. if (Unhex2(&chars[k + 1], &c)) {
  288. ENSURE_BUILDING;
  289. k += 2;
  290. }
  291. step_18:
  292. if (building)
  293. sb.infallibleAppend(c);
  294. /* Step 19. */
  295. k += 1;
  296. }
  297. #undef ENSURE_BUILDING
  298. }
  #if JS_HAS_UNEVAL
  /*
   * Native for uneval(): sets the return value to js_ValueToSource() of the
   * first argument, or of undefined when no argument was passed.
   */
  static JSBool
  str_uneval(JSContext *cx, uintN argc, Value *vp)
  {
      CallArgs args = CallArgsFromVp(argc, vp);

      Value operand = UndefinedValue();
      if (args.length() != 0)
          operand = args[0];

      JSString *source = js_ValueToSource(cx, operand);
      if (!source)
          return false;

      args.rval() = StringValue(source);
      return true;
  }
  #endif
  311. const char js_escape_str[] = "escape";
  312. const char js_unescape_str[] = "unescape";
  313. #if JS_HAS_UNEVAL
  314. const char js_uneval_str[] = "uneval";
  315. #endif
  316. const char js_decodeURI_str[] = "decodeURI";
  317. const char js_encodeURI_str[] = "encodeURI";
  318. const char js_decodeURIComponent_str[] = "decodeURIComponent";
  319. const char js_encodeURIComponent_str[] = "encodeURIComponent";
  320. static JSFunctionSpec string_functions[] = {
  321. JS_FN(js_escape_str, str_escape, 1,0),
  322. JS_FN(js_unescape_str, str_unescape, 1,0),
  323. #if JS_HAS_UNEVAL
  324. JS_FN(js_uneval_str, str_uneval, 1,0),
  325. #endif
  326. JS_FN(js_decodeURI_str, str_decodeURI, 1,0),
  327. JS_FN(js_encodeURI_str, str_encodeURI, 1,0),
  328. JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,0),
  329. JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,0),
  330. JS_FS_END
  331. };
  332. jschar js_empty_ucstr[] = {0};
  333. JSSubString js_EmptySubString = {0, js_empty_ucstr};
  334. static const uintN STRING_ELEMENT_ATTRS = JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
  335. static JSBool
  336. str_enumerate(JSContext *cx, JSObject *obj)
  337. {
  338. JSString *str = obj->getPrimitiveThis().toString();
  339. for (size_t i = 0, length = str->length(); i < length; i++) {
  340. JSString *str1 = js_NewDependentString(cx, str, i, 1);
  341. if (!str1)
  342. return false;
  343. if (!obj->defineElement(cx, i, StringValue(str1),
  344. JS_PropertyStub, JS_StrictPropertyStub,
  345. STRING_ELEMENT_ATTRS)) {
  346. return false;
  347. }
  348. }
  349. return true;
  350. }
  351. static JSBool
  352. str_resolve(JSContext *cx, JSObject *obj, jsid id, uintN flags,
  353. JSObject **objp)
  354. {
  355. if (!JSID_IS_INT(id))
  356. return JS_TRUE;
  357. JSString *str = obj->getPrimitiveThis().toString();
  358. jsint slot = JSID_TO_INT(id);
  359. if ((size_t)slot < str->length()) {
  360. JSString *str1 = cx->runtime->staticStrings.getUnitStringForElement(cx, str, size_t(slot));
  361. if (!str1)
  362. return JS_FALSE;
  363. if (!obj->defineElement(cx, uint32_t(slot), StringValue(str1), NULL, NULL,
  364. STRING_ELEMENT_ATTRS)) {
  365. return JS_FALSE;
  366. }
  367. *objp = obj;
  368. }
  369. return JS_TRUE;
  370. }
  371. Class js::StringClass = {
  372. js_String_str,
  373. JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
  374. JSCLASS_NEW_RESOLVE | JSCLASS_HAS_CACHED_PROTO(JSProto_String),
  375. JS_PropertyStub, /* addProperty */
  376. JS_PropertyStub, /* delProperty */
  377. JS_PropertyStub, /* getProperty */
  378. JS_StrictPropertyStub, /* setProperty */
  379. str_enumerate,
  380. (JSResolveOp)str_resolve,
  381. JS_ConvertStub
  382. };
  383. /*
  384. * Returns a JSString * for the |this| value associated with 'call', or throws
  385. * a TypeError if |this| is null or undefined. This algorithm is the same as
  386. * calling CheckObjectCoercible(this), then returning ToString(this), as all
  387. * String.prototype.* methods do (other than toString and valueOf).
  388. */
  389. static JS_ALWAYS_INLINE JSString *
  390. ThisToStringForStringProto(JSContext *cx, CallReceiver call)
  391. {
  392. JS_CHECK_RECURSION(cx, return NULL);
  393. if (call.thisv().isString())
  394. return call.thisv().toString();
  395. if (call.thisv().isObject()) {
  396. JSObject *obj = &call.thisv().toObject();
  397. if (obj->isString() &&
  398. ClassMethodIsNative(cx, obj,
  399. &StringClass,
  400. ATOM_TO_JSID(cx->runtime->atomState.toStringAtom),
  401. js_str_toString))
  402. {
  403. call.thisv() = obj->getPrimitiveThis();
  404. return call.thisv().toString();
  405. }
  406. } else if (call.thisv().isNullOrUndefined()) {
  407. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_CANT_CONVERT_TO,
  408. call.thisv().isNull() ? "null" : "undefined", "object");
  409. return NULL;
  410. }
  411. JSString *str = ToStringSlow(cx, call.thisv());
  412. if (!str)
  413. return NULL;
  414. call.thisv().setString(str);
  415. return str;
  416. }
  417. #if JS_HAS_TOSOURCE
  418. /*
  419. * String.prototype.quote is generic (as are most string methods), unlike
  420. * toSource, toString, and valueOf.
  421. */
  422. static JSBool
  423. str_quote(JSContext *cx, uintN argc, Value *vp)
  424. {
  425. CallArgs args = CallArgsFromVp(argc, vp);
  426. JSString *str = ThisToStringForStringProto(cx, args);
  427. if (!str)
  428. return false;
  429. str = js_QuoteString(cx, str, '"');
  430. if (!str)
  431. return false;
  432. args.rval() = StringValue(str);
  433. return true;
  434. }
  435. static JSBool
  436. str_toSource(JSContext *cx, uintN argc, Value *vp)
  437. {
  438. CallArgs args = CallArgsFromVp(argc, vp);
  439. JSString *str;
  440. bool ok;
  441. if (!BoxedPrimitiveMethodGuard(cx, args, str_toSource, &str, &ok))
  442. return ok;
  443. str = js_QuoteString(cx, str, '"');
  444. if (!str)
  445. return false;
  446. StringBuffer sb(cx);
  447. if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
  448. return false;
  449. str = sb.finishString();
  450. if (!str)
  451. return false;
  452. args.rval() = StringValue(str);
  453. return true;
  454. }
  455. #endif /* JS_HAS_TOSOURCE */
  456. JSBool
  457. js_str_toString(JSContext *cx, uintN argc, Value *vp)
  458. {
  459. CallArgs args = CallArgsFromVp(argc, vp);
  460. JSString *str;
  461. bool ok;
  462. if (!BoxedPrimitiveMethodGuard(cx, args, js_str_toString, &str, &ok))
  463. return ok;
  464. args.rval() = StringValue(str);
  465. return true;
  466. }
  467. /*
  468. * Java-like string native methods.
  469. */
  470. JS_ALWAYS_INLINE bool
  471. ValueToIntegerRange(JSContext *cx, const Value &v, int32_t *out)
  472. {
  473. if (v.isInt32()) {
  474. *out = v.toInt32();
  475. } else {
  476. double d;
  477. if (!ToInteger(cx, v, &d))
  478. return false;
  479. if (d > INT32_MAX)
  480. *out = INT32_MAX;
  481. else if (d < INT32_MIN)
  482. *out = INT32_MIN;
  483. else
  484. *out = int32_t(d);
  485. }
  486. return true;
  487. }
  488. static JSBool
  489. str_substring(JSContext *cx, uintN argc, Value *vp)
  490. {
  491. CallArgs args = CallArgsFromVp(argc, vp);
  492. JSString *str = ThisToStringForStringProto(cx, args);
  493. if (!str)
  494. return false;
  495. int32_t length, begin, end;
  496. if (args.length() > 0) {
  497. end = length = int32_t(str->length());
  498. if (!ValueToIntegerRange(cx, args[0], &begin))
  499. return false;
  500. if (begin < 0)
  501. begin = 0;
  502. else if (begin > length)
  503. begin = length;
  504. if (args.length() > 1 && !args[1].isUndefined()) {
  505. if (!ValueToIntegerRange(cx, args[1], &end))
  506. return false;
  507. if (end > length) {
  508. end = length;
  509. } else {
  510. if (end < 0)
  511. end = 0;
  512. if (end < begin) {
  513. int32_t tmp = begin;
  514. begin = end;
  515. end = tmp;
  516. }
  517. }
  518. }
  519. str = js_NewDependentString(cx, str, size_t(begin), size_t(end - begin));
  520. if (!str)
  521. return false;
  522. }
  523. args.rval() = StringValue(str);
  524. return true;
  525. }
  526. JSString* JS_FASTCALL
  527. js_toLowerCase(JSContext *cx, JSString *str)
  528. {
  529. size_t n = str->length();
  530. const jschar *s = str->getChars(cx);
  531. if (!s)
  532. return NULL;
  533. jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
  534. if (!news)
  535. return NULL;
  536. for (size_t i = 0; i < n; i++)
  537. news[i] = unicode::ToLowerCase(s[i]);
  538. news[n] = 0;
  539. str = js_NewString(cx, news, n);
  540. if (!str) {
  541. cx->free_(news);
  542. return NULL;
  543. }
  544. return str;
  545. }
  546. static inline bool
  547. ToLowerCaseHelper(JSContext *cx, CallReceiver call)
  548. {
  549. JSString *str = ThisToStringForStringProto(cx, call);
  550. if (!str)
  551. return false;
  552. str = js_toLowerCase(cx, str);
  553. if (!str)
  554. return false;
  555. call.rval() = StringValue(str);
  556. return true;
  557. }
  558. static JSBool
  559. str_toLowerCase(JSContext *cx, uintN argc, Value *vp)
  560. {
  561. return ToLowerCaseHelper(cx, CallArgsFromVp(argc, vp));
  562. }
  563. static JSBool
  564. str_toLocaleLowerCase(JSContext *cx, uintN argc, Value *vp)
  565. {
  566. CallArgs args = CallArgsFromVp(argc, vp);
  567. /*
  568. * Forcefully ignore the first (or any) argument and return toLowerCase(),
  569. * ECMA has reserved that argument, presumably for defining the locale.
  570. */
  571. if (cx->localeCallbacks && cx->localeCallbacks->localeToLowerCase) {
  572. JSString *str = ThisToStringForStringProto(cx, args);
  573. if (!str)
  574. return false;
  575. Value result;
  576. if (!cx->localeCallbacks->localeToLowerCase(cx, str, &result))
  577. return false;
  578. args.rval() = result;
  579. return true;
  580. }
  581. return ToLowerCaseHelper(cx, args);
  582. }
  583. JSString* JS_FASTCALL
  584. js_toUpperCase(JSContext *cx, JSString *str)
  585. {
  586. size_t n = str->length();
  587. const jschar *s = str->getChars(cx);
  588. if (!s)
  589. return NULL;
  590. jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
  591. if (!news)
  592. return NULL;
  593. for (size_t i = 0; i < n; i++)
  594. news[i] = unicode::ToUpperCase(s[i]);
  595. news[n] = 0;
  596. str = js_NewString(cx, news, n);
  597. if (!str) {
  598. cx->free_(news);
  599. return NULL;
  600. }
  601. return str;
  602. }
  603. static JSBool
  604. ToUpperCaseHelper(JSContext *cx, CallReceiver call)
  605. {
  606. JSString *str = ThisToStringForStringProto(cx, call);
  607. if (!str)
  608. return false;
  609. str = js_toUpperCase(cx, str);
  610. if (!str)
  611. return false;
  612. call.rval() = StringValue(str);
  613. return true;
  614. }
  615. static JSBool
  616. str_toUpperCase(JSContext *cx, uintN argc, Value *vp)
  617. {
  618. return ToUpperCaseHelper(cx, CallArgsFromVp(argc, vp));
  619. }
  620. static JSBool
  621. str_toLocaleUpperCase(JSContext *cx, uintN argc, Value *vp)
  622. {
  623. CallArgs args = CallArgsFromVp(argc, vp);
  624. /*
  625. * Forcefully ignore the first (or any) argument and return toUpperCase(),
  626. * ECMA has reserved that argument, presumably for defining the locale.
  627. */
  628. if (cx->localeCallbacks && cx->localeCallbacks->localeToUpperCase) {
  629. JSString *str = ThisToStringForStringProto(cx, args);
  630. if (!str)
  631. return false;
  632. Value result;
  633. if (!cx->localeCallbacks->localeToUpperCase(cx, str, &result))
  634. return false;
  635. args.rval() = result;
  636. return true;
  637. }
  638. return ToUpperCaseHelper(cx, args);
  639. }
  640. static JSBool
  641. str_localeCompare(JSContext *cx, uintN argc, Value *vp)
  642. {
  643. CallArgs args = CallArgsFromVp(argc, vp);
  644. JSString *str = ThisToStringForStringProto(cx, args);
  645. if (!str)
  646. return false;
  647. if (args.length() == 0) {
  648. args.rval() = Int32Value(0);
  649. } else {
  650. JSString *thatStr = ToString(cx, args[0]);
  651. if (!thatStr)
  652. return false;
  653. if (cx->localeCallbacks && cx->localeCallbacks->localeCompare) {
  654. args[0].setString(thatStr);
  655. Value result;
  656. if (!cx->localeCallbacks->localeCompare(cx, str, thatStr, &result))
  657. return true;
  658. args.rval() = result;
  659. return true;
  660. }
  661. int32_t result;
  662. if (!CompareStrings(cx, str, thatStr, &result))
  663. return false;
  664. args.rval() = Int32Value(result);
  665. }
  666. return true;
  667. }
  668. JSBool
  669. js_str_charAt(JSContext *cx, uintN argc, Value *vp)
  670. {
  671. CallArgs args = CallArgsFromVp(argc, vp);
  672. JSString *str;
  673. size_t i;
  674. if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
  675. str = args.thisv().toString();
  676. i = size_t(args[0].toInt32());
  677. if (i >= str->length())
  678. goto out_of_range;
  679. } else {
  680. str = ThisToStringForStringProto(cx, args);
  681. if (!str)
  682. return false;
  683. double d = 0.0;
  684. if (args.length() > 0 && !ToInteger(cx, args[0], &d))
  685. return false;
  686. if (d < 0 || str->length() <= d)
  687. goto out_of_range;
  688. i = size_t(d);
  689. }
  690. str = cx->runtime->staticStrings.getUnitStringForElement(cx, str, i);
  691. if (!str)
  692. return false;
  693. args.rval() = StringValue(str);
  694. return true;
  695. out_of_range:
  696. args.rval() = StringValue(cx->runtime->emptyString);
  697. return true;
  698. }
  699. JSBool
  700. js_str_charCodeAt(JSContext *cx, uintN argc, Value *vp)
  701. {
  702. CallArgs args = CallArgsFromVp(argc, vp);
  703. JSString *str;
  704. size_t i;
  705. if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
  706. str = args.thisv().toString();
  707. i = size_t(args[0].toInt32());
  708. if (i >= str->length())
  709. goto out_of_range;
  710. } else {
  711. str = ThisToStringForStringProto(cx, args);
  712. if (!str)
  713. return false;
  714. double d = 0.0;
  715. if (args.length() > 0 && !ToInteger(cx, args[0], &d))
  716. return false;
  717. if (d < 0 || str->length() <= d)
  718. goto out_of_range;
  719. i = size_t(d);
  720. }
  721. const jschar *chars;
  722. chars = str->getChars(cx);
  723. if (!chars)
  724. return false;
  725. args.rval() = Int32Value(chars[i]);
  726. return true;
  727. out_of_range:
  728. args.rval() = DoubleValue(js_NaN);
  729. return true;
  730. }
  731. /*
  732. * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
  733. * The patlen argument must be positive and no greater than sBMHPatLenMax.
  734. *
  735. * Return the index of pat in text, or -1 if not found.
  736. */
  737. static const jsuint sBMHCharSetSize = 256; /* ISO-Latin-1 */
  738. static const jsuint sBMHPatLenMax = 255; /* skip table element is uint8_t */
  739. static const jsint sBMHBadPattern = -2; /* return value if pat is not ISO-Latin-1 */
  740. jsint
  741. js_BoyerMooreHorspool(const jschar *text, jsuint textlen,
  742. const jschar *pat, jsuint patlen)
  743. {
  744. uint8_t skip[sBMHCharSetSize];
  745. JS_ASSERT(0 < patlen && patlen <= sBMHPatLenMax);
  746. for (jsuint i = 0; i < sBMHCharSetSize; i++)
  747. skip[i] = (uint8_t)patlen;
  748. jsuint m = patlen - 1;
  749. for (jsuint i = 0; i < m; i++) {
  750. jschar c = pat[i];
  751. if (c >= sBMHCharSetSize)
  752. return sBMHBadPattern;
  753. skip[c] = (uint8_t)(m - i);
  754. }
  755. jschar c;
  756. for (jsuint k = m;
  757. k < textlen;
  758. k += ((c = text[k]) >= sBMHCharSetSize) ? patlen : skip[c]) {
  759. for (jsuint i = k, j = m; ; i--, j--) {
  760. if (text[i] != pat[j])
  761. break;
  762. if (j == 0)
  763. return static_cast<jsint>(i); /* safe: max string size */
  764. }
  765. }
  766. return -1;
  767. }
  768. struct MemCmp {
  769. typedef jsuint Extent;
  770. static JS_ALWAYS_INLINE Extent computeExtent(const jschar *, jsuint patlen) {
  771. return (patlen - 1) * sizeof(jschar);
  772. }
  773. static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
  774. return memcmp(p, t, extent) == 0;
  775. }
  776. };
  777. struct ManualCmp {
  778. typedef const jschar *Extent;
  779. static JS_ALWAYS_INLINE Extent computeExtent(const jschar *pat, jsuint patlen) {
  780. return pat + patlen;
  781. }
  782. static JS_ALWAYS_INLINE bool match(const jschar *p, const jschar *t, Extent extent) {
  783. for (; p != extent; ++p, ++t) {
  784. if (*p != *t)
  785. return false;
  786. }
  787. return true;
  788. }
  789. };
  790. template <class InnerMatch>
  791. static jsint
  792. UnrolledMatch(const jschar *text, jsuint textlen, const jschar *pat, jsuint patlen)
  793. {
  794. JS_ASSERT(patlen > 0 && textlen > 0);
  795. const jschar *textend = text + textlen - (patlen - 1);
  796. const jschar p0 = *pat;
  797. const jschar *const patNext = pat + 1;
  798. const typename InnerMatch::Extent extent = InnerMatch::computeExtent(pat, patlen);
  799. uint8_t fixup;
  800. const jschar *t = text;
  801. switch ((textend - t) & 7) {
  802. case 0: if (*t++ == p0) { fixup = 8; goto match; }
  803. case 7: if (*t++ == p0) { fixup = 7; goto match; }
  804. case 6: if (*t++ == p0) { fixup = 6; goto match; }
  805. case 5: if (*t++ == p0) { fixup = 5; goto match; }
  806. case 4: if (*t++ == p0) { fixup = 4; goto match; }
  807. case 3: if (*t++ == p0) { fixup = 3; goto match; }
  808. case 2: if (*t++ == p0) { fixup = 2; goto match; }
  809. case 1: if (*t++ == p0) { fixup = 1; goto match; }
  810. }
  811. while (t != textend) {
  812. if (t[0] == p0) { t += 1; fixup = 8; goto match; }
  813. if (t[1] == p0) { t += 2; fixup = 7; goto match; }
  814. if (t[2] == p0) { t += 3; fixup = 6; goto match; }
  815. if (t[3] == p0) { t += 4; fixup = 5; goto match; }
  816. if (t[4] == p0) { t += 5; fixup = 4; goto match; }
  817. if (t[5] == p0) { t += 6; fixup = 3; goto match; }
  818. if (t[6] == p0) { t += 7; fixup = 2; goto match; }
  819. if (t[7] == p0) { t += 8; fixup = 1; goto match; }
  820. t += 8;
  821. continue;
  822. do {
  823. if (*t++ == p0) {
  824. match:
  825. if (!InnerMatch::match(patNext, t, extent))
  826. goto failed_match;
  827. return t - text - 1;
  828. }
  829. failed_match:;
  830. } while (--fixup > 0);
  831. }
  832. return -1;
  833. }
  834. static JS_ALWAYS_INLINE jsint
  835. StringMatch(const jschar *text, jsuint textlen,
  836. const jschar *pat, jsuint patlen)
  837. {
  838. if (patlen == 0)
  839. return 0;
  840. if (textlen < patlen)
  841. return -1;
  842. #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
  843. /*
  844. * Given enough registers, the unrolled loop below is faster than the
  845. * following loop. 32-bit x86 does not have enough registers.
  846. */
  847. if (patlen == 1) {
  848. const jschar p0 = *pat;
  849. for (const jschar *c = text, *end = text + textlen; c != end; ++c) {
  850. if (*c == p0)
  851. return c - text;
  852. }
  853. return -1;
  854. }
  855. #endif
  856. /*
  857. * If the text or pattern string is short, BMH will be more expensive than
  858. * the basic linear scan due to initialization cost and a more complex loop
  859. * body. While the correct threshold is input-dependent, we can make a few
  860. * conservative observations:
  861. * - When |textlen| is "big enough", the initialization time will be
  862. * proportionally small, so the worst-case slowdown is minimized.
  863. * - When |patlen| is "too small", even the best case for BMH will be
  864. * slower than a simple scan for large |textlen| due to the more complex
  865. * loop body of BMH.
  866. * From this, the values for "big enough" and "too small" are determined
  867. * empirically. See bug 526348.
  868. */
  869. if (textlen >= 512 && patlen >= 11 && patlen <= sBMHPatLenMax) {
  870. jsint index = js_BoyerMooreHorspool(text, textlen, pat, patlen);
  871. if (index != sBMHBadPattern)
  872. return index;
  873. }
  874. /*
  875. * For big patterns with large potential overlap we want the SIMD-optimized
  876. * speed of memcmp. For small patterns, a simple loop is faster.
  877. *
  878. * FIXME: Linux memcmp performance is sad and the manual loop is faster.
  879. */
  880. return
  881. #if !defined(__linux__)
  882. patlen > 128 ? UnrolledMatch<MemCmp>(text, textlen, pat, patlen)
  883. :
  884. #endif
  885. UnrolledMatch<ManualCmp>(text, textlen, pat, patlen);
  886. }
  /*
   * RopeMatch shifts the text length right by this amount to obtain the bound
   * on how many rope segments it collects before falling back to a flat
   * StringMatch over the whole text.
   */
  887. static const size_t sRopeMatchThresholdRatioLog2 = 5;
  888. /*
  889. * RopeMatch takes the text to search and the pattern to search for in the text.
  890. * RopeMatch returns false on OOM and otherwise returns the match index through
  891. * the 'match' outparam (-1 for not found).
  892. */
  893. static bool
  894. RopeMatch(JSContext *cx, JSString *textstr, const jschar *pat, jsuint patlen, jsint *match)
  895. {
  896. JS_ASSERT(textstr->isRope());
  897. if (patlen == 0) {
  898. *match = 0;
  899. return true;
  900. }
  901. if (textstr->length() < patlen) {
  902. *match = -1;
  903. return true;
  904. }
  905. /*
  906. * List of leaf nodes in the rope. If we run out of memory when trying to
  907. * append to this list, we can still fall back to StringMatch, so use the
  908. * system allocator so we don't report OOM in that case.
  909. */
  910. Vector<JSLinearString *, 16, SystemAllocPolicy> strs;
  911. /*
  912. * We don't want to do rope matching if there is a poor node-to-char ratio,
  913. * since this means spending a lot of time in the match loop below. We also
  914. * need to build the list of leaf nodes. Do both here: iterate over the
  915. * nodes so long as there are not too many.
  916. */
  917. {
  918. size_t textstrlen = textstr->length();
  919. size_t threshold = textstrlen >> sRopeMatchThresholdRatioLog2;
  920. StringSegmentRange r(cx);
  921. if (!r.init(textstr))
  922. return false;
  923. while (!r.empty()) {
  924. if (threshold-- == 0 || !strs.append(r.front())) {
  925. const jschar *chars = textstr->getChars(cx);
  926. if (!chars)
  927. return false;
  928. *match = StringMatch(chars, textstrlen, pat, patlen);
  929. return true;
  930. }
  931. if (!r.popFront())
  932. return false;
  933. }
  934. }
  935. /* Absolute offset from the beginning of the logical string textstr. */
  936. jsint pos = 0;
  937. for (JSLinearString **outerp = strs.begin(); outerp != strs.end(); ++outerp) {
  938. /* Try to find a match within 'outer'. */
  939. JSLinearString *outer = *outerp;
  940. const jschar *chars = outer->chars();
  941. size_t len = outer->length();
  942. jsint matchResult = StringMatch(chars, len, pat, patlen);
  943. if (matchResult != -1) {
  944. /* Matched! */
  945. *match = pos + matchResult;
  946. return true;
  947. }
  948. /* Try to find a match starting in 'outer' and running into other nodes. */
  949. const jschar *const text = chars + (patlen > len ? 0 : len - patlen + 1);
  950. const jschar *const textend = chars + len;
  951. const jschar p0 = *pat;
  952. const jschar *const p1 = pat + 1;
  953. const jschar *const patend = pat + patlen;
  954. for (const jschar *t = text; t != textend; ) {
  955. if (*t++ != p0)
  956. continue;
  957. JSLinearString **innerp = outerp;
  958. const jschar *ttend = textend;
  959. for (const jschar *pp = p1, *tt = t; pp != patend; ++pp, ++tt) {
  960. while (tt == ttend) {
  961. if (++innerp == strs.end()) {
  962. *match = -1;
  963. return true;
  964. }
  965. JSLinearString *inner = *innerp;
  966. tt = inner->chars();
  967. ttend = tt + inner->length();
  968. }
  969. if (*pp != *tt)
  970. goto break_continue;
  971. }
  972. /* Matched! */
  973. *match = pos + (t - chars) - 1; /* -1 because of *t++ above */
  974. return true;
  975. break_continue:;
  976. }
  977. pos += len;
  978. }
  979. *match = -1;
  980. return true;
  981. }
  982. static JSBool
  983. str_indexOf(JSContext *cx, uintN argc, Value *vp)
  984. {
  985. CallArgs args = CallArgsFromVp(argc, vp);
  986. JSString *str = ThisToStringForStringProto(cx, args);
  987. if (!str)
  988. return false;
  989. JSLinearString *patstr = ArgToRootedString(cx, args, 0);
  990. if (!patstr)
  991. return false;
  992. jsuint textlen = str->length();
  993. const jschar *text = str->getChars(cx);
  994. if (!text)
  995. return false;
  996. jsuint patlen = patstr->length();
  997. const jschar *pat = patstr->chars();
  998. jsuint start;
  999. if (args.length() > 1) {
  1000. if (args[1].isInt32()) {
  1001. jsint i = args[1].toInt32();
  1002. if (i <= 0) {
  1003. start = 0;
  1004. } else if (jsuint(i) > textlen) {
  1005. start = textlen;
  1006. textlen = 0;
  1007. } else {
  1008. start = i;
  1009. text += start;
  1010. textlen -= start;
  1011. }
  1012. } else {
  1013. jsdouble d;
  1014. if (!ToInteger(cx, args[1], &d))
  1015. return false;
  1016. if (d <= 0) {
  1017. start = 0;
  1018. } else if (d > textlen) {
  1019. start = textlen;
  1020. textlen = 0;
  1021. } else {
  1022. start = (jsint)d;
  1023. text += start;
  1024. textlen -= start;
  1025. }
  1026. }
  1027. } else {
  1028. start = 0;
  1029. }
  1030. jsint match = StringMatch(text, textlen, pat, patlen);
  1031. args.rval() = Int32Value((match == -1) ? -1 : start + match);
  1032. return true;
  1033. }
  1034. static JSBool
  1035. str_lastIndexOf(JSContext *cx, uintN argc, Value *vp)
  1036. {
  1037. CallArgs args = CallArgsFromVp(argc, vp);
  1038. JSString *textstr = ThisToStringForStringProto(cx, args);
  1039. if (!textstr)
  1040. return false;
  1041. size_t textlen = textstr->length();
  1042. const jschar *text = textstr->getChars(cx);
  1043. if (!text)
  1044. return false;
  1045. JSLinearString *patstr = ArgToRootedString(cx, args, 0);
  1046. if (!patstr)
  1047. return false;
  1048. size_t patlen = patstr->length();
  1049. const jschar *pat = patstr->chars();
  1050. jsint i = textlen - patlen; // Start searching here
  1051. if (i < 0) {
  1052. args.rval() = Int32Value(-1);
  1053. return true;
  1054. }
  1055. if (args.length() > 1) {
  1056. if (args[1].isInt32()) {
  1057. jsint j = args[1].toInt32();
  1058. if (j <= 0)
  1059. i = 0;
  1060. else if (j < i)
  1061. i = j;
  1062. } else {
  1063. double d;
  1064. if (!ToNumber(cx, args[1], &d))
  1065. return false;
  1066. if (!JSDOUBLE_IS_NaN(d)) {
  1067. d = js_DoubleToInteger(d);
  1068. if (d <= 0)
  1069. i = 0;
  1070. else if (d < i)
  1071. i = (jsint)d;
  1072. }
  1073. }
  1074. }
  1075. if (patlen == 0) {
  1076. args.rval() = Int32Value(i);
  1077. return true;
  1078. }
  1079. const jschar *t = text + i;
  1080. const jschar *textend = text - 1;
  1081. const jschar p0 = *pat;
  1082. const jschar *patNext = pat + 1;
  1083. const jschar *patEnd = pat + patlen;
  1084. for (; t != textend; --t) {
  1085. if (*t == p0) {
  1086. const jschar *t1 = t + 1;
  1087. for (const jschar *p1 = patNext; p1 != patEnd; ++p1, ++t1) {
  1088. if (*t1 != *p1)
  1089. goto break_continue;
  1090. }
  1091. args.rval() = Int32Value(t - text);
  1092. return true;
  1093. }
  1094. break_continue:;
  1095. }
  1096. args.rval() = Int32Value(-1);
  1097. return true;
  1098. }
  1099. static JSBool
  1100. js_TrimString(JSContext *cx, Value *vp, JSBool trimLeft, JSBool trimRight)
  1101. {
  1102. CallReceiver call = CallReceiverFromVp(vp);
  1103. JSString *str = ThisToStringForStringProto(cx, call);
  1104. if (!str)
  1105. return false;
  1106. size_t length = str->length();
  1107. const jschar *chars = str->getChars(cx);
  1108. if (!chars)
  1109. return false;
  1110. size_t begin = 0;
  1111. size_t end = length;
  1112. if (trimLeft) {
  1113. while (begin < length && unicode::IsSpace(chars[begin]))
  1114. ++begin;
  1115. }
  1116. if (trimRight) {
  1117. while (end > begin && unicode::IsSpace(chars[end - 1]))
  1118. --end;
  1119. }
  1120. str = js_NewDependentString(cx, str, begin, end - begin);
  1121. if (!str)
  1122. return false;
  1123. call.rval() = StringValue(str);
  1124. return true;
  1125. }
  1126. static JSBool
  1127. str_trim(JSContext *cx, uintN argc, Value *vp)
  1128. {
  1129. return js_TrimString(cx, vp, JS_TRUE, JS_TRUE);
  1130. }
  1131. static JSBool
  1132. str_trimLeft(JSContext *cx, uintN argc, Value *vp)
  1133. {
  1134. return js_TrimString(cx, vp, JS_TRUE, JS_FALSE);
  1135. }
  1136. static JSBool
  1137. str_trimRight(JSContext *cx, uintN argc, Value *vp)
  1138. {
  1139. return js_TrimString(cx, vp, JS_FALSE, JS_TRUE);
  1140. }
  1141. /*
  1142. * Perl-inspired string functions.
  1143. */
  1144. /* Result of a successfully performed flat match. */
  1145. class FlatMatch
  1146. {
  1147. JSAtom *patstr;
  1148. const jschar *pat;
  1149. size_t patlen;
  1150. int32_t match_;
  1151. friend class RegExpGuard;
  1152. public:
  1153. FlatMatch() : patstr(NULL) {} /* Old GCC wants this initialization. */
  1154. JSLinearString *pattern() const { return patstr; }
  1155. size_t patternLength() const { return patlen; }
  1156. /*
  1157. * Note: The match is -1 when the match is performed successfully,
  1158. * but no match is found.
  1159. */
  1160. int32_t match() const { return match_; }
  1161. };
  1162. static inline bool
  1163. IsRegExpMetaChar(jschar c)
  1164. {
  1165. switch (c) {
  1166. /* Taken from the PatternCharacter production in 15.10.1. */
  1167. case '^': case '$': case '\\': case '.': case '*': case '+':
  1168. case '?': case '(': case ')': case '[': case ']': case '{':
  1169. case '}': case '|':
  1170. return true;
  1171. default:
  1172. return false;
  1173. }
  1174. }
  1175. static inline bool
  1176. HasRegExpMetaChars(const jschar *chars, size_t length)
  1177. {
  1178. for (size_t i = 0; i < length; ++i) {
  1179. if (IsRegExpMetaChar(chars[i]))
  1180. return true;
  1181. }
  1182. return false;
  1183. }
  1184. /*
  1185. * RegExpGuard factors logic out of String regexp operations.
  1186. *
  1187. * |optarg| indicates in which argument position RegExp flags will be found, if
  1188. * present. This is a Mozilla extension and not part of any ECMA spec.
  1189. */
  1190. class RegExpGuard
  1191. {
  1192. RegExpGuard(const RegExpGuard &) MOZ_DELETE;
  1193. void operator=(const RegExpGuard &) MOZ_DELETE;
  1194. RegExpShared::Guard re_;
  1195. FlatMatch fm;
  1196. /*
  1197. * Upper bound on the number of characters we are willing to potentially
  1198. * waste on searching for RegExp meta-characters.
  1199. */
  1200. static const size_t MAX_FLAT_PAT_LEN = 256;
  1201. static JSAtom *
  1202. flattenPattern(JSContext *cx, JSAtom *patstr)
  1203. {
  1204. StringBuffer sb(cx);
  1205. if (!sb.reserve(patstr->length()))
  1206. return NULL;
  1207. static const jschar ESCAPE_CHAR = '\\';
  1208. const jschar *chars = patstr->chars();
  1209. size_t len = patstr->length();
  1210. for (const jschar *it = chars; it != chars + len; ++it) {
  1211. if (IsRegExpMetaChar(*it)) {
  1212. if (!sb.append(ESCAPE_CHAR) || !sb.append(*it))
  1213. return NULL;
  1214. } else {
  1215. if (!sb.append(*it))
  1216. return NULL;
  1217. }
  1218. }
  1219. return sb.finishAtom();
  1220. }
  1221. public:
  1222. RegExpGuard() {}
  1223. /* init must succeed in order to call tryFlatMatch or normalizeRegExp. */
  1224. bool init(JSContext *cx, CallArgs args, bool convertVoid = false)
  1225. {
  1226. if (args.length() != 0 && IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
  1227. RegExpShared *shared = RegExpToShared(cx, args[0].toObject());
  1228. if (!shared)
  1229. return false;
  1230. re_.init(*shared);
  1231. } else {
  1232. if (convertVoid && (args.length() == 0 || args[0].isUndefined())) {
  1233. fm.patstr = cx->runtime->emptyString;
  1234. return true;
  1235. }
  1236. JSString *arg = ArgToRootedString(cx, args, 0);
  1237. if (!arg)
  1238. return false;
  1239. fm.patstr = js_AtomizeString(cx, arg);
  1240. if (!fm.patstr)
  1241. return false;
  1242. }
  1243. return true;
  1244. }
  1245. /*
  1246. * Attempt to match |patstr| to |textstr|. A flags argument, metachars in the
  1247. * pattern string, or a lengthy pattern string can thwart this process.
  1248. *
  1249. * |checkMetaChars| looks for regexp metachars in the pattern string.
  1250. *
  1251. * Return whether flat matching could be used.
  1252. *
  1253. * N.B. tryFlatMatch returns NULL on OOM, so the caller must check cx->isExceptionPending().
  1254. */
  1255. const FlatMatch *
  1256. tryFlatMatch(JSContext *cx, JSString *textstr, uintN optarg, uintN argc,
  1257. bool checkMetaChars = true)
  1258. {
  1259. if (re_.initialized())
  1260. return NULL;
  1261. fm.pat = fm.patstr->chars();
  1262. fm.patlen = fm.patstr->length();
  1263. if (optarg < argc)
  1264. return NULL;
  1265. if (checkMetaChars &&
  1266. (fm.patlen > MAX_FLAT_PAT_LEN || HasRegExpMetaChars(fm.pat, fm.patlen))) {
  1267. return NULL;
  1268. }
  1269. /*
  1270. * textstr could be a rope, so we want to avoid flattening it for as
  1271. * long as possible.
  1272. */
  1273. if (textstr->isRope()) {
  1274. if (!RopeMatch(cx, textstr, fm.pat, fm.patlen, &fm.match_))
  1275. return NULL;
  1276. } else {
  1277. const jschar *text = textstr->asLinear().chars();
  1278. size_t textlen = textstr->length();
  1279. fm.match_ = StringMatch(text, textlen, fm.pat, fm.patlen);
  1280. }
  1281. return &fm;
  1282. }
  1283. /* If the pattern is not already a regular expression, make it so. */
  1284. bool normalizeRegExp(JSContext *cx, bool flat, uintN optarg, CallArgs args)
  1285. {
  1286. if (re_.initialized())
  1287. return true;
  1288. /* Build RegExp from pattern string. */
  1289. JSString *opt;
  1290. if (optarg < args.length()) {
  1291. opt = ToString(cx, args[optarg]);
  1292. if (!opt)
  1293. return false;
  1294. } else {
  1295. opt = NULL;
  1296. }
  1297. JSAtom *patstr;
  1298. if (flat) {
  1299. patstr = flattenPattern(cx, fm.patstr);
  1300. if (!patstr)
  1301. return false;
  1302. } else {
  1303. patstr = fm.patstr;
  1304. }
  1305. JS_ASSERT(patstr);
  1306. RegExpShared *re = cx->compartment->regExps.get(cx, patstr, opt);
  1307. if (!re)
  1308. return false;
  1309. re_.init(*re);
  1310. return true;
  1311. }
  1312. RegExpShared &regExp() { return *re_; }
  1313. };
  1314. /* ExecuteRegExp indicates success in two ways, based on the 'test' flag. */
  1315. static JS_ALWAYS_INLINE bool
  1316. Matched(RegExpExecType type, const Value &v)
  1317. {
  1318. return (type == RegExpTest) ? v.isTrue() : !v.isNull();
  1319. }
  1320. typedef bool (*DoMatchCallback)(JSContext *cx, RegExpStatics *res, size_t count, void *data);
  1321. /*
  1322. * BitOR-ing these flags allows the DoMatch caller to control when how the
  1323. * RegExp engine is called and when callbacks are fired.
  1324. */
  1325. enum MatchControlFlags {
  1326. TEST_GLOBAL_BIT = 0x1, /* use RegExp.test for global regexps */
  1327. TEST_SINGLE_BIT = 0x2, /* use RegExp.test for non-global regexps */
  1328. CALLBACK_ON_SINGLE_BIT = 0x4, /* fire callback on non-global match */
  1329. MATCH_ARGS = TEST_GLOBAL_BIT,
  1330. MATCHALL_ARGS = CALLBACK_ON_SINGLE_BIT,
  1331. REPLACE_ARGS = TEST_GLOBAL_BIT | TEST_SINGLE_BIT | CALLBACK_ON_SINGLE_BIT
  1332. };
  1333. /* Factor out looping and matching logic. */
  1334. static bool
  1335. DoMatch(JSContext *cx, RegExpStatics *res, JSString *str, RegExpShared &re,
  1336. DoMatchCallback callback, void *data, MatchControlFlags flags, Value *rval)
  1337. {
  1338. JSLinearString *linearStr = str->ensureLinear(cx);
  1339. if (!linearStr)
  1340. return false;
  1341. const jschar *chars = linearStr->chars();
  1342. size_t length = linearStr->length();
  1343. if (re.global()) {
  1344. RegExpExecType type = (flags & TEST_GLOBAL_BIT) ? RegExpTest : RegExpExec;
  1345. for (size_t count = 0, i = 0, length = str->length(); i <= length; ++count) {
  1346. if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
  1347. return false;
  1348. if (!Matched(type, *rval))
  1349. break;
  1350. if (!callback(cx, res, count, data))
  1351. return false;
  1352. if (!res->matched())
  1353. ++i;
  1354. }
  1355. } else {
  1356. RegExpExecType type = (flags & TEST_SINGLE_BIT) ? RegExpTest : RegExpExec;
  1357. bool callbackOnSingle = !!(flags & CALLBACK_ON_SINGLE_BIT);
  1358. size_t i = 0;
  1359. if (!ExecuteRegExp(cx, res, re, linearStr, chars, length, &i, type, rval))
  1360. return false;
  1361. if (callbackOnSingle && Matched(type, *rval) && !callback(cx, res, 0, data))
  1362. return false;
  1363. }
  1364. return true;
  1365. }
  1366. static bool
  1367. BuildFlatMatchArray(JSContext *cx, JSString *textstr, const FlatMatch &fm, CallArgs *args)
  1368. {
  1369. if (fm.match() < 0) {
  1370. args->rval() = NullValue();
  1371. return true;
  1372. }
  1373. /* For this non-global match, produce a RegExp.exec-style array. */
  1374. JSObject *obj = NewSlowEmptyArray(cx);
  1375. if (!obj)
  1376. return false;
  1377. if (!obj->defineElement(cx, 0, StringValue(fm.pattern())) ||
  1378. !obj->defineProperty(cx, cx->runtime->atomState.indexAtom, Int32Value(fm.match())) ||
  1379. !obj->defineProperty(cx, cx->runtime->atomState.inputAtom, StringValue(textstr)))
  1380. {
  1381. return false;
  1382. }
  1383. args->rval() = ObjectValue(*obj);
  1384. return true;
  1385. }
  1386. typedef JSObject **MatchArgType;
  1387. /*
  1388. * DoMatch will only callback on global matches, hence this function builds
  1389. * only the "array of matches" returned by match on global regexps.
  1390. */
  1391. static bool
  1392. MatchCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
  1393. {
  1394. JS_ASSERT(count <= JSID_INT_MAX); /* by max string length */
  1395. JSObject *&arrayobj = *static_cast<MatchArgType>(p);
  1396. if (!arrayobj) {
  1397. arrayobj = NewDenseEmptyArray(cx);
  1398. if (!arrayobj)
  1399. return false;
  1400. }
  1401. Value v;
  1402. return res->createLastMatch(cx, &v) && arrayobj->defineElement(cx, count, v);
  1403. }
  1404. JSBool
  1405. js::str_match(JSContext *cx, uintN argc, Value *vp)
  1406. {
  1407. CallArgs args = CallArgsFromVp(argc, vp);
  1408. JSString *str = ThisToStringForStringProto(cx, args);
  1409. if (!str)
  1410. return false;
  1411. RegExpGuard g;
  1412. if (!g.init(cx, args, true))
  1413. return false;
  1414. if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length()))
  1415. return BuildFlatMatchArray(cx, str, *fm, &args);
  1416. /* Return if there was an error in tryFlatMatch. */
  1417. if (cx->isExceptionPending())
  1418. return false;
  1419. if (!g.normalizeRegExp(cx, false, 1, args))
  1420. return false;
  1421. JSObject *array = NULL;
  1422. MatchArgType arg = &array;
  1423. RegExpStatics *res = cx->regExpStatics();
  1424. Value rval;
  1425. if (!DoMatch(cx, res, str, g.regExp(), MatchCallback, arg, MATCH_ARGS, &rval))
  1426. return false;
  1427. if (g.regExp().global())
  1428. args.rval() = ObjectOrNullValue(array);
  1429. else
  1430. args.rval() = rval;
  1431. return true;
  1432. }
  1433. JSBool
  1434. js::str_search(JSContext *cx, uintN argc, Value *vp)
  1435. {
  1436. CallArgs args = CallArgsFromVp(argc, vp);
  1437. JSString *str = ThisToStringForStringProto(cx, args);
  1438. if (!str)
  1439. return false;
  1440. RegExpGuard g;
  1441. if (!g.init(cx, args, true))
  1442. return false;
  1443. if (const FlatMatch *fm = g.tryFlatMatch(cx, str, 1, args.length())) {
  1444. args.rval() = Int32Value(fm->match());
  1445. return true;
  1446. }
  1447. if (cx->isExceptionPending()) /* from tryFlatMatch */
  1448. return false;
  1449. if (!g.normalizeRegExp(cx, false, 1, args))
  1450. return false;
  1451. JSLinearString *linearStr = str->ensureLinear(cx);
  1452. if (!linearStr)
  1453. return false;
  1454. const jschar *chars = linearStr->chars();
  1455. size_t length = linearStr->length();
  1456. RegExpStatics *res = cx->regExpStatics();
  1457. /* Per ECMAv5 15.5.4.12 (5) The last index property is ignored and left unchanged. */
  1458. size_t i = 0;
  1459. Value result;
  1460. if (!ExecuteRegExp(cx, res, g.regExp(), linearStr, chars, length, &i, RegExpTest, &result))
  1461. return false;
  1462. if (result.isTrue())
  1463. args.rval() = Int32Value(res->matchStart());
  1464. else
  1465. args.rval() = Int32Value(-1);
  1466. return true;
  1467. }
  1468. struct ReplaceData
  1469. {
  1470. ReplaceData(JSContext *cx)
  1471. : sb(cx)
  1472. {}
  1473. JSString *str; /* 'this' parameter object as a string */
  1474. RegExpGuard g; /* regexp parameter object and private data */
  1475. JSObject *lambda; /* replacement function object or null */
  1476. JSObject *elembase; /* object for function(a){return b[a]} replace */
  1477. JSLinearString *repstr; /* replacement string */
  1478. const jschar *dollar; /* null or pointer to first $ in repstr */
  1479. const jschar *dollarEnd; /* limit pointer for js_strchr_limit */
  1480. jsint leftIndex; /* left context index in str->chars */
  1481. JSSubString dollarStr; /* for "$$" InterpretDollar result */
  1482. bool calledBack; /* record whether callback has been called */
  1483. InvokeArgsGuard args; /* arguments for lambda call */
  1484. StringBuffer sb; /* buffer built during DoMatch */
  1485. };
  1486. static bool
  1487. InterpretDollar(JSContext *cx, RegExpStatics *res, const jschar *dp, const jschar *ep,
  1488. ReplaceData &rdata, JSSubString *out, size_t *skip)
  1489. {
  1490. JS_ASSERT(*dp == '$');
  1491. /* If there is only a dollar, bail now */
  1492. if (dp + 1 >= ep)
  1493. return false;
  1494. /* Interpret all Perl match-induced dollar variables. */
  1495. jschar dc = dp[1];
  1496. if (JS7_ISDEC(dc)) {
  1497. /* ECMA-262 Edition 3: 1-9 or 01-99 */
  1498. uintN num = JS7_UNDEC(dc);
  1499. if (num > res->parenCount())
  1500. return false;
  1501. const jschar *cp = dp + 2;
  1502. if (cp < ep && (dc = *cp, JS7_ISDEC(dc))) {
  1503. uintN tmp = 10 * num + JS7_UNDEC(dc);
  1504. if (tmp <= res->parenCount()) {
  1505. cp++;
  1506. num = tmp;
  1507. }
  1508. }
  1509. if (num == 0)
  1510. return false;
  1511. *skip = cp - dp;
  1512. JS_ASSERT(num <= res->parenCount());
  1513. /*
  1514. * Note: we index to get the paren with the (1-indexed) pair
  1515. * number, as opposed to a (0-indexed) paren number.
  1516. */
  1517. res->getParen(num, out);
  1518. return true;
  1519. }
  1520. *skip = 2;
  1521. switch (dc) {
  1522. case '$':
  1523. rdata.dollarStr.chars = dp;
  1524. rdata.dollarStr.length = 1;
  1525. *out = rdata.dollarStr;
  1526. return true;
  1527. case '&':
  1528. res->getLastMatch(out);
  1529. return true;
  1530. case '+':
  1531. res->getLastParen(out);
  1532. return true;
  1533. case '`':
  1534. res->getLeftContext(out);
  1535. return true;
  1536. case '\'':
  1537. res->getRightContext(out);
  1538. return true;
  1539. }
  1540. return false;
  1541. }
  1542. static bool
  1543. FindReplaceLength(JSContext *cx, RegExpStatics *res, ReplaceData &rdata, size_t *sizep)
  1544. {
  1545. JSObject *base = rdata.elembase;
  1546. if (base) {
  1547. /*
  1548. * The base object is used when replace was passed a lambda which looks like
  1549. * 'function(a) { return b[a]; }' for the base object b. b will not change
  1550. * in the course of the replace unless we end up making a scripted call due
  1551. * to accessing a scripted getter or a value with a scripted toString.
  1552. */
  1553. JS_ASSERT(rdata.lambda);
  1554. JS_ASSERT(!base->getOps()->lookupProperty);
  1555. JS_ASSERT(!base->getOps()->getProperty);
  1556. Value match;
  1557. if (!res->createLastMatch(cx, &match))
  1558. return false;
  1559. JSString *str = match.toString();
  1560. JSAtom *atom;
  1561. if (str->isAtom()) {
  1562. atom = &str->asAtom();
  1563. } else {
  1564. atom = js_AtomizeString(cx, str);
  1565. if (!atom)
  1566. return false;
  1567. }
  1568. jsid id = ATOM_TO_JSID(atom);
  1569. JSObject *holder;
  1570. JSProperty *prop = NULL;
  1571. if (!LookupPropertyWithFlags(cx, base, id, JSRESOLVE_QUALIFIED, &holder, &prop))
  1572. return false;
  1573. /* Only handle the case where the property exists and is on this object. */
  1574. if (prop && holder == base) {
  1575. Shape *shape = (Shape *) prop;
  1576. if (shape->hasSlot() && shape->hasDefaultGetter()) {
  1577. Value value = base->getSlot(shape->slot());
  1578. if (value.isString()) {
  1579. rdata.repstr = value.toString()->ensureLinear(cx);
  1580. if (!rdata.repstr)
  1581. return false;
  1582. *sizep = rdata.repstr->length();
  1583. return true;
  1584. }
  1585. }
  1586. }
  1587. /*
  1588. * Couldn't handle this property, fall through and despecialize to the
  1589. * general lambda case.
  1590. */
  1591. rdata.elembase = NULL;
  1592. }
  1593. JSObject *lambda = rdata.lambda;
  1594. if (lambda) {
  1595. PreserveRegExpStatics staticsGuard(res);
  1596. if (!staticsGuard.init(cx))
  1597. return false;
  1598. /*
  1599. * In the lambda case, not only do we find the replacement string's
  1600. * length, we compute repstr and return it via rdata for use within
  1601. * DoReplace. The lambda is called with arguments ($&, $1, $2, ...,
  1602. * index, input), i.e., all the properties of a regexp match array.
  1603. * For $&, etc., we must create string jsvals from cx->regExpStatics.
  1604. * We grab up stack space to keep the newborn strings GC-rooted.
  1605. */
  1606. uintN p = res->parenCount();
  1607. uintN argc = 1 + p + 2;
  1608. InvokeArgsGuard &args = rdata.args;
  1609. if (!args.pushed() && !cx->stack.pushInvokeArgs(cx, argc, &args))
  1610. return false;
  1611. args.setCallee(ObjectValue(*lambda));
  1612. args.thisv() = UndefinedValue();
  1613. /* Push $&, $1, $2, ... */
  1614. uintN argi = 0;
  1615. if (!res->createLastMatch(cx, &args[argi++]))
  1616. return false;
  1617. for (size_t i = 0; i < res->parenCount(); ++i) {
  1618. if (!res->createParen(cx, i + 1, &args[argi++]))
  1619. return false;
  1620. }
  1621. /* Push match index and input string. */
  1622. args[argi++].setInt32(res->matchStart());
  1623. args[argi].setString(rdata.str);
  1624. if (!Invoke(cx, args))
  1625. return false;
  1626. /* root repstr: rdata is on the stack, so scanned by conservative gc. */
  1627. JSString *repstr = ToString(cx, args.rval());
  1628. if (!repstr)
  1629. return false;
  1630. rdata.repstr = repstr->ensureLinear(cx);
  1631. if (!rdata.repstr)
  1632. return false;
  1633. *sizep = rdata.repstr->length();
  1634. return true;
  1635. }
  1636. JSString *repstr = rdata.repstr;
  1637. size_t replen = repstr->length();
  1638. for (const jschar *dp = rdata.dollar, *ep = rdata.dollarEnd; dp;
  1639. dp = js_strchr_limit(dp, '$', ep)) {
  1640. JSSubString sub;
  1641. size_t skip;
  1642. if (InterpretDollar(cx, res, dp, ep, rdata, &sub, &skip)) {
  1643. replen += sub.length - skip;
  1644. dp += skip;
  1645. } else {
  1646. dp++;
  1647. }
  1648. }
  1649. *sizep = replen;
  1650. return true;
  1651. }
  1652. /*
  1653. * Precondition: |rdata.sb| already has necessary growth space reserved (as
  1654. * derived from FindReplaceLength).
  1655. */
  1656. static void
  1657. DoReplace(JSContext *cx, RegExpStatics *res, ReplaceData &rdata)
  1658. {
  1659. JSLinearString *repstr = rdata.repstr;
  1660. const jschar *cp;
  1661. const jschar *bp = cp = repstr->chars();
  1662. const jschar *dp = rdata.dollar;
  1663. const jschar *ep = rdata.dollarEnd;
  1664. for (; dp; dp = js_strchr_limit(dp, '$', ep)) {
  1665. /* Move one of the constant portions of the replacement value. */
  1666. size_t len = dp - cp;
  1667. rdata.sb.infallibleAppend(cp, len);
  1668. cp = dp;
  1669. JSSubString sub;
  1670. size_t skip;
  1671. if (InterpretDollar(cx, res, dp, ep, rdata, &sub, &skip)) {
  1672. len = sub.length;
  1673. rdata.sb.infallibleAppend(sub.chars, len);
  1674. cp += skip;
  1675. dp += skip;
  1676. } else {
  1677. dp++;
  1678. }
  1679. }
  1680. rdata.sb.infallibleAppend(cp, repstr->length() - (cp - bp));
  1681. }
  1682. static bool
  1683. ReplaceRegExpCallback(JSContext *cx, RegExpStatics *res, size_t count, void *p)
  1684. {
  1685. ReplaceData &rdata = *static_cast<ReplaceData *>(p);
  1686. rdata.calledBack = true;
  1687. JSLinearString &str = rdata.str->asLinear(); /* flattened for regexp */
  1688. size_t leftoff = rdata.leftIndex;
  1689. const jschar *left = str.chars() + leftoff;
  1690. size_t leftlen = res->matchStart() - leftoff;
  1691. rdata.leftIndex = res->matchLimit();
  1692. size_t replen = 0; /* silence 'unused' warning */
  1693. if (!FindReplaceLength(cx, res, rdata, &replen))
  1694. return false;
  1695. size_t growth = leftlen + replen;
  1696. if (!rdata.sb.reserve(rdata.sb.length() + growth))
  1697. return false;
  1698. rdata.sb.infallibleAppend(left, leftlen); /* skipped-over portion of the search value */
  1699. DoReplace(cx, res, rdata);
  1700. return true;
  1701. }
  1702. static bool
  1703. BuildFlatReplacement(JSContext *cx, JSString *textstr, JSString *repstr,
  1704. const FlatMatch &fm, CallArgs *args)
  1705. {
  1706. RopeBuilder builder(cx);
  1707. size_t match = fm.match();
  1708. size_t matchEnd = match + fm.patternLength();
  1709. if (textstr->isRope()) {
  1710. /*
  1711. * If we are replacing over a rope, avoid flattening it by iterating
  1712. * through it, building a new rope.
  1713. */
  1714. StringSegmentRange r(cx);
  1715. if (!r.init(textstr))
  1716. return false;
  1717. size_t pos = 0;
  1718. while (!r.empty()) {
  1719. JSString *str = r.front();
  1720. size_t len = str->length();
  1721. size_t strEnd = pos + len;
  1722. if (pos < matchEnd && strEnd > match) {
  1723. /*
  1724. * We need to special-case any part of the rope that overlaps
  1725. * with the replacement string.
  1726. */
  1727. if (match >= pos) {
  1728. /*
  1729. * If this part of the rope overlaps with the left side of
  1730. * the pattern, then it must be the only one to overlap with
  1731. * the first character in the pattern, so we include the
  1732. * replacement string here.
  1733. */
  1734. JSString *leftSide = js_NewDependentString(cx, str, 0, match - pos);
  1735. if (!leftSide ||
  1736. !builder.append(leftSide) ||
  1737. !builder.append(repstr)) {
  1738. return false;
  1739. }
  1740. }
  1741. /*
  1742. * If str runs off the end of the matched string, append the
  1743. * last part of str.
  1744. */
  1745. if (strEnd > matchEnd) {
  1746. JSString *rightSide = js_NewDependentString(cx, str, matchEnd - pos,
  1747. strEnd - matchEnd);
  1748. if (!rightSide || !builder.append(rightSide))
  1749. return false;
  1750. }
  1751. } else {
  1752. if (!builder.append(str))
  1753. return false;
  1754. }
  1755. pos += str->length();
  1756. if (!r.popFront())
  1757. return false;
  1758. }
  1759. } else {
  1760. JSString *leftSide = js_NewDependentString(cx, textstr, 0, match);
  1761. if (!leftSide)
  1762. return false;
  1763. JSString *rightSide = js_NewDependentString(cx, textstr, match + fm.patternLength(),
  1764. textstr->length() - match - fm.patternLength());
  1765. if (!rightSide ||
  1766. !builder.append(leftSide) ||
  1767. !builder.append(repstr) ||
  1768. !builder.append(rightSide)) {
  1769. return false;
  1770. }
  1771. }
  1772. args->rval() = StringValue(builder.result());
  1773. return true;
  1774. }
  1775. /*
  1776. * Perform a linear-scan dollar substitution on the replacement text,
  1777. * constructing a result string that looks like:
  1778. *
  1779. * newstring = string[:matchStart] + dollarSub(replaceValue) + string[matchLimit:]
  1780. */
  1781. static inline bool
  1782. BuildDollarReplacement(JSContext *cx, JSString *textstrArg, JSLinearString *repstr,
  1783. const jschar *firstDollar, const FlatMatch &fm, CallArgs *args)
  1784. {
  1785. JSLinearString *textstr = textstrArg->ensureLinear(cx);
  1786. if (!textstr)
  1787. return NULL;
  1788. JS_ASSERT(repstr->chars() <= firstDollar && firstDollar < repstr->chars() + repstr->length());
  1789. size_t matchStart = fm.match();
  1790. size_t matchLimit = matchStart + fm.patternLength();
  1791. /*
  1792. * Most probably:
  1793. *
  1794. * len(newstr) >= len(orig) - len(match) + len(replacement)
  1795. *
  1796. * Note that dollar vars _could_ make the resulting text smaller than this.
  1797. */
  1798. StringBuffer newReplaceChars(cx);
  1799. if (!newReplaceChars.reserve(textstr->length() - fm.patternLength() + repstr->length()))
  1800. return false;
  1801. /* Move the pre-dollar chunk in bulk. */
  1802. newReplaceChars.infallibleAppend(repstr->chars(), firstDollar);
  1803. /* Move the rest char-by-char, interpreting dollars as we encounter them. */
  1804. #define ENSURE(__cond) if (!(__cond)) return false;
  1805. const jschar *repstrLimit = repstr->chars() + repstr->length();
  1806. for (const jschar *it = firstDollar; it < repstrLimit; ++it) {
  1807. if (*it != '$' || it == repstrLimit - 1) {
  1808. ENSURE(newReplaceChars.append(*it));
  1809. continue;
  1810. }
  1811. switch (*(it + 1)) {
  1812. case '$': /* Eat one of the dollars. */
  1813. ENSURE(newReplaceChars.append(*it));
  1814. break;
  1815. case '&':
  1816. ENSURE(newReplaceChars.append(textstr->chars() + matchStart,
  1817. textstr->chars() + matchLimit));
  1818. break;
  1819. case '`':
  1820. ENSURE(newReplaceChars.append(textstr->chars(), textstr->chars() + matchStart));
  1821. break;
  1822. case '\'':
  1823. ENSURE(newReplaceChars.append(textstr->chars() + matchLimit,
  1824. textstr->chars() + textstr->length()));
  1825. break;
  1826. default: /* The dollar we saw was not special (no matter what its mother told it). */
  1827. ENSURE(newReplaceChars.append(*it));
  1828. continue;
  1829. }
  1830. ++it; /* We always eat an extra char in the above switch. */
  1831. }
  1832. JSString *leftSide = js_NewDependentString(cx, textstr, 0, matchStart);
  1833. ENSURE(leftSide);
  1834. JSString *newReplace = newReplaceChars.finishString();
  1835. ENSURE(newReplace);
  1836. JS_ASSERT(textstr->length() >= matchLimit);
  1837. JSString *rightSide = js_NewDependentString(cx, textstr, matchLimit,
  1838. textstr->length() - matchLimit);
  1839. ENSURE(rightSide);
  1840. RopeBuilder builder(cx);
  1841. ENSURE(builder.append(leftSide) &&
  1842. builder.append(newReplace) &&
  1843. builder.append(rightSide));
  1844. #undef ENSURE
  1845. args->rval() = StringValue(builder.result());
  1846. return true;
  1847. }
  1848. static inline bool
  1849. str_replace_regexp(JSContext *cx, CallArgs args, ReplaceData &rdata)
  1850. {
  1851. if (!rdata.g.normalizeRegExp(cx, true, 2, args))
  1852. return false;
  1853. rdata.leftIndex = 0;
  1854. rdata.calledBack = false;
  1855. RegExpStatics *res = cx->regExpStatics();
  1856. RegExpShared &re = rdata.g.regExp();
  1857. Value tmp;
  1858. if (!DoMatch(cx, res, rdata.str, re, ReplaceRegExpCallback, &rdata, REPLACE_ARGS, &tmp))
  1859. return false;
  1860. if (!rdata.calledBack) {
  1861. /* Didn't match, so the string is unmodified. */
  1862. args.rval() = StringValue(rdata.str);
  1863. return true;
  1864. }
  1865. JSSubString sub;
  1866. res->getRightContext(&sub);
  1867. if (!rdata.sb.append(sub.chars, sub.length))
  1868. return false;
  1869. JSString *retstr = rdata.sb.finishString();
  1870. if (!retstr)
  1871. return false;
  1872. args.rval() = StringValue(retstr);
  1873. return true;
  1874. }
  1875. static inline bool
  1876. str_replace_flat_lambda(JSContext *cx, CallArgs outerArgs, ReplaceData &rdata, const FlatMatch &fm)
  1877. {
  1878. JS_ASSERT(fm.match() >= 0);
  1879. JSString *matchStr = js_NewDependentString(cx, rdata.str, fm.match(), fm.patternLength());
  1880. if (!matchStr)
  1881. return false;
  1882. /* lambda(matchStr, matchStart, textstr) */
  1883. static const uint32_t lambdaArgc = 3;
  1884. if (!cx->stack.pushInvokeArgs(cx, lambdaArgc, &rdata.args))
  1885. return false;
  1886. CallArgs &args = rdata.args;
  1887. args.calleev().setObject(*rdata.lambda);
  1888. args.thisv().setUndefined();
  1889. Value *sp = args.array();
  1890. sp[0].setString(matchStr);
  1891. sp[1].setInt32(fm.match());
  1892. sp[2].setString(rdata.str);
  1893. if (!Invoke(cx, rdata.args))
  1894. return false;
  1895. JSString *repstr = ToString(cx, args.rval());
  1896. if (!repstr)
  1897. return false;
  1898. JSString *leftSide = js_NewDependentString(cx, rdata.str, 0, fm.match());
  1899. if (!leftSide)
  1900. return false;
  1901. size_t matchLimit = fm.match() + fm.patternLength();
  1902. JSString *rightSide = js_NewDependentString(cx, rdata.str, matchLimit,
  1903. rdata.str->length() - matchLimit);
  1904. if (!rightSide)
  1905. return false;
  1906. RopeBuilder builder(cx);
  1907. if (!(builder.append(leftSide) &&
  1908. builder.append(repstr) &&
  1909. builder.append(rightSide))) {
  1910. return false;
  1911. }
  1912. outerArgs.rval() = StringValue(builder.result());
  1913. return true;
  1914. }
  1915. static const uint32_t ReplaceOptArg = 2;
  1916. JSBool
  1917. js::str_replace(JSContext *cx, uintN argc, Value *vp)
  1918. {
  1919. CallArgs args = CallArgsFromVp(argc, vp);
  1920. ReplaceData rdata(cx);
  1921. rdata.str = ThisToStringForStringProto(cx, args);
  1922. if (!rdata.str)
  1923. return false;
  1924. if (!rdata.g.init(cx, args))
  1925. return false;
  1926. /* Extract replacement string/function. */
  1927. if (args.length() >= ReplaceOptArg && js_IsCallable(args[1])) {
  1928. rdata.lambda = &args[1].toObject();
  1929. rdata.elembase = NULL;
  1930. rdata.repstr = NULL;
  1931. rdata.dollar = rdata.dollarEnd = NULL;
  1932. if (rdata.lambda->isFunction()) {
  1933. JSFunction *fun = rdata.lambda->toFunction();
  1934. if (fun->isInterpreted()) {
  1935. /*
  1936. * Pattern match the script to check if it is is indexing into a
  1937. * particular object, e.g. 'function(a) { return b[a]; }'. Avoid
  1938. * calling the script in such cases, which are used by javascript
  1939. * packers (particularly the popular Dean Edwards packer) to efficiently
  1940. * encode large scripts. We only handle the code patterns generated
  1941. * by such packers here.
  1942. */
  1943. JSScript *script = fun->script();
  1944. jsbytecode *pc = script->code;
  1945. Value table = UndefinedValue();
  1946. if (JSOp(*pc) == JSOP_GETFCSLOT) {
  1947. table = fun->getFlatClosureUpvar(GET_UINT16(pc));
  1948. pc += JSOP_GETFCSLOT_LENGTH;
  1949. }
  1950. if (table.isObject() &&
  1951. JSOp(*pc) == JSOP_GETARG && GET_SLOTNO(pc) == 0 &&
  1952. JSOp(pc[JSOP_GETARG_LENGTH]) == JSOP_GETELEM &&
  1953. JSOp(pc[JSOP_GETARG_LENGTH + JSOP_GETELEM_LENGTH]) == JSOP_RETURN) {
  1954. Class *clasp = table.toObject().getClass();
  1955. if (clasp->isNative() &&
  1956. !clasp->ops.lookupProperty &&
  1957. !clasp->ops.getProperty) {
  1958. rdata.elembase = &table.toObject();
  1959. }
  1960. }
  1961. }
  1962. }
  1963. } else {
  1964. rdata.lambda = NULL;
  1965. rdata.elembase = NULL;
  1966. rdata.repstr = ArgToRootedString(cx, args, 1);
  1967. if (!rdata.repstr)
  1968. return false;
  1969. /* We're about to store pointers into the middle of our string. */
  1970. JSFixedString *fixed = rdata.repstr->ensureFixed(cx);
  1971. if (!fixed)
  1972. return false;
  1973. rdata.dollarEnd = fixed->chars() + fixed->length();
  1974. rdata.dollar = js_strchr_limit(fixed->chars(), '$', rdata.dollarEnd);
  1975. }
  1976. /*
  1977. * Unlike its |String.prototype| brethren, |replace| doesn't convert
  1978. * its input to a regular expression. (Even if it contains metachars.)
  1979. *
  1980. * However, if the user invokes our (non-standard) |flags| argument
  1981. * extension then we revert to creating a regular expression. Note that
  1982. * this is observable behavior through the side-effect mutation of the
  1983. * |RegExp| statics.
  1984. */
  1985. const FlatMatch *fm = rdata.g.tryFlatMatch(cx, rdata.str, ReplaceOptArg, args.length(), false);
  1986. if (!fm) {
  1987. if (cx->isExceptionPending()) /* oom in RopeMatch in tryFlatMatch */
  1988. return false;
  1989. return str_replace_regexp(cx, args, rdata);
  1990. }
  1991. if (fm->match() < 0) {
  1992. args.rval() = StringValue(rdata.str);
  1993. return true;
  1994. }
  1995. if (rdata.lambda)
  1996. return str_replace_flat_lambda(cx, args, rdata, *fm);
  1997. /*
  1998. * Note: we could optimize the text.length == pattern.length case if we wanted,
  1999. * even in the presence of dollar metachars.
  2000. */
  2001. if (rdata.dollar)
  2002. return BuildDollarReplacement(cx, rdata.str, rdata.repstr, rdata.dollar, *fm, &args);
  2003. return BuildFlatReplacement(cx, rdata.str, rdata.repstr, *fm, &args);
  2004. }
  2005. class SplitMatchResult {
  2006. size_t endIndex_;
  2007. size_t length_;
  2008. public:
  2009. void setFailure() {
  2010. JS_STATIC_ASSERT(SIZE_MAX > JSString::MAX_LENGTH);
  2011. endIndex_ = SIZE_MAX;
  2012. }
  2013. bool isFailure() const {
  2014. return (endIndex_ == SIZE_MAX);
  2015. }
  2016. size_t endIndex() const {
  2017. JS_ASSERT(!isFailure());
  2018. return endIndex_;
  2019. }
  2020. size_t length() const {
  2021. JS_ASSERT(!isFailure());
  2022. return length_;
  2023. }
  2024. void setResult(size_t length, size_t endIndex) {
  2025. length_ = length;
  2026. endIndex_ = endIndex;
  2027. }
  2028. };
  2029. template<class Matcher>
  2030. static JSObject *
  2031. SplitHelper(JSContext *cx, JSLinearString *str, uint32_t limit, Matcher splitMatch, TypeObject *type)
  2032. {
  2033. size_t strLength = str->length();
  2034. SplitMatchResult result;
  2035. /* Step 11. */
  2036. if (strLength == 0) {
  2037. if (!splitMatch(cx, str, 0, &result))
  2038. return NULL;
  2039. /*
  2040. * NB: Unlike in the non-empty string case, it's perfectly fine
  2041. * (indeed the spec requires it) if we match at the end of the
  2042. * string. Thus these cases should hold:
  2043. *
  2044. * var a = "".split("");
  2045. * assertEq(a.length, 0);
  2046. * var b = "".split(/.?/);
  2047. * assertEq(b.length, 0);
  2048. */
  2049. if (!result.isFailure())
  2050. return NewDenseEmptyArray(cx);
  2051. Value v = StringValue(str);
  2052. return NewDenseCopiedArray(cx, 1, &v);
  2053. }
  2054. /* Step 12. */
  2055. size_t lastEndIndex = 0;
  2056. size_t index = 0;
  2057. /* Step 13. */
  2058. AutoValueVector splits(cx);
  2059. while (index < strLength) {
  2060. /* Step 13(a). */
  2061. if (!splitMatch(cx, str, index, &result))
  2062. return NULL;
  2063. /*
  2064. * Step 13(b).
  2065. *
  2066. * Our match algorithm differs from the spec in that it returns the
  2067. * next index at which a match happens. If no match happens we're
  2068. * done.
  2069. *
  2070. * But what if the match is at the end of the string (and the string is
  2071. * not empty)? Per 13(c)(ii) this shouldn't be a match, so we have to
  2072. * specially exclude it. Thus this case should hold:
  2073. *
  2074. * var a = "abc".split(/\b/);
  2075. * assertEq(a.length, 1);
  2076. * assertEq(a[0], "abc");
  2077. */
  2078. if (result.isFailure())
  2079. break;
  2080. /* Step 13(c)(i). */
  2081. size_t sepLength = result.length();
  2082. size_t endIndex = result.endIndex();
  2083. if (sepLength == 0 && endIndex == strLength)
  2084. break;
  2085. /* Step 13(c)(ii). */
  2086. if (endIndex == lastEndIndex) {
  2087. index++;
  2088. continue;
  2089. }
  2090. /* Step 13(c)(iii). */
  2091. JS_ASSERT(lastEndIndex < endIndex);
  2092. JS_ASSERT(sepLength <= strLength);
  2093. JS_ASSERT(lastEndIndex + sepLength <= endIndex);
  2094. /* Steps 13(c)(iii)(1-3). */
  2095. size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
  2096. JSString *sub = js_NewDependentString(cx, str, lastEndIndex, subLength);
  2097. if (!sub || !splits.append(StringValue(sub)))
  2098. return NULL;
  2099. /* Step 13(c)(iii)(4). */
  2100. if (splits.length() == limit)
  2101. return NewDenseCopiedArray(cx, splits.length(), splits.begin());
  2102. /* Step 13(c)(iii)(5). */
  2103. lastEndIndex = endIndex;
  2104. /* Step 13(c)(iii)(6-7). */
  2105. if (Matcher::returnsCaptures) {
  2106. RegExpStatics *res = cx->regExpStatics();
  2107. for (size_t i = 0; i < res->parenCount(); i++) {
  2108. /* Steps 13(c)(iii)(7)(a-c). */
  2109. if (res->pairIsPresent(i + 1)) {
  2110. JSSubString parsub;
  2111. res->getParen(i + 1, &parsub);
  2112. sub = js_NewStringCopyN(cx, parsub.chars, parsub.length);
  2113. if (!sub || !splits.append(StringValue(sub)))
  2114. return NULL;
  2115. } else {
  2116. /* Only string entries have been accounted for so far. */
  2117. AddTypeProperty(cx, type, NULL, UndefinedValue());
  2118. if (!splits.append(UndefinedValue()))
  2119. return NULL;
  2120. }
  2121. /* Step 13(c)(iii)(7)(d). */
  2122. if (splits.length() == limit)
  2123. return NewDenseCopiedArray(cx, splits.length(), splits.begin());
  2124. }
  2125. }
  2126. /* Step 13(c)(iii)(8). */
  2127. index = lastEndIndex;
  2128. }
  2129. /* Steps 14-15. */
  2130. JSString *sub = js_NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
  2131. if (!sub || !splits.append(StringValue(sub)))
  2132. return NULL;
  2133. /* Step 16. */
  2134. return NewDenseCopiedArray(cx, splits.length(), splits.begin());
  2135. }
  2136. /*
  2137. * The SplitMatch operation from ES5 15.5.4.14 is implemented using different
  2138. * paths for regular expression and string separators.
  2139. *
  2140. * The algorithm differs from the spec in that the we return the next index at
  2141. * which a match happens.
  2142. */
  2143. class SplitRegExpMatcher
  2144. {
  2145. RegExpShared &re;
  2146. RegExpStatics *res;
  2147. public:
  2148. SplitRegExpMatcher(RegExpShared &re, RegExpStatics *res) : re(re), res(res) {}
  2149. static const bool returnsCaptures = true;
  2150. bool operator()(JSContext *cx, JSLinearString *str, size_t index, SplitMatchResult *result)
  2151. {
  2152. Value rval = UndefinedValue();
  2153. const jschar *chars = str->chars();
  2154. size_t length = str->length();
  2155. if (!ExecuteRegExp(cx, res, re, str, chars, length, &index, RegExpTest, &rval))
  2156. return false;
  2157. if (!rval.isTrue()) {
  2158. result->setFailure();
  2159. return true;
  2160. }
  2161. JSSubString sep;
  2162. res->getLastMatch(&sep);
  2163. result->setResult(sep.length, index);
  2164. return true;
  2165. }
  2166. };
  2167. class SplitStringMatcher
  2168. {
  2169. const jschar *sepChars;
  2170. size_t sepLength;
  2171. public:
  2172. SplitStringMatcher(JSLinearString *sep) {
  2173. sepChars = sep->chars();
  2174. sepLength = sep->length();
  2175. }
  2176. static const bool returnsCaptures = false;
  2177. bool operator()(JSContext *cx, JSLinearString *str, size_t index, SplitMatchResult *res)
  2178. {
  2179. JS_ASSERT(index == 0 || index < str->length());
  2180. const jschar *chars = str->chars();
  2181. jsint match = StringMatch(chars + index, str->length() - index, sepChars, sepLength);
  2182. if (match == -1)
  2183. res->setFailure();
  2184. else
  2185. res->setResult(sepLength, index + match + sepLength);
  2186. return true;
  2187. }
  2188. };
  2189. /* ES5 15.5.4.14 */
  2190. JSBool
  2191. js::str_split(JSContext *cx, uintN argc, Value *vp)
  2192. {
  2193. CallArgs args = CallArgsFromVp(argc, vp);
  2194. /* Steps 1-2. */
  2195. JSString *str = ThisToStringForStringProto(cx, args);
  2196. if (!str)
  2197. return false;
  2198. TypeObject *type = GetTypeCallerInitObject(cx, JSProto_Array);
  2199. if (!type)
  2200. return false;
  2201. AddTypeProperty(cx, type, NULL, Type::StringType());
  2202. /* Step 5: Use the second argument as the split limit, if given. */
  2203. uint32_t limit;
  2204. if (args.length() > 1 && !args[1].isUndefined()) {
  2205. jsdouble d;
  2206. if (!ToNumber(cx, args[1], &d))
  2207. return false;
  2208. limit = js_DoubleToECMAUint32(d);
  2209. } else {
  2210. limit = UINT32_MAX;
  2211. }
  2212. /* Step 8. */
  2213. RegExpShared::Guard re;
  2214. JSLinearString *sepstr = NULL;
  2215. bool sepUndefined = (args.length() == 0 || args[0].isUndefined());
  2216. if (!sepUndefined) {
  2217. if (IsObjectWithClass(args[0], ESClass_RegExp, cx)) {
  2218. RegExpShared *shared = RegExpToShared(cx, args[0].toObject());
  2219. if (!shared)
  2220. return false;
  2221. re.init(*shared);
  2222. } else {
  2223. sepstr = ArgToRootedString(cx, args, 0);
  2224. if (!sepstr)
  2225. return false;
  2226. }
  2227. }
  2228. /* Step 9. */
  2229. if (limit == 0) {
  2230. JSObject *aobj = NewDenseEmptyArray(cx);
  2231. if (!aobj)
  2232. return false;
  2233. aobj->setType(type);
  2234. args.rval() = ObjectValue(*aobj);
  2235. return true;
  2236. }
  2237. /* Step 10. */
  2238. if (sepUndefined) {
  2239. Value v = StringValue(str);
  2240. JSObject *aobj = NewDenseCopiedArray(cx, 1, &v);
  2241. if (!aobj)
  2242. return false;
  2243. aobj->setType(type);
  2244. args.rval() = ObjectValue(*aobj);
  2245. return true;
  2246. }
  2247. JSLinearString *strlin = str->ensureLinear(cx);
  2248. if (!strlin)
  2249. return false;
  2250. /* Steps 11-15. */
  2251. JSObject *aobj;
  2252. if (!re.initialized())
  2253. aobj = SplitHelper(cx, strlin, limit, SplitStringMatcher(sepstr), type);
  2254. else
  2255. aobj = SplitHelper(cx, strlin, limit, SplitRegExpMatcher(*re, cx->regExpStatics()), type);
  2256. if (!aobj)
  2257. return false;
  2258. /* Step 16. */
  2259. aobj->setType(type);
  2260. args.rval() = ObjectValue(*aobj);
  2261. return true;
  2262. }
  2263. #if JS_HAS_PERL_SUBSTR
  2264. static JSBool
  2265. str_substr(JSContext *cx, uintN argc, Value *vp)
  2266. {
  2267. CallArgs args = CallArgsFromVp(argc, vp);
  2268. JSString *str = ThisToStringForStringProto(cx, args);
  2269. if (!str)
  2270. return false;
  2271. int32_t length, len, begin;
  2272. if (args.length() > 0) {
  2273. length = int32_t(str->length());
  2274. if (!ValueToIntegerRange(cx, args[0], &begin))
  2275. return false;
  2276. if (begin >= length) {
  2277. str = cx->runtime->emptyString;
  2278. goto out;
  2279. }
  2280. if (begin < 0) {
  2281. begin += length; /* length + INT_MIN will always be less than 0 */
  2282. if (begin < 0)
  2283. begin = 0;
  2284. }
  2285. if (args.length() == 1 || args[1].isUndefined()) {
  2286. len = length - begin;
  2287. } else {
  2288. if (!ValueToIntegerRange(cx, args[1], &len))
  2289. return false;
  2290. if (len <= 0) {
  2291. str = cx->runtime->emptyString;
  2292. goto out;
  2293. }
  2294. if (uint32_t(length) < uint32_t(begin + len))
  2295. len = length - begin;
  2296. }
  2297. str = js_NewDependentString(cx, str, size_t(begin), size_t(len));
  2298. if (!str)
  2299. return false;
  2300. }
  2301. out:
  2302. args.rval() = StringValue(str);
  2303. return true;
  2304. }
  2305. #endif /* JS_HAS_PERL_SUBSTR */
  2306. /*
  2307. * Python-esque sequence operations.
  2308. */
  2309. static JSBool
  2310. str_concat(JSContext *cx, uintN argc, Value *vp)
  2311. {
  2312. CallArgs args = CallArgsFromVp(argc, vp);
  2313. JSString *str = ThisToStringForStringProto(cx, args);
  2314. if (!str)
  2315. return false;
  2316. for (uintN i = 0; i < args.length(); i++) {
  2317. JSString *argStr = ToString(cx, args[i]);
  2318. if (!argStr)
  2319. return false;
  2320. str = js_ConcatStrings(cx, str, argStr);
  2321. if (!str)
  2322. return false;
  2323. }
  2324. args.rval() = StringValue(str);
  2325. return true;
  2326. }
  2327. static JSBool
  2328. str_slice(JSContext *cx, uintN argc, Value *vp)
  2329. {
  2330. CallArgs args = CallArgsFromVp(argc, vp);
  2331. if (args.length() == 1 && args.thisv().isString() && args[0].isInt32()) {
  2332. size_t begin, end, length;
  2333. JSString *str = args.thisv().toString();
  2334. begin = args[0].toInt32();
  2335. end = str->length();
  2336. if (begin <= end) {
  2337. length = end - begin;
  2338. if (length == 0) {
  2339. str = cx->runtime->emptyString;
  2340. } else {
  2341. str = (length == 1)
  2342. ? cx->runtime->staticStrings.getUnitStringForElement(cx, str, begin)
  2343. : js_NewDependentString(cx, str, begin, length);
  2344. if (!str)
  2345. return false;
  2346. }
  2347. args.rval() = StringValue(str);
  2348. return true;
  2349. }
  2350. }
  2351. JSString *str = ThisToStringForStringProto(cx, args);
  2352. if (!str)
  2353. return false;
  2354. if (args.length() != 0) {
  2355. double begin, end, length;
  2356. if (!ToInteger(cx, args[0], &begin))
  2357. return false;
  2358. length = str->length();
  2359. if (begin < 0) {
  2360. begin += length;
  2361. if (begin < 0)
  2362. begin = 0;
  2363. } else if (begin > length) {
  2364. begin = length;
  2365. }
  2366. if (args.length() == 1 || args[1].isUndefined()) {
  2367. end = length;
  2368. } else {
  2369. if (!ToInteger(cx, args[1], &end))
  2370. return false;
  2371. if (end < 0) {
  2372. end += length;
  2373. if (end < 0)
  2374. end = 0;
  2375. } else if (end > length) {
  2376. end = length;
  2377. }
  2378. if (end < begin)
  2379. end = begin;
  2380. }
  2381. str = js_NewDependentString(cx, str,
  2382. (size_t)begin,
  2383. (size_t)(end - begin));
  2384. if (!str)
  2385. return false;
  2386. }
  2387. args.rval() = StringValue(str);
  2388. return true;
  2389. }
  2390. #if JS_HAS_STR_HTML_HELPERS
  2391. /*
  2392. * HTML composition aids.
  2393. */
  2394. static bool
  2395. tagify(JSContext *cx, const char *begin, JSLinearString *param, const char *end,
  2396. CallReceiver call)
  2397. {
  2398. JSString *thisstr = ThisToStringForStringProto(cx, call);
  2399. if (!thisstr)
  2400. return false;
  2401. JSLinearString *str = thisstr->ensureLinear(cx);
  2402. if (!str)
  2403. return false;
  2404. if (!end)
  2405. end = begin;
  2406. size_t beglen = strlen(begin);
  2407. size_t taglen = 1 + beglen + 1; /* '<begin' + '>' */
  2408. size_t parlen = 0; /* Avoid warning. */
  2409. if (param) {
  2410. parlen = param->length();
  2411. taglen += 2 + parlen + 1; /* '="param"' */
  2412. }
  2413. size_t endlen = strlen(end);
  2414. taglen += str->length() + 2 + endlen + 1; /* 'str</end>' */
  2415. if (taglen >= ~(size_t)0 / sizeof(jschar)) {
  2416. js_ReportAllocationOverflow(cx);
  2417. return false;
  2418. }
  2419. jschar *tagbuf = (jschar *) cx->malloc_((taglen + 1) * sizeof(jschar));
  2420. if (!tagbuf)
  2421. return false;
  2422. size_t j = 0;
  2423. tagbuf[j++] = '<';
  2424. for (size_t i = 0; i < beglen; i++)
  2425. tagbuf[j++] = (jschar)begin[i];
  2426. if (param) {
  2427. tagbuf[j++] = '=';
  2428. tagbuf[j++] = '"';
  2429. js_strncpy(&tagbuf[j], param->chars(), parlen);
  2430. j += parlen;
  2431. tagbuf[j++] = '"';
  2432. }
  2433. tagbuf[j++] = '>';
  2434. js_strncpy(&tagbuf[j], str->chars(), str->length());
  2435. j += str->length();
  2436. tagbuf[j++] = '<';
  2437. tagbuf[j++] = '/';
  2438. for (size_t i = 0; i < endlen; i++)
  2439. tagbuf[j++] = (jschar)end[i];
  2440. tagbuf[j++] = '>';
  2441. JS_ASSERT(j == taglen);
  2442. tagbuf[j] = 0;
  2443. JSString *retstr = js_NewString(cx, tagbuf, taglen);
  2444. if (!retstr) {
  2445. Foreground::free_((char *)tagbuf);
  2446. return false;
  2447. }
  2448. call.rval() = StringValue(retstr);
  2449. return true;
  2450. }
  2451. static JSBool
  2452. tagify_value(JSContext *cx, CallArgs args, const char *begin, const char *end)
  2453. {
  2454. JSLinearString *param = ArgToRootedString(cx, args, 0);
  2455. if (!param)
  2456. return false;
  2457. return tagify(cx, begin, param, end, args);
  2458. }
  2459. static JSBool
  2460. str_bold(JSContext *cx, uintN argc, Value *vp)
  2461. {
  2462. return tagify(cx, "b", NULL, NULL, CallReceiverFromVp(vp));
  2463. }
  2464. static JSBool
  2465. str_italics(JSContext *cx, uintN argc, Value *vp)
  2466. {
  2467. return tagify(cx, "i", NULL, NULL, CallReceiverFromVp(vp));
  2468. }
  2469. static JSBool
  2470. str_fixed(JSContext *cx, uintN argc, Value *vp)
  2471. {
  2472. return tagify(cx, "tt", NULL, NULL, CallReceiverFromVp(vp));
  2473. }
  2474. static JSBool
  2475. str_fontsize(JSContext *cx, uintN argc, Value *vp)
  2476. {
  2477. return tagify_value(cx, CallArgsFromVp(argc, vp), "font size", "font");
  2478. }
  2479. static JSBool
  2480. str_fontcolor(JSContext *cx, uintN argc, Value *vp)
  2481. {
  2482. return tagify_value(cx, CallArgsFromVp(argc, vp), "font color", "font");
  2483. }
  2484. static JSBool
  2485. str_link(JSContext *cx, uintN argc, Value *vp)
  2486. {
  2487. return tagify_value(cx, CallArgsFromVp(argc, vp), "a href", "a");
  2488. }
  2489. static JSBool
  2490. str_anchor(JSContext *cx, uintN argc, Value *vp)
  2491. {
  2492. return tagify_value(cx, CallArgsFromVp(argc, vp), "a name", "a");
  2493. }
  2494. static JSBool
  2495. str_strike(JSContext *cx, uintN argc, Value *vp)
  2496. {
  2497. return tagify(cx, "strike", NULL, NULL, CallReceiverFromVp(vp));
  2498. }
  2499. static JSBool
  2500. str_small(JSContext *cx, uintN argc, Value *vp)
  2501. {
  2502. return tagify(cx, "small", NULL, NULL, CallReceiverFromVp(vp));
  2503. }
  2504. static JSBool
  2505. str_big(JSContext *cx, uintN argc, Value *vp)
  2506. {
  2507. return tagify(cx, "big", NULL, NULL, CallReceiverFromVp(vp));
  2508. }
  2509. static JSBool
  2510. str_blink(JSContext *cx, uintN argc, Value *vp)
  2511. {
  2512. return tagify(cx, "blink", NULL, NULL, CallReceiverFromVp(vp));
  2513. }
  2514. static JSBool
  2515. str_sup(JSContext *cx, uintN argc, Value *vp)
  2516. {
  2517. return tagify(cx, "sup", NULL, NULL, CallReceiverFromVp(vp));
  2518. }
  2519. static JSBool
  2520. str_sub(JSContext *cx, uintN argc, Value *vp)
  2521. {
  2522. return tagify(cx, "sub", NULL, NULL, CallReceiverFromVp(vp));
  2523. }
  2524. #endif /* JS_HAS_STR_HTML_HELPERS */
  2525. static JSFunctionSpec string_methods[] = {
  2526. #if JS_HAS_TOSOURCE
  2527. JS_FN("quote", str_quote, 0,JSFUN_GENERIC_NATIVE),
  2528. JS_FN(js_toSource_str, str_toSource, 0,0),
  2529. #endif
  2530. /* Java-like methods. */
  2531. JS_FN(js_toString_str, js_str_toString, 0,0),
  2532. JS_FN(js_valueOf_str, js_str_toString, 0,0),
  2533. JS_FN("substring", str_substring, 2,JSFUN_GENERIC_NATIVE),
  2534. JS_FN("toLowerCase", str_toLowerCase, 0,JSFUN_GENERIC_NATIVE),
  2535. JS_FN("toUpperCase", str_toUpperCase, 0,JSFUN_GENERIC_NATIVE),
  2536. JS_FN("charAt", js_str_charAt, 1,JSFUN_GENERIC_NATIVE),
  2537. JS_FN("charCodeAt", js_str_charCodeAt, 1,JSFUN_GENERIC_NATIVE),
  2538. JS_FN("indexOf", str_indexOf, 1,JSFUN_GENERIC_NATIVE),
  2539. JS_FN("lastIndexOf", str_lastIndexOf, 1,JSFUN_GENERIC_NATIVE),
  2540. JS_FN("trim", str_trim, 0,JSFUN_GENERIC_NATIVE),
  2541. JS_FN("trimLeft", str_trimLeft, 0,JSFUN_GENERIC_NATIVE),
  2542. JS_FN("trimRight", str_trimRight, 0,JSFUN_GENERIC_NATIVE),
  2543. JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0,JSFUN_GENERIC_NATIVE),
  2544. JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0,JSFUN_GENERIC_NATIVE),
  2545. JS_FN("localeCompare", str_localeCompare, 1,JSFUN_GENERIC_NATIVE),
  2546. /* Perl-ish methods (search is actually Python-esque). */
  2547. JS_FN("match", str_match, 1,JSFUN_GENERIC_NATIVE),
  2548. JS_FN("search", str_search, 1,JSFUN_GENERIC_NATIVE),
  2549. JS_FN("replace", str_replace, 2,JSFUN_GENERIC_NATIVE),
  2550. JS_FN("split", str_split, 2,JSFUN_GENERIC_NATIVE),
  2551. #if JS_HAS_PERL_SUBSTR
  2552. JS_FN("substr", str_substr, 2,JSFUN_GENERIC_NATIVE),
  2553. #endif
  2554. /* Python-esque sequence methods. */
  2555. JS_FN("concat", str_concat, 1,JSFUN_GENERIC_NATIVE),
  2556. JS_FN("slice", str_slice, 2,JSFUN_GENERIC_NATIVE),
  2557. /* HTML string methods. */
  2558. #if JS_HAS_STR_HTML_HELPERS
  2559. JS_FN("bold", str_bold, 0,0),
  2560. JS_FN("italics", str_italics, 0,0),
  2561. JS_FN("fixed", str_fixed, 0,0),
  2562. JS_FN("fontsize", str_fontsize, 1,0),
  2563. JS_FN("fontcolor", str_fontcolor, 1,0),
  2564. JS_FN("link", str_link, 1,0),
  2565. JS_FN("anchor", str_anchor, 1,0),
  2566. JS_FN("strike", str_strike, 0,0),
  2567. JS_FN("small", str_small, 0,0),
  2568. JS_FN("big", str_big, 0,0),
  2569. JS_FN("blink", str_blink, 0,0),
  2570. JS_FN("sup", str_sup, 0,0),
  2571. JS_FN("sub", str_sub, 0,0),
  2572. #endif
  2573. JS_FS_END
  2574. };
  2575. JSBool
  2576. js_String(JSContext *cx, uintN argc, Value *vp)
  2577. {
  2578. CallArgs args = CallArgsFromVp(argc, vp);
  2579. JSString *str;
  2580. if (args.length() > 0) {
  2581. str = ToString(cx, args[0]);
  2582. if (!str)
  2583. return false;
  2584. } else {
  2585. str = cx->runtime->emptyString;
  2586. }
  2587. if (IsConstructing(args)) {
  2588. StringObject *strobj = StringObject::create(cx, str);
  2589. if (!strobj)
  2590. return false;
  2591. args.rval() = ObjectValue(*strobj);
  2592. return true;
  2593. }
  2594. args.rval() = StringValue(str);
  2595. return true;
  2596. }
  2597. JSBool
  2598. js::str_fromCharCode(JSContext *cx, uintN argc, Value *vp)
  2599. {
  2600. CallArgs args = CallArgsFromVp(argc, vp);
  2601. JS_ASSERT(args.length() <= StackSpace::ARGS_LENGTH_MAX);
  2602. if (args.length() == 1) {
  2603. uint16_t code;
  2604. if (!ValueToUint16(cx, args[0], &code))
  2605. return JS_FALSE;
  2606. if (StaticStrings::hasUnit(code)) {
  2607. args.rval() = StringValue(cx->runtime->staticStrings.getUnit(code));
  2608. return JS_TRUE;
  2609. }
  2610. args[0].setInt32(code);
  2611. }
  2612. jschar *chars = (jschar *) cx->malloc_((args.length() + 1) * sizeof(jschar));
  2613. if (!chars)
  2614. return JS_FALSE;
  2615. for (uintN i = 0; i < args.length(); i++) {
  2616. uint16_t code;
  2617. if (!ValueToUint16(cx, args[i], &code)) {
  2618. cx->free_(chars);
  2619. return JS_FALSE;
  2620. }
  2621. chars[i] = (jschar)code;
  2622. }
  2623. chars[args.length()] = 0;
  2624. JSString *str = js_NewString(cx, chars, args.length());
  2625. if (!str) {
  2626. cx->free_(chars);
  2627. return JS_FALSE;
  2628. }
  2629. args.rval() = StringValue(str);
  2630. return JS_TRUE;
  2631. }
  2632. static JSFunctionSpec string_static_methods[] = {
  2633. JS_FN("fromCharCode", js::str_fromCharCode, 1, 0),
  2634. JS_FS_END
  2635. };
  2636. Shape *
  2637. StringObject::assignInitialShape(JSContext *cx)
  2638. {
  2639. JS_ASSERT(nativeEmpty());
  2640. return addDataProperty(cx, ATOM_TO_JSID(cx->runtime->atomState.lengthAtom),
  2641. LENGTH_SLOT, JSPROP_PERMANENT | JSPROP_READONLY);
  2642. }
  2643. JSObject *
  2644. js_InitStringClass(JSContext *cx, JSObject *obj)
  2645. {
  2646. JS_ASSERT(obj->isNative());
  2647. GlobalObject *global = &obj->asGlobal();
  2648. JSObject *proto = global->createBlankPrototype(cx, &StringClass);
  2649. if (!proto || !proto->asString().init(cx, cx->runtime->emptyString))
  2650. return NULL;
  2651. /* Now create the String function. */
  2652. JSFunction *ctor = global->createConstructor(cx, js_String, &StringClass,
  2653. CLASS_ATOM(cx, String), 1);
  2654. if (!ctor)
  2655. return NULL;
  2656. if (!LinkConstructorAndPrototype(cx, ctor, proto))
  2657. return NULL;
  2658. if (!DefinePropertiesAndBrand(cx, proto, NULL, string_methods) ||
  2659. !DefinePropertiesAndBrand(cx, ctor, NULL, string_static_methods))
  2660. {
  2661. return NULL;
  2662. }
  2663. /* Capture normal data properties pregenerated for String objects. */
  2664. TypeObject *type = proto->getNewType(cx);
  2665. if (!type)
  2666. return NULL;
  2667. AddTypeProperty(cx, type, "length", Type::Int32Type());
  2668. if (!DefineConstructorAndPrototype(cx, global, JSProto_String, ctor, proto))
  2669. return NULL;
  2670. /*
  2671. * Define escape/unescape, the URI encode/decode functions, and maybe
  2672. * uneval on the global object.
  2673. */
  2674. if (!JS_DefineFunctions(cx, global, string_functions))
  2675. return NULL;
  2676. return proto;
  2677. }
  2678. JSFixedString *
  2679. js_NewString(JSContext *cx, jschar *chars, size_t length)
  2680. {
  2681. JSFixedString *s = JSFixedString::new_(cx, chars, length);
  2682. if (s)
  2683. Probes::createString(cx, s, length);
  2684. return s;
  2685. }
  2686. static JS_ALWAYS_INLINE JSFixedString *
  2687. NewShortString(JSContext *cx, const jschar *chars, size_t length)
  2688. {
  2689. /*
  2690. * Don't bother trying to find a static atom; measurement shows that not
  2691. * many get here (for one, Atomize is catching them).
  2692. */
  2693. JS_ASSERT(JSShortString::lengthFits(length));
  2694. JSInlineString *str = JSInlineString::lengthFits(length)
  2695. ? JSInlineString::new_(cx)
  2696. : JSShortString::new_(cx);
  2697. if (!str)
  2698. return NULL;
  2699. jschar *storage = str->init(length);
  2700. PodCopy(storage, chars, length);
  2701. storage[length] = 0;
  2702. Probes::createString(cx, str, length);
  2703. return str;
  2704. }
  2705. static JSInlineString *
  2706. NewShortString(JSContext *cx, const char *chars, size_t length)
  2707. {
  2708. JS_ASSERT(JSShortString::lengthFits(length));
  2709. JSInlineString *str = JSInlineString::lengthFits(length)
  2710. ? JSInlineString::new_(cx)
  2711. : JSShortString::new_(cx);
  2712. if (!str)
  2713. return NULL;
  2714. jschar *storage = str->init(length);
  2715. if (js_CStringsAreUTF8) {
  2716. #ifdef DEBUG
  2717. size_t oldLength = length;
  2718. #endif
  2719. if (!InflateUTF8StringToBuffer(cx, chars, length, storage, &length))
  2720. return NULL;
  2721. JS_ASSERT(length <= oldLength);
  2722. storage[length] = 0;
  2723. str->resetLength(length);
  2724. } else {
  2725. size_t n = length;
  2726. jschar *p = storage;
  2727. while (n--)
  2728. *p++ = (unsigned char)*chars++;
  2729. *p = 0;
  2730. }
  2731. Probes::createString(cx, str, length);
  2732. return str;
  2733. }
  2734. jschar *
  2735. StringBuffer::extractWellSized()
  2736. {
  2737. size_t capacity = cb.capacity();
  2738. size_t length = cb.length();
  2739. jschar *buf = cb.extractRawBuffer();
  2740. if (!buf)
  2741. return NULL;
  2742. /* For medium/big buffers, avoid wasting more than 1/4 of the memory. */
  2743. JS_ASSERT(capacity >= length);
  2744. if (length > CharBuffer::sMaxInlineStorage && capacity - length > length / 4) {
  2745. size_t bytes = sizeof(jschar) * (length + 1);
  2746. JSContext *cx = context();
  2747. jschar *tmp = (jschar *)cx->realloc_(buf, bytes);
  2748. if (!tmp) {
  2749. cx->free_(buf);
  2750. return NULL;
  2751. }
  2752. buf = tmp;
  2753. }
  2754. return buf;
  2755. }
  2756. JSFixedString *
  2757. StringBuffer::finishString()
  2758. {
  2759. JSContext *cx = context();
  2760. if (cb.empty())
  2761. return cx->runtime->atomState.emptyAtom;
  2762. size_t length = cb.length();
  2763. if (!checkLength(length))
  2764. return NULL;
  2765. JS_STATIC_ASSERT(JSShortString::MAX_SHORT_LENGTH < CharBuffer::InlineLength);
  2766. if (JSShortString::lengthFits(length))
  2767. return NewShortString(cx, cb.begin(), length);
  2768. if (!cb.append('\0'))
  2769. return NULL;
  2770. jschar *buf = extractWellSized();
  2771. if (!buf)
  2772. return NULL;
  2773. JSFixedString *str = js_NewString(cx, buf, length);
  2774. if (!str)
  2775. cx->free_(buf);
  2776. return str;
  2777. }
  2778. JSAtom *
  2779. StringBuffer::finishAtom()
  2780. {
  2781. JSContext *cx = context();
  2782. size_t length = cb.length();
  2783. if (length == 0)
  2784. return cx->runtime->atomState.emptyAtom;
  2785. JSAtom *atom = js_AtomizeChars(cx, cb.begin(), length);
  2786. cb.clear();
  2787. return atom;
  2788. }
  2789. JSLinearString *
  2790. js_NewDependentString(JSContext *cx, JSString *baseArg, size_t start, size_t length)
  2791. {
  2792. if (length == 0)
  2793. return cx->runtime->emptyString;
  2794. JSLinearString *base = baseArg->ensureLinear(cx);
  2795. if (!base)
  2796. return NULL;
  2797. if (start == 0 && length == base->length())
  2798. return base;
  2799. const jschar *chars = base->chars() + start;
  2800. if (JSLinearString *staticStr = cx->runtime->staticStrings.lookup(chars, length))
  2801. return staticStr;
  2802. JSLinearString *s = JSDependentString::new_(cx, base, chars, length);
  2803. Probes::createString(cx, s, length);
  2804. return s;
  2805. }
  2806. JSFixedString *
  2807. js_NewStringCopyN(JSContext *cx, const jschar *s, size_t n)
  2808. {
  2809. if (JSShortString::lengthFits(n))
  2810. return NewShortString(cx, s, n);
  2811. jschar *news = (jschar *) cx->malloc_((n + 1) * sizeof(jschar));
  2812. if (!news)
  2813. return NULL;
  2814. js_strncpy(news, s, n);
  2815. news[n] = 0;
  2816. JSFixedString *str = js_NewString(cx, news, n);
  2817. if (!str)
  2818. cx->free_(news);
  2819. return str;
  2820. }
  2821. JSFixedString *
  2822. js_NewStringCopyN(JSContext *cx, const char *s, size_t n)
  2823. {
  2824. if (JSShortString::lengthFits(n))
  2825. return NewShortString(cx, s, n);
  2826. jschar *chars = InflateString(cx, s, &n);
  2827. if (!chars)
  2828. return NULL;
  2829. JSFixedString *str = js_NewString(cx, chars, n);
  2830. if (!str)
  2831. cx->free_(chars);
  2832. return str;
  2833. }
  2834. JSFixedString *
  2835. js_NewStringCopyZ(JSContext *cx, const jschar *s)
  2836. {
  2837. size_t n = js_strlen(s);
  2838. if (JSShortString::lengthFits(n))
  2839. return NewShortString(cx, s, n);
  2840. size_t m = (n + 1) * sizeof(jschar);
  2841. jschar *news = (jschar *) cx->malloc_(m);
  2842. if (!news)
  2843. return NULL;
  2844. js_memcpy(news, s, m);
  2845. JSFixedString *str = js_NewString(cx, news, n);
  2846. if (!str)
  2847. cx->free_(news);
  2848. return str;
  2849. }
  2850. JSFixedString *
  2851. js_NewStringCopyZ(JSContext *cx, const char *s)
  2852. {
  2853. return js_NewStringCopyN(cx, s, strlen(s));
  2854. }
  2855. const char *
  2856. js_ValueToPrintable(JSContext *cx, const Value &v, JSAutoByteString *bytes, bool asSource)
  2857. {
  2858. JSString *str;
  2859. str = (asSource ? js_ValueToSource : ToString)(cx, v);
  2860. if (!str)
  2861. return NULL;
  2862. str = js_QuoteString(cx, str, 0);
  2863. if (!str)
  2864. return NULL;
  2865. return bytes->encode(cx, str);
  2866. }
  2867. JSString *
  2868. js::ToStringSlow(JSContext *cx, const Value &arg)
  2869. {
  2870. /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
  2871. JS_ASSERT(!arg.isString());
  2872. Value v = arg;
  2873. if (!ToPrimitive(cx, JSTYPE_STRING, &v))
  2874. return NULL;
  2875. JSString *str;
  2876. if (v.isString()) {
  2877. str = v.toString();
  2878. } else if (v.isInt32()) {
  2879. str = js_IntToString(cx, v.toInt32());
  2880. } else if (v.isDouble()) {
  2881. str = js_NumberToString(cx, v.toDouble());
  2882. } else if (v.isBoolean()) {
  2883. str = js_BooleanToString(cx, v.toBoolean());
  2884. } else if (v.isNull()) {
  2885. str = cx->runtime->atomState.nullAtom;
  2886. } else {
  2887. str = cx->runtime->atomState.typeAtoms[JSTYPE_VOID];
  2888. }
  2889. return str;
  2890. }
  2891. /* This function implements E-262-3 section 9.8, toString. */
  2892. bool
  2893. js::ValueToStringBufferSlow(JSContext *cx, const Value &arg, StringBuffer &sb)
  2894. {
  2895. Value v = arg;
  2896. if (!ToPrimitive(cx, JSTYPE_STRING, &v))
  2897. return false;
  2898. if (v.isString())
  2899. return sb.append(v.toString());
  2900. if (v.isNumber())
  2901. return NumberValueToStringBuffer(cx, v, sb);
  2902. if (v.isBoolean())
  2903. return BooleanToStringBuffer(cx, v.toBoolean(), sb);
  2904. if (v.isNull())
  2905. return sb.append(cx->runtime->atomState.nullAtom);
  2906. JS_ASSERT(v.isUndefined());
  2907. return sb.append(cx->runtime->atomState.typeAtoms[JSTYPE_VOID]);
  2908. }
  2909. JS_FRIEND_API(JSString *)
  2910. js_ValueToSource(JSContext *cx, const Value &v)
  2911. {
  2912. JS_CHECK_RECURSION(cx, return NULL);
  2913. if (v.isUndefined())
  2914. return cx->runtime->atomState.void0Atom;
  2915. if (v.isString())
  2916. return js_QuoteString(cx, v.toString(), '"');
  2917. if (v.isPrimitive()) {
  2918. /* Special case to preserve negative zero, _contra_ toString. */
  2919. if (v.isDouble() && JSDOUBLE_IS_NEGZERO(v.toDouble())) {
  2920. /* NB: _ucNstr rather than _ucstr to indicate non-terminated. */
  2921. static const jschar js_negzero_ucNstr[] = {'-', '0'};
  2922. return js_NewStringCopyN(cx, js_negzero_ucNstr, 2);
  2923. }
  2924. return ToString(cx, v);
  2925. }
  2926. Value rval = NullValue();
  2927. Value fval;
  2928. jsid id = ATOM_TO_JSID(cx->runtime->atomState.toSourceAtom);
  2929. if (!js_GetMethod(cx, &v.toObject(), id, JSGET_NO_METHOD_BARRIER, &fval))
  2930. return NULL;
  2931. if (js_IsCallable(fval)) {
  2932. if (!Invoke(cx, v, fval, 0, NULL, &rval))
  2933. return NULL;
  2934. }
  2935. return ToString(cx, rval);
  2936. }
  2937. namespace js {
  2938. bool
  2939. EqualStrings(JSContext *cx, JSString *str1, JSString *str2, bool *result)
  2940. {
  2941. if (str1 == str2) {
  2942. *result = true;
  2943. return true;
  2944. }
  2945. size_t length1 = str1->length();
  2946. if (length1 != str2->length()) {
  2947. *result = false;
  2948. return true;
  2949. }
  2950. JSLinearString *linear1 = str1->ensureLinear(cx);
  2951. if (!linear1)
  2952. return false;
  2953. JSLinearString *linear2 = str2->ensureLinear(cx);
  2954. if (!linear2)
  2955. return false;
  2956. *result = PodEqual(linear1->chars(), linear2->chars(), length1);
  2957. return true;
  2958. }
  2959. bool
  2960. EqualStrings(JSLinearString *str1, JSLinearString *str2)
  2961. {
  2962. if (str1 == str2)
  2963. return true;
  2964. size_t length1 = str1->length();
  2965. if (length1 != str2->length())
  2966. return false;
  2967. return PodEqual(str1->chars(), str2->chars(), length1);
  2968. }
  2969. } /* namespace js */
  2970. namespace js {
  2971. static bool
  2972. CompareStringsImpl(JSContext *cx, JSString *str1, JSString *str2, int32_t *result)
  2973. {
  2974. JS_ASSERT(str1);
  2975. JS_ASSERT(str2);
  2976. if (str1 == str2) {
  2977. *result = 0;
  2978. return true;
  2979. }
  2980. const jschar *s1 = str1->getChars(cx);
  2981. if (!s1)
  2982. return false;
  2983. const jschar *s2 = str2->getChars(cx);
  2984. if (!s2)
  2985. return false;
  2986. return CompareChars(s1, str1->length(), s2, str2->length(), result);
  2987. }
  2988. bool
  2989. CompareStrings(JSContext *cx, JSString *str1, JSString *str2, int32_t *result)
  2990. {
  2991. return CompareStringsImpl(cx, str1, str2, result);
  2992. }
  2993. } /* namespace js */
  2994. namespace js {
  2995. bool
  2996. StringEqualsAscii(JSLinearString *str, const char *asciiBytes)
  2997. {
  2998. size_t length = strlen(asciiBytes);
  2999. #ifdef DEBUG
  3000. for (size_t i = 0; i != length; ++i)
  3001. JS_ASSERT(unsigned(asciiBytes[i]) <= 127);
  3002. #endif
  3003. if (length != str->length())
  3004. return false;
  3005. const jschar *chars = str->chars();
  3006. for (size_t i = 0; i != length; ++i) {
  3007. if (unsigned(asciiBytes[i]) != unsigned(chars[i]))
  3008. return false;
  3009. }
  3010. return true;
  3011. }
  3012. } /* namespacejs */
  3013. size_t
  3014. js_strlen(const jschar *s)
  3015. {
  3016. const jschar *t;
  3017. for (t = s; *t != 0; t++)
  3018. continue;
  3019. return (size_t)(t - s);
  3020. }
  3021. jschar *
  3022. js_strchr(const jschar *s, jschar c)
  3023. {
  3024. while (*s != 0) {
  3025. if (*s == c)
  3026. return (jschar *)s;
  3027. s++;
  3028. }
  3029. return NULL;
  3030. }
  3031. jschar *
  3032. js_strchr_limit(const jschar *s, jschar c, const jschar *limit)
  3033. {
  3034. while (s < limit) {
  3035. if (*s == c)
  3036. return (jschar *)s;
  3037. s++;
  3038. }
  3039. return NULL;
  3040. }
  3041. namespace js {
  3042. jschar *
  3043. InflateString(JSContext *cx, const char *bytes, size_t *lengthp, FlationCoding fc)
  3044. {
  3045. size_t nchars;
  3046. jschar *chars;
  3047. size_t nbytes = *lengthp;
  3048. if (js_CStringsAreUTF8 || fc == CESU8Encoding) {
  3049. if (!InflateUTF8StringToBuffer(cx, bytes, nbytes, NULL, &nchars, fc))
  3050. goto bad;
  3051. chars = (jschar *) cx->malloc_((nchars + 1) * sizeof (jschar));
  3052. if (!chars)
  3053. goto bad;
  3054. JS_ALWAYS_TRUE(InflateUTF8StringToBuffer(cx, bytes, nbytes, chars, &nchars, fc));
  3055. } else {
  3056. nchars = nbytes;
  3057. chars = (jschar *) cx->malloc_((nchars + 1) * sizeof(jschar));
  3058. if (!chars)
  3059. goto bad;
  3060. for (size_t i = 0; i < nchars; i++)
  3061. chars[i] = (unsigned char) bytes[i];
  3062. }
  3063. *lengthp = nchars;
  3064. chars[nchars] = 0;
  3065. return chars;
  3066. bad:
  3067. /*
  3068. * For compatibility with callers of JS_DecodeBytes we must zero lengthp
  3069. * on errors.
  3070. */
  3071. *lengthp = 0;
  3072. return NULL;
  3073. }
  3074. /*
  3075. * May be called with null cx.
  3076. */
  3077. char *
  3078. DeflateString(JSContext *cx, const jschar *chars, size_t nchars)
  3079. {
  3080. size_t nbytes, i;
  3081. char *bytes;
  3082. if (js_CStringsAreUTF8) {
  3083. nbytes = GetDeflatedStringLength(cx, chars, nchars);
  3084. if (nbytes == (size_t) -1)
  3085. return NULL;
  3086. bytes = (char *) (cx ? cx->malloc_(nbytes + 1) : OffTheBooks::malloc_(nbytes + 1));
  3087. if (!bytes)
  3088. return NULL;
  3089. JS_ALWAYS_TRUE(DeflateStringToBuffer(cx, chars, nchars, bytes, &nbytes));
  3090. } else {
  3091. nbytes = nchars;
  3092. bytes = (char *) (cx ? cx->malloc_(nbytes + 1) : OffTheBooks::malloc_(nbytes + 1));
  3093. if (!bytes)
  3094. return NULL;
  3095. for (i = 0; i < nbytes; i++)
  3096. bytes[i] = (char) chars[i];
  3097. }
  3098. bytes[nbytes] = 0;
  3099. return bytes;
  3100. }
  3101. size_t
  3102. GetDeflatedStringLength(JSContext *cx, const jschar *chars, size_t nchars)
  3103. {
  3104. if (!js_CStringsAreUTF8)
  3105. return nchars;
  3106. return GetDeflatedUTF8StringLength(cx, chars, nchars);
  3107. }
  3108. /*
  3109. * May be called with null cx through public API, see below.
  3110. */
  3111. size_t
  3112. GetDeflatedUTF8StringLength(JSContext *cx, const jschar *chars,
  3113. size_t nchars, FlationCoding fc)
  3114. {
  3115. size_t nbytes;
  3116. const jschar *end;
  3117. uintN c, c2;
  3118. char buffer[10];
  3119. bool useCESU8 = fc == CESU8Encoding;
  3120. nbytes = nchars;
  3121. for (end = chars + nchars; chars != end; chars++) {
  3122. c = *chars;
  3123. if (c < 0x80)
  3124. continue;
  3125. if (0xD800 <= c && c <= 0xDFFF && !useCESU8) {
  3126. /* Surrogate pair. */
  3127. chars++;
  3128. /* nbytes sets 1 length since this is surrogate pair. */
  3129. nbytes--;
  3130. if (c >= 0xDC00 || chars == end)
  3131. goto bad_surrogate;
  3132. c2 = *chars;
  3133. if (c2 < 0xDC00 || c2 > 0xDFFF)
  3134. goto bad_surrogate;
  3135. c = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
  3136. }
  3137. c >>= 11;
  3138. nbytes++;
  3139. while (c) {
  3140. c >>= 5;
  3141. nbytes++;
  3142. }
  3143. }
  3144. return nbytes;
  3145. bad_surrogate:
  3146. if (cx) {
  3147. JS_snprintf(buffer, 10, "0x%x", c);
  3148. JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, js_GetErrorMessage,
  3149. NULL, JSMSG_BAD_SURROGATE_CHAR, buffer);
  3150. }
  3151. return (size_t) -1;
  3152. }
  3153. bool
  3154. DeflateStringToBuffer(JSContext *cx, const jschar *src, size_t srclen,
  3155. char *dst, size_t *dstlenp)
  3156. {
  3157. size_t dstlen, i;
  3158. dstlen = *dstlenp;
  3159. if (!js_CStringsAreUTF8) {
  3160. if (srclen > dstlen) {
  3161. for (i = 0; i < dstlen; i++)
  3162. dst[i] = (char) src[i];
  3163. if (cx) {
  3164. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3165. JSMSG_BUFFER_TOO_SMALL);
  3166. }
  3167. return JS_FALSE;
  3168. }
  3169. for (i = 0; i < srclen; i++)
  3170. dst[i] = (char) src[i];
  3171. *dstlenp = srclen;
  3172. return JS_TRUE;
  3173. }
  3174. return DeflateStringToUTF8Buffer(cx, src, srclen, dst, dstlenp);
  3175. }
  3176. bool
  3177. DeflateStringToUTF8Buffer(JSContext *cx, const jschar *src, size_t srclen,
  3178. char *dst, size_t *dstlenp, FlationCoding fc)
  3179. {
  3180. size_t i, utf8Len;
  3181. jschar c, c2;
  3182. uint32_t v;
  3183. uint8_t utf8buf[6];
  3184. bool useCESU8 = fc == CESU8Encoding;
  3185. size_t dstlen = *dstlenp;
  3186. size_t origDstlen = dstlen;
  3187. while (srclen) {
  3188. c = *src++;
  3189. srclen--;
  3190. if ((c >= 0xDC00) && (c <= 0xDFFF) && !useCESU8)
  3191. goto badSurrogate;
  3192. if (c < 0xD800 || c > 0xDBFF || useCESU8) {
  3193. v = c;
  3194. } else {
  3195. if (srclen < 1)
  3196. goto badSurrogate;
  3197. c2 = *src;
  3198. if ((c2 < 0xDC00) || (c2 > 0xDFFF))
  3199. goto badSurrogate;
  3200. src++;
  3201. srclen--;
  3202. v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
  3203. }
  3204. if (v < 0x0080) {
  3205. /* no encoding necessary - performance hack */
  3206. if (dstlen == 0)
  3207. goto bufferTooSmall;
  3208. *dst++ = (char) v;
  3209. utf8Len = 1;
  3210. } else {
  3211. utf8Len = js_OneUcs4ToUtf8Char(utf8buf, v);
  3212. if (utf8Len > dstlen)
  3213. goto bufferTooSmall;
  3214. for (i = 0; i < utf8Len; i++)
  3215. *dst++ = (char) utf8buf[i];
  3216. }
  3217. dstlen -= utf8Len;
  3218. }
  3219. *dstlenp = (origDstlen - dstlen);
  3220. return JS_TRUE;
  3221. badSurrogate:
  3222. *dstlenp = (origDstlen - dstlen);
  3223. /* Delegate error reporting to the measurement function. */
  3224. if (cx)
  3225. GetDeflatedStringLength(cx, src - 1, srclen + 1);
  3226. return JS_FALSE;
  3227. bufferTooSmall:
  3228. *dstlenp = (origDstlen - dstlen);
  3229. if (cx) {
  3230. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3231. JSMSG_BUFFER_TOO_SMALL);
  3232. }
  3233. return JS_FALSE;
  3234. }
  3235. bool
  3236. InflateStringToBuffer(JSContext *cx, const char *src, size_t srclen,
  3237. jschar *dst, size_t *dstlenp)
  3238. {
  3239. size_t dstlen, i;
  3240. if (js_CStringsAreUTF8)
  3241. return InflateUTF8StringToBuffer(cx, src, srclen, dst, dstlenp);
  3242. if (dst) {
  3243. dstlen = *dstlenp;
  3244. if (srclen > dstlen) {
  3245. for (i = 0; i < dstlen; i++)
  3246. dst[i] = (unsigned char) src[i];
  3247. if (cx) {
  3248. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3249. JSMSG_BUFFER_TOO_SMALL);
  3250. }
  3251. return JS_FALSE;
  3252. }
  3253. for (i = 0; i < srclen; i++)
  3254. dst[i] = (unsigned char) src[i];
  3255. }
  3256. *dstlenp = srclen;
  3257. return JS_TRUE;
  3258. }
  3259. bool
  3260. InflateUTF8StringToBuffer(JSContext *cx, const char *src, size_t srclen,
  3261. jschar *dst, size_t *dstlenp, FlationCoding fc)
  3262. {
  3263. size_t dstlen, origDstlen, offset, j, n;
  3264. uint32_t v;
  3265. dstlen = dst ? *dstlenp : (size_t) -1;
  3266. origDstlen = dstlen;
  3267. offset = 0;
  3268. bool useCESU8 = fc == CESU8Encoding;
  3269. while (srclen) {
  3270. v = (uint8_t) *src;
  3271. n = 1;
  3272. if (v & 0x80) {
  3273. while (v & (0x80 >> n))
  3274. n++;
  3275. if (n > srclen)
  3276. goto bufferTooSmall;
  3277. if (n == 1 || n > 4)
  3278. goto badCharacter;
  3279. for (j = 1; j < n; j++) {
  3280. if ((src[j] & 0xC0) != 0x80)
  3281. goto badCharacter;
  3282. }
  3283. v = Utf8ToOneUcs4Char((uint8_t *)src, n);
  3284. if (v >= 0x10000 && !useCESU8) {
  3285. v -= 0x10000;
  3286. if (v > 0xFFFFF || dstlen < 2) {
  3287. *dstlenp = (origDstlen - dstlen);
  3288. if (cx) {
  3289. char buffer[10];
  3290. JS_snprintf(buffer, 10, "0x%x", v + 0x10000);
  3291. JS_ReportErrorFlagsAndNumber(cx,
  3292. JSREPORT_ERROR,
  3293. js_GetErrorMessage, NULL,
  3294. JSMSG_UTF8_CHAR_TOO_LARGE,
  3295. buffer);
  3296. }
  3297. return JS_FALSE;
  3298. }
  3299. if (dst) {
  3300. *dst++ = (jschar)((v >> 10) + 0xD800);
  3301. v = (jschar)((v & 0x3FF) + 0xDC00);
  3302. }
  3303. dstlen--;
  3304. }
  3305. }
  3306. if (!dstlen)
  3307. goto bufferTooSmall;
  3308. if (dst)
  3309. *dst++ = (jschar) v;
  3310. dstlen--;
  3311. offset += n;
  3312. src += n;
  3313. srclen -= n;
  3314. }
  3315. *dstlenp = (origDstlen - dstlen);
  3316. return JS_TRUE;
  3317. badCharacter:
  3318. *dstlenp = (origDstlen - dstlen);
  3319. if (cx) {
  3320. char buffer[10];
  3321. JS_snprintf(buffer, 10, "%d", offset);
  3322. JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR,
  3323. js_GetErrorMessage, NULL,
  3324. JSMSG_MALFORMED_UTF8_CHAR,
  3325. buffer);
  3326. }
  3327. return JS_FALSE;
  3328. bufferTooSmall:
  3329. *dstlenp = (origDstlen - dstlen);
  3330. if (cx) {
  3331. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3332. JSMSG_BUFFER_TOO_SMALL);
  3333. }
  3334. return JS_FALSE;
  3335. }
} /* namespace js */
  3337. const jschar js_uriReservedPlusPound_ucstr[] =
  3338. {';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '#', 0};
  3339. const jschar js_uriUnescaped_ucstr[] =
  3340. {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  3341. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
  3342. 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
  3343. 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  3344. 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  3345. '-', '_', '.', '!', '~', '*', '\'', '(', ')', 0};
  3346. #define ____ false
  3347. /*
  3348. * Identifier start chars:
  3349. * - 36: $
  3350. * - 65..90: A..Z
  3351. * - 95: _
  3352. * - 97..122: a..z
  3353. */
  3354. const bool js_isidstart[] = {
  3355. /* 0 1 2 3 4 5 6 7 8 9 */
  3356. /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3357. /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3358. /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3359. /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
  3360. /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3361. /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3362. /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
  3363. /* 7 */ true, true, true, true, true, true, true, true, true, true,
  3364. /* 8 */ true, true, true, true, true, true, true, true, true, true,
  3365. /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
  3366. /* 10 */ true, true, true, true, true, true, true, true, true, true,
  3367. /* 11 */ true, true, true, true, true, true, true, true, true, true,
  3368. /* 12 */ true, true, true, ____, ____, ____, ____, ____
  3369. };
  3370. /*
  3371. * Identifier chars:
  3372. * - 36: $
  3373. * - 48..57: 0..9
  3374. * - 65..90: A..Z
  3375. * - 95: _
  3376. * - 97..122: a..z
  3377. */
  3378. const bool js_isident[] = {
  3379. /* 0 1 2 3 4 5 6 7 8 9 */
  3380. /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3381. /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3382. /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3383. /* 3 */ ____, ____, ____, ____, ____, ____, true, ____, ____, ____,
  3384. /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
  3385. /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
  3386. /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
  3387. /* 7 */ true, true, true, true, true, true, true, true, true, true,
  3388. /* 8 */ true, true, true, true, true, true, true, true, true, true,
  3389. /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
  3390. /* 10 */ true, true, true, true, true, true, true, true, true, true,
  3391. /* 11 */ true, true, true, true, true, true, true, true, true, true,
  3392. /* 12 */ true, true, true, ____, ____, ____, ____, ____
  3393. };
  3394. /* Whitespace chars: '\t', '\n', '\v', '\f', '\r', ' '. */
  3395. const bool js_isspace[] = {
  3396. /* 0 1 2 3 4 5 6 7 8 9 */
  3397. /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, true,
  3398. /* 1 */ true, true, true, true, ____, ____, ____, ____, ____, ____,
  3399. /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3400. /* 3 */ ____, ____, true, ____, ____, ____, ____, ____, ____, ____,
  3401. /* 4 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3402. /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3403. /* 6 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3404. /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3405. /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3406. /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3407. /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3408. /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
  3409. /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
  3410. };
  3411. #undef ____
  3412. #define URI_CHUNK 64U
  3413. static inline bool
  3414. TransferBufferToString(JSContext *cx, StringBuffer &sb, Value *rval)
  3415. {
  3416. JSString *str = sb.finishString();
  3417. if (!str)
  3418. return false;
  3419. rval->setString(str);
  3420. return true;
  3421. }
  3422. /*
  3423. * ECMA 3, 15.1.3 URI Handling Function Properties
  3424. *
  3425. * The following are implementations of the algorithms
  3426. * given in the ECMA specification for the hidden functions
  3427. * 'Encode' and 'Decode'.
  3428. */
  3429. static JSBool
  3430. Encode(JSContext *cx, JSString *str, const jschar *unescapedSet,
  3431. const jschar *unescapedSet2, Value *rval)
  3432. {
  3433. static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
  3434. size_t length = str->length();
  3435. const jschar *chars = str->getChars(cx);
  3436. if (!chars)
  3437. return JS_FALSE;
  3438. if (length == 0) {
  3439. rval->setString(cx->runtime->emptyString);
  3440. return JS_TRUE;
  3441. }
  3442. StringBuffer sb(cx);
  3443. jschar hexBuf[4];
  3444. hexBuf[0] = '%';
  3445. hexBuf[3] = 0;
  3446. for (size_t k = 0; k < length; k++) {
  3447. jschar c = chars[k];
  3448. if (js_strchr(unescapedSet, c) ||
  3449. (unescapedSet2 && js_strchr(unescapedSet2, c))) {
  3450. if (!sb.append(c))
  3451. return JS_FALSE;
  3452. } else {
  3453. if ((c >= 0xDC00) && (c <= 0xDFFF)) {
  3454. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3455. JSMSG_BAD_URI, NULL);
  3456. return JS_FALSE;
  3457. }
  3458. uint32_t v;
  3459. if (c < 0xD800 || c > 0xDBFF) {
  3460. v = c;
  3461. } else {
  3462. k++;
  3463. if (k == length) {
  3464. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3465. JSMSG_BAD_URI, NULL);
  3466. return JS_FALSE;
  3467. }
  3468. jschar c2 = chars[k];
  3469. if ((c2 < 0xDC00) || (c2 > 0xDFFF)) {
  3470. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
  3471. JSMSG_BAD_URI, NULL);
  3472. return JS_FALSE;
  3473. }
  3474. v = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
  3475. }
  3476. uint8_t utf8buf[4];
  3477. size_t L = js_OneUcs4ToUtf8Char(utf8buf, v);
  3478. for (size_t j = 0; j < L; j++) {
  3479. hexBuf[1] = HexDigits[utf8buf[j] >> 4];
  3480. hexBuf[2] = HexDigits[utf8buf[j] & 0xf];
  3481. if (!sb.append(hexBuf, 3))
  3482. return JS_FALSE;
  3483. }
  3484. }
  3485. }
  3486. return TransferBufferToString(cx, sb, rval);
  3487. }
  3488. static JSBool
  3489. Decode(JSContext *cx, JSString *str, const jschar *reservedSet, Value *rval)
  3490. {
  3491. size_t length = str->length();
  3492. const jschar *chars = str->getChars(cx);
  3493. if (!chars)
  3494. return JS_FALSE;
  3495. if (length == 0) {
  3496. rval->setString(cx->runtime->emptyString);
  3497. return JS_TRUE;
  3498. }
  3499. StringBuffer sb(cx);
  3500. for (size_t k = 0; k < length; k++) {
  3501. jschar c = chars[k];
  3502. if (c == '%') {
  3503. size_t start = k;
  3504. if ((k + 2) >= length)
  3505. goto report_bad_uri;
  3506. if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
  3507. goto report_bad_uri;
  3508. jsuint B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
  3509. k += 2;
  3510. if (!(B & 0x80)) {
  3511. c = (jschar)B;
  3512. } else {
  3513. intN n = 1;
  3514. while (B & (0x80 >> n))
  3515. n++;
  3516. if (n == 1 || n > 4)
  3517. goto report_bad_uri;
  3518. uint8_t octets[4];
  3519. octets[0] = (uint8_t)B;
  3520. if (k + 3 * (n - 1) >= length)
  3521. goto report_bad_uri;
  3522. for (intN j = 1; j < n; j++) {
  3523. k++;
  3524. if (chars[k] != '%')
  3525. goto report_bad_uri;
  3526. if (!JS7_ISHEX(chars[k+1]) || !JS7_ISHEX(chars[k+2]))
  3527. goto report_bad_uri;
  3528. B = JS7_UNHEX(chars[k+1]) * 16 + JS7_UNHEX(chars[k+2]);
  3529. if ((B & 0xC0) != 0x80)
  3530. goto report_bad_uri;
  3531. k += 2;
  3532. octets[j] = (char)B;
  3533. }
  3534. uint32_t v = Utf8ToOneUcs4Char(octets, n);
  3535. if (v >= 0x10000) {
  3536. v -= 0x10000;
  3537. if (v > 0xFFFFF)
  3538. goto report_bad_uri;
  3539. c = (jschar)((v & 0x3FF) + 0xDC00);
  3540. jschar H = (jschar)((v >> 10) + 0xD800);
  3541. if (!sb.append(H))
  3542. return JS_FALSE;
  3543. } else {
  3544. c = (jschar)v;
  3545. }
  3546. }
  3547. if (js_strchr(reservedSet, c)) {
  3548. if (!sb.append(chars + start, k - start + 1))
  3549. return JS_FALSE;
  3550. } else {
  3551. if (!sb.append(c))
  3552. return JS_FALSE;
  3553. }
  3554. } else {
  3555. if (!sb.append(c))
  3556. return JS_FALSE;
  3557. }
  3558. }
  3559. return TransferBufferToString(cx, sb, rval);
  3560. report_bad_uri:
  3561. JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, JSMSG_BAD_URI);
  3562. /* FALL THROUGH */
  3563. return JS_FALSE;
  3564. }
  3565. static JSBool
  3566. str_decodeURI(JSContext *cx, uintN argc, Value *vp)
  3567. {
  3568. CallArgs args = CallArgsFromVp(argc, vp);
  3569. JSLinearString *str = ArgToRootedString(cx, args, 0);
  3570. if (!str)
  3571. return false;
  3572. Value result;
  3573. if (!Decode(cx, str, js_uriReservedPlusPound_ucstr, &result))
  3574. return false;
  3575. args.rval() = result;
  3576. return true;
  3577. }
  3578. static JSBool
  3579. str_decodeURI_Component(JSContext *cx, uintN argc, Value *vp)
  3580. {
  3581. CallArgs args = CallArgsFromVp(argc, vp);
  3582. JSLinearString *str = ArgToRootedString(cx, args, 0);
  3583. if (!str)
  3584. return false;
  3585. Value result;
  3586. if (!Decode(cx, str, js_empty_ucstr, &result))
  3587. return false;
  3588. args.rval() = result;
  3589. return true;
  3590. }
  3591. static JSBool
  3592. str_encodeURI(JSContext *cx, uintN argc, Value *vp)
  3593. {
  3594. CallArgs args = CallArgsFromVp(argc, vp);
  3595. JSLinearString *str = ArgToRootedString(cx, args, 0);
  3596. if (!str)
  3597. return false;
  3598. Value result;
  3599. if (!Encode(cx, str, js_uriReservedPlusPound_ucstr, js_uriUnescaped_ucstr, &result))
  3600. return false;
  3601. args.rval() = result;
  3602. return true;
  3603. }
  3604. static JSBool
  3605. str_encodeURI_Component(JSContext *cx, uintN argc, Value *vp)
  3606. {
  3607. CallArgs args = CallArgsFromVp(argc, vp);
  3608. JSLinearString *str = ArgToRootedString(cx, args, 0);
  3609. if (!str)
  3610. return false;
  3611. Value result;
  3612. if (!Encode(cx, str, js_uriUnescaped_ucstr, NULL, &result))
  3613. return false;
  3614. args.rval() = result;
  3615. return true;
  3616. }
  3617. /*
  3618. * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be at
  3619. * least 4 bytes long. Return the number of UTF-8 bytes of data written.
  3620. */
  3621. int
  3622. js_OneUcs4ToUtf8Char(uint8_t *utf8Buffer, uint32_t ucs4Char)
  3623. {
  3624. int utf8Length = 1;
  3625. JS_ASSERT(ucs4Char <= 0x10FFFF);
  3626. if (ucs4Char < 0x80) {
  3627. *utf8Buffer = (uint8_t)ucs4Char;
  3628. } else {
  3629. int i;
  3630. uint32_t a = ucs4Char >> 11;
  3631. utf8Length = 2;
  3632. while (a) {
  3633. a >>= 5;
  3634. utf8Length++;
  3635. }
  3636. i = utf8Length;
  3637. while (--i) {
  3638. utf8Buffer[i] = (uint8_t)((ucs4Char & 0x3F) | 0x80);
  3639. ucs4Char >>= 6;
  3640. }
  3641. *utf8Buffer = (uint8_t)(0x100 - (1 << (8-utf8Length)) + ucs4Char);
  3642. }
  3643. return utf8Length;
  3644. }
  3645. /*
  3646. * Convert a utf8 character sequence into a UCS-4 character and return that
  3647. * character. It is assumed that the caller already checked that the sequence
  3648. * is valid.
  3649. */
  3650. static uint32_t
  3651. Utf8ToOneUcs4Char(const uint8_t *utf8Buffer, int utf8Length)
  3652. {
  3653. JS_ASSERT(1 <= utf8Length && utf8Length <= 4);
  3654. if (utf8Length == 1) {
  3655. JS_ASSERT(!(*utf8Buffer & 0x80));
  3656. return *utf8Buffer;
  3657. }
  3658. /* from Unicode 3.1, non-shortest form is illegal */
  3659. static const uint32_t minucs4Table[] = { 0x80, 0x800, 0x10000 };
  3660. JS_ASSERT((*utf8Buffer & (0x100 - (1 << (7 - utf8Length)))) ==
  3661. (0x100 - (1 << (8 - utf8Length))));
  3662. uint32_t ucs4Char = *utf8Buffer++ & ((1 << (7 - utf8Length)) - 1);
  3663. uint32_t minucs4Char = minucs4Table[utf8Length - 2];
  3664. while (--utf8Length) {
  3665. JS_ASSERT((*utf8Buffer & 0xC0) == 0x80);
  3666. ucs4Char = (ucs4Char << 6) | (*utf8Buffer++ & 0x3F);
  3667. }
  3668. if (JS_UNLIKELY(ucs4Char < minucs4Char || (ucs4Char >= 0xD800 && ucs4Char <= 0xDFFF)))
  3669. return INVALID_UTF8;
  3670. return ucs4Char;
  3671. }
  3672. namespace js {
  3673. size_t
  3674. PutEscapedStringImpl(char *buffer, size_t bufferSize, FILE *fp, JSLinearString *str, uint32_t quote)
  3675. {
  3676. enum {
  3677. STOP, FIRST_QUOTE, LAST_QUOTE, CHARS, ESCAPE_START, ESCAPE_MORE
  3678. } state;
  3679. JS_ASSERT(quote == 0 || quote == '\'' || quote == '"');
  3680. JS_ASSERT_IF(!buffer, bufferSize == 0);
  3681. JS_ASSERT_IF(fp, !buffer);
  3682. if (bufferSize == 0)
  3683. buffer = NULL;
  3684. else
  3685. bufferSize--;
  3686. const jschar *chars = str->chars();
  3687. const jschar *charsEnd = chars + str->length();
  3688. size_t n = 0;
  3689. state = FIRST_QUOTE;
  3690. uintN shift = 0;
  3691. uintN hex = 0;
  3692. uintN u = 0;
  3693. char c = 0; /* to quell GCC warnings */
  3694. for (;;) {
  3695. switch (state) {
  3696. case STOP:
  3697. goto stop;
  3698. case FIRST_QUOTE:
  3699. state = CHARS;
  3700. goto do_quote;
  3701. case LAST_QUOTE:
  3702. state = STOP;
  3703. do_quote:
  3704. if (quote == 0)
  3705. continue;
  3706. c = (char)quote;
  3707. break;
  3708. case CHARS:
  3709. if (chars == charsEnd) {
  3710. state = LAST_QUOTE;
  3711. continue;
  3712. }
  3713. u = *chars++;
  3714. if (u < ' ') {
  3715. if (u != 0) {
  3716. const char *escape = strchr(js_EscapeMap, (int)u);
  3717. if (escape) {
  3718. u = escape[1];
  3719. goto do_escape;
  3720. }
  3721. }
  3722. goto do_hex_escape;
  3723. }
  3724. if (u < 127) {
  3725. if (u == quote || u == '\\')
  3726. goto do_escape;
  3727. c = (char)u;
  3728. } else if (u < 0x100) {
  3729. goto do_hex_escape;
  3730. } else {
  3731. shift = 16;
  3732. hex = u;
  3733. u = 'u';
  3734. goto do_escape;
  3735. }
  3736. break;
  3737. do_hex_escape:
  3738. shift = 8;
  3739. hex = u;
  3740. u = 'x';
  3741. do_escape:
  3742. c = '\\';
  3743. state = ESCAPE_START;
  3744. break;
  3745. case ESCAPE_START:
  3746. JS_ASSERT(' ' <= u && u < 127);
  3747. c = (char)u;
  3748. state = ESCAPE_MORE;
  3749. break;
  3750. case ESCAPE_MORE:
  3751. if (shift == 0) {
  3752. state = CHARS;
  3753. continue;
  3754. }
  3755. shift -= 4;
  3756. u = 0xF & (hex >> shift);
  3757. c = (char)(u + (u < 10 ? '0' : 'A' - 10));
  3758. break;
  3759. }
  3760. if (buffer) {
  3761. JS_ASSERT(n <= bufferSize);
  3762. if (n != bufferSize) {
  3763. buffer[n] = c;
  3764. } else {
  3765. buffer[n] = '\0';
  3766. buffer = NULL;
  3767. }
  3768. } else if (fp) {
  3769. if (fputc(c, fp) < 0)
  3770. return size_t(-1);
  3771. }
  3772. n++;
  3773. }
  3774. stop:
  3775. if (buffer)
  3776. buffer[n] = '\0';
  3777. return n;
  3778. }
  3779. } /* namespace js */