PageRenderTime 28ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/packages/closure-library/src/main/resources/com/github/urmuzov/closuremaven/closurelibrarypackage/javascript/goog/string/string.js

https://github.com/urmuzov/closure-maven
JavaScript | 1254 lines | 587 code | 140 blank | 527 comment | 91 complexity | 240a4defbd3fd2ea70cc00f6ae5310cb MD5 | raw file
  1. // Copyright 2006 The Closure Library Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS-IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. /**
  15. * @fileoverview Utilities for string manipulation.
  16. */
  17. /**
  18. * Namespace for string utilities
  19. */
  20. goog.provide('goog.string');
  21. goog.provide('goog.string.Unicode');
  22. /**
  23. * Common Unicode string characters.
  24. * @enum {string}
  25. */
  26. goog.string.Unicode = {
  27. NBSP: '\xa0'
  28. };
  29. /**
  30. * Fast prefix-checker.
  31. * @param {string} str The string to check.
  32. * @param {string} prefix A string to look for at the start of {@code str}.
  33. * @return {boolean} True if {@code str} begins with {@code prefix}.
  34. */
  35. goog.string.startsWith = function(str, prefix) {
  36. return str.lastIndexOf(prefix, 0) == 0;
  37. };
  38. /**
  39. * Fast suffix-checker.
  40. * @param {string} str The string to check.
  41. * @param {string} suffix A string to look for at the end of {@code str}.
  42. * @return {boolean} True if {@code str} ends with {@code suffix}.
  43. */
  44. goog.string.endsWith = function(str, suffix) {
  45. var l = str.length - suffix.length;
  46. return l >= 0 && str.indexOf(suffix, l) == l;
  47. };
  48. /**
  49. * Case-insensitive prefix-checker.
  50. * @param {string} str The string to check.
  51. * @param {string} prefix A string to look for at the end of {@code str}.
  52. * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring
  53. * case).
  54. */
  55. goog.string.caseInsensitiveStartsWith = function(str, prefix) {
  56. return goog.string.caseInsensitiveCompare(
  57. prefix, str.substr(0, prefix.length)) == 0;
  58. };
  59. /**
  60. * Case-insensitive suffix-checker.
  61. * @param {string} str The string to check.
  62. * @param {string} suffix A string to look for at the end of {@code str}.
  63. * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring
  64. * case).
  65. */
  66. goog.string.caseInsensitiveEndsWith = function(str, suffix) {
  67. return goog.string.caseInsensitiveCompare(
  68. suffix, str.substr(str.length - suffix.length, suffix.length)) == 0;
  69. };
  70. /**
  71. * Does simple python-style string substitution.
  72. * subs("foo%s hot%s", "bar", "dog") becomes "foobar hotdog".
  73. * @param {string} str The string containing the pattern.
  74. * @param {...*} var_args The items to substitute into the pattern.
  75. * @return {string} A copy of {@code str} in which each occurrence of
  76. * {@code %s} has been replaced an argument from {@code var_args}.
  77. */
  78. goog.string.subs = function(str, var_args) {
  79. // This appears to be slow, but testing shows it compares more or less
  80. // equivalent to the regex.exec method.
  81. for (var i = 1; i < arguments.length; i++) {
  82. // We cast to String in case an argument is a Function. Replacing $&, for
  83. // example, with $$$& stops the replace from subsituting the whole match
  84. // into the resultant string. $$$& in the first replace becomes $$& in the
  85. // second, which leaves $& in the resultant string. Also:
  86. // $$, $`, $', $n $nn
  87. var replacement = String(arguments[i]).replace(/\$/g, '$$$$');
  88. str = str.replace(/\%s/, replacement);
  89. }
  90. return str;
  91. };
  92. /**
  93. * Converts multiple whitespace chars (spaces, non-breaking-spaces, new lines
  94. * and tabs) to a single space, and strips leading and trailing whitespace.
  95. * @param {string} str Input string.
  96. * @return {string} A copy of {@code str} with collapsed whitespace.
  97. */
  98. goog.string.collapseWhitespace = function(str) {
  99. // Since IE doesn't include non-breaking-space (0xa0) in their \s character
  100. // class (as required by section 7.2 of the ECMAScript spec), we explicitly
  101. // include it in the regexp to enforce consistent cross-browser behavior.
  102. return str.replace(/[\s\xa0]+/g, ' ').replace(/^\s+|\s+$/g, '');
  103. };
  104. /**
  105. * Checks if a string is empty or contains only whitespaces.
  106. * @param {string} str The string to check.
  107. * @return {boolean} True if {@code str} is empty or whitespace only.
  108. */
  109. goog.string.isEmpty = function(str) {
  110. // testing length == 0 first is actually slower in all browsers (about the
  111. // same in Opera).
  112. // Since IE doesn't include non-breaking-space (0xa0) in their \s character
  113. // class (as required by section 7.2 of the ECMAScript spec), we explicitly
  114. // include it in the regexp to enforce consistent cross-browser behavior.
  115. return /^[\s\xa0]*$/.test(str);
  116. };
  117. /**
  118. * Checks if a string is null, empty or contains only whitespaces.
  119. * @param {*} str The string to check.
  120. * @return {boolean} True if{@code str} is null, empty, or whitespace only.
  121. */
  122. goog.string.isEmptySafe = function(str) {
  123. return goog.string.isEmpty(goog.string.makeSafe(str));
  124. };
  125. /**
  126. * Checks if a string is all breaking whitespace.
  127. * @param {string} str The string to check.
  128. * @return {boolean} Whether the string is all breaking whitespace.
  129. */
  130. goog.string.isBreakingWhitespace = function(str) {
  131. return !/[^\t\n\r ]/.test(str);
  132. };
  133. /**
  134. * Checks if a string contains all letters.
  135. * @param {string} str string to check.
  136. * @return {boolean} True if {@code str} consists entirely of letters.
  137. */
  138. goog.string.isAlpha = function(str) {
  139. return !/[^a-zA-Z]/.test(str);
  140. };
  141. /**
  142. * Checks if a string contains only numbers.
  143. * @param {*} str string to check. If not a string, it will be
  144. * casted to one.
  145. * @return {boolean} True if {@code str} is numeric.
  146. */
  147. goog.string.isNumeric = function(str) {
  148. return !/[^0-9]/.test(str);
  149. };
  150. /**
  151. * Checks if a string contains only numbers or letters.
  152. * @param {string} str string to check.
  153. * @return {boolean} True if {@code str} is alphanumeric.
  154. */
  155. goog.string.isAlphaNumeric = function(str) {
  156. return !/[^a-zA-Z0-9]/.test(str);
  157. };
  158. /**
  159. * Checks if a character is a space character.
  160. * @param {string} ch Character to check.
  161. * @return {boolean} True if {code ch} is a space.
  162. */
  163. goog.string.isSpace = function(ch) {
  164. return ch == ' ';
  165. };
  166. /**
  167. * Checks if a character is a valid unicode character.
  168. * @param {string} ch Character to check.
  169. * @return {boolean} True if {code ch} is a valid unicode character.
  170. */
  171. goog.string.isUnicodeChar = function(ch) {
  172. return ch.length == 1 && ch >= ' ' && ch <= '~' ||
  173. ch >= '\u0080' && ch <= '\uFFFD';
  174. };
  175. /**
  176. * Takes a string and replaces newlines with a space. Multiple lines are
  177. * replaced with a single space.
  178. * @param {string} str The string from which to strip newlines.
  179. * @return {string} A copy of {@code str} stripped of newlines.
  180. */
  181. goog.string.stripNewlines = function(str) {
  182. return str.replace(/(\r\n|\r|\n)+/g, ' ');
  183. };
  184. /**
  185. * Replaces Windows and Mac new lines with unix style: \r or \r\n with \n.
  186. * @param {string} str The string to in which to canonicalize newlines.
  187. * @return {string} {@code str} A copy of {@code} with canonicalized newlines.
  188. */
  189. goog.string.canonicalizeNewlines = function(str) {
  190. return str.replace(/(\r\n|\r|\n)/g, '\n');
  191. };
  192. /**
  193. * Normalizes whitespace in a string, replacing all whitespace chars with
  194. * a space.
  195. * @param {string} str The string in which to normalize whitespace.
  196. * @return {string} A copy of {@code str} with all whitespace normalized.
  197. */
  198. goog.string.normalizeWhitespace = function(str) {
  199. return str.replace(/\xa0|\s/g, ' ');
  200. };
  201. /**
  202. * Normalizes spaces in a string, replacing all consecutive spaces and tabs
  203. * with a single space. Replaces non-breaking space with a space.
  204. * @param {string} str The string in which to normalize spaces.
  205. * @return {string} A copy of {@code str} with all consecutive spaces and tabs
  206. * replaced with a single space.
  207. */
  208. goog.string.normalizeSpaces = function(str) {
  209. return str.replace(/\xa0|[ \t]+/g, ' ');
  210. };
  211. /**
  212. * Removes the breaking spaces from the left and right of the string and
  213. * collapses the sequences of breaking spaces in the middle into single spaces.
  214. * The original and the result strings render the same way in HTML.
  215. * @param {string} str A string in which to collapse spaces.
  216. * @return {string} Copy of the string with normalized breaking spaces.
  217. */
  218. goog.string.collapseBreakingSpaces = function(str) {
  219. return str.replace(/[\t\r\n ]+/g, ' ').replace(
  220. /^[\t\r\n ]+|[\t\r\n ]+$/g, '');
  221. };
  222. /**
  223. * Trims white spaces to the left and right of a string.
  224. * @param {string} str The string to trim.
  225. * @return {string} A trimmed copy of {@code str}.
  226. */
  227. goog.string.trim = function(str) {
  228. // Since IE doesn't include non-breaking-space (0xa0) in their \s character
  229. // class (as required by section 7.2 of the ECMAScript spec), we explicitly
  230. // include it in the regexp to enforce consistent cross-browser behavior.
  231. return str.replace(/^[\s\xa0]+|[\s\xa0]+$/g, '');
  232. };
  233. /**
  234. * Trims whitespaces at the left end of a string.
  235. * @param {string} str The string to left trim.
  236. * @return {string} A trimmed copy of {@code str}.
  237. */
  238. goog.string.trimLeft = function(str) {
  239. // Since IE doesn't include non-breaking-space (0xa0) in their \s character
  240. // class (as required by section 7.2 of the ECMAScript spec), we explicitly
  241. // include it in the regexp to enforce consistent cross-browser behavior.
  242. return str.replace(/^[\s\xa0]+/, '');
  243. };
  244. /**
  245. * Trims whitespaces at the right end of a string.
  246. * @param {string} str The string to right trim.
  247. * @return {string} A trimmed copy of {@code str}.
  248. */
  249. goog.string.trimRight = function(str) {
  250. // Since IE doesn't include non-breaking-space (0xa0) in their \s character
  251. // class (as required by section 7.2 of the ECMAScript spec), we explicitly
  252. // include it in the regexp to enforce consistent cross-browser behavior.
  253. return str.replace(/[\s\xa0]+$/, '');
  254. };
  255. /**
  256. * A string comparator that ignores case.
  257. * -1 = str1 less than str2
  258. * 0 = str1 equals str2
  259. * 1 = str1 greater than str2
  260. *
  261. * @param {string} str1 The string to compare.
  262. * @param {string} str2 The string to compare {@code str1} to.
  263. * @return {number} The comparator result, as described above.
  264. */
  265. goog.string.caseInsensitiveCompare = function(str1, str2) {
  266. var test1 = String(str1).toLowerCase();
  267. var test2 = String(str2).toLowerCase();
  268. if (test1 < test2) {
  269. return -1;
  270. } else if (test1 == test2) {
  271. return 0;
  272. } else {
  273. return 1;
  274. }
  275. };
  276. /**
  277. * Regular expression used for splitting a string into substrings of fractional
  278. * numbers, integers, and non-numeric characters.
  279. * @type {RegExp}
  280. * @private
  281. */
  282. goog.string.numerateCompareRegExp_ = /(\.\d+)|(\d+)|(\D+)/g;
  283. /**
  284. * String comparison function that handles numbers in a way humans might expect.
  285. * Using this function, the string "File 2.jpg" sorts before "File 10.jpg". The
  286. * comparison is mostly case-insensitive, though strings that are identical
  287. * except for case are sorted with the upper-case strings before lower-case.
  288. *
  289. * This comparison function is significantly slower (about 500x) than either
  290. * the default or the case-insensitive compare. It should not be used in
  291. * time-critical code, but should be fast enough to sort several hundred short
  292. * strings (like filenames) with a reasonable delay.
  293. *
  294. * @param {string} str1 The string to compare in a numerically sensitive way.
  295. * @param {string} str2 The string to compare {@code str1} to.
  296. * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
  297. * 0 if str1 > str2.
  298. */
  299. goog.string.numerateCompare = function(str1, str2) {
  300. if (str1 == str2) {
  301. return 0;
  302. }
  303. if (!str1) {
  304. return -1;
  305. }
  306. if (!str2) {
  307. return 1;
  308. }
  309. // Using match to split the entire string ahead of time turns out to be faster
  310. // for most inputs than using RegExp.exec or iterating over each character.
  311. var tokens1 = str1.toLowerCase().match(goog.string.numerateCompareRegExp_);
  312. var tokens2 = str2.toLowerCase().match(goog.string.numerateCompareRegExp_);
  313. var count = Math.min(tokens1.length, tokens2.length);
  314. for (var i = 0; i < count; i++) {
  315. var a = tokens1[i];
  316. var b = tokens2[i];
  317. // Compare pairs of tokens, returning if one token sorts before the other.
  318. if (a != b) {
  319. // Only if both tokens are integers is a special comparison required.
  320. // Decimal numbers are sorted as strings (e.g., '.09' < '.1').
  321. var num1 = parseInt(a, 10);
  322. if (!isNaN(num1)) {
  323. var num2 = parseInt(b, 10);
  324. if (!isNaN(num2) && num1 - num2) {
  325. return num1 - num2;
  326. }
  327. }
  328. return a < b ? -1 : 1;
  329. }
  330. }
  331. // If one string is a substring of the other, the shorter string sorts first.
  332. if (tokens1.length != tokens2.length) {
  333. return tokens1.length - tokens2.length;
  334. }
  335. // The two strings must be equivalent except for case (perfect equality is
  336. // tested at the head of the function.) Revert to default ASCII-betical string
  337. // comparison to stablize the sort.
  338. return str1 < str2 ? -1 : 1;
  339. };
  340. /**
  341. * Regular expression used for determining if a string needs to be encoded.
  342. * @type {RegExp}
  343. * @private
  344. */
  345. goog.string.encodeUriRegExp_ = /^[a-zA-Z0-9\-_.!~*'()]*$/;
  346. /**
  347. * URL-encodes a string
  348. * @param {*} str The string to url-encode.
  349. * @return {string} An encoded copy of {@code str} that is safe for urls.
  350. * Note that '#', ':', and other characters used to delimit portions
  351. * of URLs *will* be encoded.
  352. */
  353. goog.string.urlEncode = function(str) {
  354. str = String(str);
  355. // Checking if the search matches before calling encodeURIComponent avoids an
  356. // extra allocation in IE6. This adds about 10us time in FF and a similiar
  357. // over head in IE6 for lower working set apps, but for large working set
  358. // apps like Gmail, it saves about 70us per call.
  359. if (!goog.string.encodeUriRegExp_.test(str)) {
  360. return encodeURIComponent(str);
  361. }
  362. return str;
  363. };
  364. /**
  365. * URL-decodes the string. We need to specially handle '+'s because
  366. * the javascript library doesn't convert them to spaces.
  367. * @param {string} str The string to url decode.
  368. * @return {string} The decoded {@code str}.
  369. */
  370. goog.string.urlDecode = function(str) {
  371. return decodeURIComponent(str.replace(/\+/g, ' '));
  372. };
  373. /**
  374. * Converts \n to <br>s or <br />s.
  375. * @param {string} str The string in which to convert newlines.
  376. * @param {boolean=} opt_xml Whether to use XML compatible tags.
  377. * @return {string} A copy of {@code str} with converted newlines.
  378. */
  379. goog.string.newLineToBr = function(str, opt_xml) {
  380. return str.replace(/(\r\n|\r|\n)/g, opt_xml ? '<br />' : '<br>');
  381. };
  382. /**
  383. * Escape double quote '"' characters in addition to '&', '<', and '>' so that a
  384. * string can be included in an HTML tag attribute value within double quotes.
  385. *
  386. * It should be noted that > doesn't need to be escaped for the HTML or XML to
  387. * be valid, but it has been decided to escape it for consistency with other
  388. * implementations.
  389. *
  390. * NOTE(user):
  391. * HtmlEscape is often called during the generation of large blocks of HTML.
  392. * Using statics for the regular expressions and strings is an optimization
  393. * that can more than half the amount of time IE spends in this function for
  394. * large apps, since strings and regexes both contribute to GC allocations.
  395. *
  396. * Testing for the presence of a character before escaping increases the number
  397. * of function calls, but actually provides a speed increase for the average
  398. * case -- since the average case often doesn't require the escaping of all 4
  399. * characters and indexOf() is much cheaper than replace().
  400. * The worst case does suffer slightly from the additional calls, therefore the
  401. * opt_isLikelyToContainHtmlChars option has been included for situations
  402. * where all 4 HTML entities are very likely to be present and need escaping.
  403. *
  404. * Some benchmarks (times tended to fluctuate +-0.05ms):
  405. * FireFox IE6
  406. * (no chars / average (mix of cases) / all 4 chars)
  407. * no checks 0.13 / 0.22 / 0.22 0.23 / 0.53 / 0.80
  408. * indexOf 0.08 / 0.17 / 0.26 0.22 / 0.54 / 0.84
  409. * indexOf + re test 0.07 / 0.17 / 0.28 0.19 / 0.50 / 0.85
  410. *
  411. * An additional advantage of checking if replace actually needs to be called
  412. * is a reduction in the number of object allocations, so as the size of the
  413. * application grows the difference between the various methods would increase.
  414. *
  415. * @param {string} str string to be escaped.
  416. * @param {boolean=} opt_isLikelyToContainHtmlChars Don't perform a check to see
  417. * if the character needs replacing - use this option if you expect each of
  418. * the characters to appear often. Leave false if you expect few html
  419. * characters to occur in your strings, such as if you are escaping HTML.
  420. * @return {string} An escaped copy of {@code str}.
  421. */
  422. goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) {
  423. if (opt_isLikelyToContainHtmlChars) {
  424. return str.replace(goog.string.amperRe_, '&amp;')
  425. .replace(goog.string.ltRe_, '&lt;')
  426. .replace(goog.string.gtRe_, '&gt;')
  427. .replace(goog.string.quotRe_, '&quot;');
  428. } else {
  429. // quick test helps in the case when there are no chars to replace, in
  430. // worst case this makes barely a difference to the time taken
  431. if (!goog.string.allRe_.test(str)) return str;
  432. // str.indexOf is faster than regex.test in this case
  433. if (str.indexOf('&') != -1) {
  434. str = str.replace(goog.string.amperRe_, '&amp;');
  435. }
  436. if (str.indexOf('<') != -1) {
  437. str = str.replace(goog.string.ltRe_, '&lt;');
  438. }
  439. if (str.indexOf('>') != -1) {
  440. str = str.replace(goog.string.gtRe_, '&gt;');
  441. }
  442. if (str.indexOf('"') != -1) {
  443. str = str.replace(goog.string.quotRe_, '&quot;');
  444. }
  445. return str;
  446. }
  447. };
  448. /**
  449. * Regular expression that matches an ampersand, for use in escaping.
  450. * @type {RegExp}
  451. * @private
  452. */
  453. goog.string.amperRe_ = /&/g;
  454. /**
  455. * Regular expression that matches a less than sign, for use in escaping.
  456. * @type {RegExp}
  457. * @private
  458. */
  459. goog.string.ltRe_ = /</g;
  460. /**
  461. * Regular expression that matches a greater than sign, for use in escaping.
  462. * @type {RegExp}
  463. * @private
  464. */
  465. goog.string.gtRe_ = />/g;
  466. /**
  467. * Regular expression that matches a double quote, for use in escaping.
  468. * @type {RegExp}
  469. * @private
  470. */
  471. goog.string.quotRe_ = /\"/g;
  472. /**
  473. * Regular expression that matches any character that needs to be escaped.
  474. * @type {RegExp}
  475. * @private
  476. */
  477. goog.string.allRe_ = /[&<>\"]/;
  478. /**
  479. * Unescapes an HTML string.
  480. *
  481. * @param {string} str The string to unescape.
  482. * @return {string} An unescaped copy of {@code str}.
  483. */
  484. goog.string.unescapeEntities = function(str) {
  485. if (goog.string.contains(str, '&')) {
  486. // We are careful not to use a DOM if we do not have one. We use the []
  487. // notation so that the JSCompiler will not complain about these objects and
  488. // fields in the case where we have no DOM.
  489. if ('document' in goog.global) {
  490. return goog.string.unescapeEntitiesUsingDom_(str);
  491. } else {
  492. // Fall back on pure XML entities
  493. return goog.string.unescapePureXmlEntities_(str);
  494. }
  495. }
  496. return str;
  497. };
  498. /**
  499. * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric
  500. * entities. This function is XSS-safe and whitespace-preserving.
  501. * @private
  502. * @param {string} str The string to unescape.
  503. * @return {string} The unescaped {@code str} string.
  504. */
  505. goog.string.unescapeEntitiesUsingDom_ = function(str) {
  506. var seen = {'&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'};
  507. var div = document.createElement('div');
  508. // Match as many valid entity characters as possible. If the actual entity
  509. // happens to be shorter, it will still work as innerHTML will return the
  510. // trailing characters unchanged. Since the entity characters do not include
  511. // open angle bracket, there is no chance of XSS from the innerHTML use.
  512. // Since no whitespace is passed to innerHTML, whitespace is preserved.
  513. return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) {
  514. // Check for cached entity.
  515. var value = seen[s];
  516. if (value) {
  517. return value;
  518. }
  519. // Check for numeric entity.
  520. if (entity.charAt(0) == '#') {
  521. // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex numbers.
  522. var n = Number('0' + entity.substr(1));
  523. if (!isNaN(n)) {
  524. value = String.fromCharCode(n);
  525. }
  526. }
  527. // Fall back to innerHTML otherwise.
  528. if (!value) {
  529. // Append a non-entity character to avoid a bug in Webkit that parses
  530. // an invalid entity at the end of innerHTML text as the empty string.
  531. div.innerHTML = s + ' ';
  532. // Then remove the trailing character from the result.
  533. value = div.firstChild.nodeValue.slice(0, -1);
  534. }
  535. // Cache and return.
  536. return seen[s] = value;
  537. });
  538. };
  539. /**
  540. * Unescapes XML entities.
  541. * @private
  542. * @param {string} str The string to unescape.
  543. * @return {string} An unescaped copy of {@code str}.
  544. */
  545. goog.string.unescapePureXmlEntities_ = function(str) {
  546. return str.replace(/&([^;]+);/g, function(s, entity) {
  547. switch (entity) {
  548. case 'amp':
  549. return '&';
  550. case 'lt':
  551. return '<';
  552. case 'gt':
  553. return '>';
  554. case 'quot':
  555. return '"';
  556. default:
  557. if (entity.charAt(0) == '#') {
  558. // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex.
  559. var n = Number('0' + entity.substr(1));
  560. if (!isNaN(n)) {
  561. return String.fromCharCode(n);
  562. }
  563. }
  564. // For invalid entities we just return the entity
  565. return s;
  566. }
  567. });
  568. };
  569. /**
  570. * Regular expression that matches an HTML entity.
  571. * See also HTML5: Tokenization / Tokenizing character references.
  572. * @private
  573. * @type {!RegExp}
  574. */
  575. goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g;
  576. /**
  577. * Do escaping of whitespace to preserve spatial formatting. We use character
  578. * entity #160 to make it safer for xml.
  579. * @param {string} str The string in which to escape whitespace.
  580. * @param {boolean=} opt_xml Whether to use XML compatible tags.
  581. * @return {string} An escaped copy of {@code str}.
  582. */
  583. goog.string.whitespaceEscape = function(str, opt_xml) {
  584. return goog.string.newLineToBr(str.replace(/ /g, ' &#160;'), opt_xml);
  585. };
  586. /**
  587. * Strip quote characters around a string. The second argument is a string of
  588. * characters to treat as quotes. This can be a single character or a string of
  589. * multiple character and in that case each of those are treated as possible
  590. * quote characters. For example:
  591. *
  592. * <pre>
  593. * goog.string.stripQuotes('"abc"', '"`') --> 'abc'
  594. * goog.string.stripQuotes('`abc`', '"`') --> 'abc'
  595. * </pre>
  596. *
  597. * @param {string} str The string to strip.
  598. * @param {string} quoteChars The quote characters to strip.
  599. * @return {string} A copy of {@code str} without the quotes.
  600. */
  601. goog.string.stripQuotes = function(str, quoteChars) {
  602. var length = quoteChars.length;
  603. for (var i = 0; i < length; i++) {
  604. var quoteChar = length == 1 ? quoteChars : quoteChars.charAt(i);
  605. if (str.charAt(0) == quoteChar && str.charAt(str.length - 1) == quoteChar) {
  606. return str.substring(1, str.length - 1);
  607. }
  608. }
  609. return str;
  610. };
  611. /**
  612. * Truncates a string to a certain length and adds '...' if necessary. The
  613. * length also accounts for the ellipsis, so a maximum length of 10 and a string
  614. * 'Hello World!' produces 'Hello W...'.
  615. * @param {string} str The string to truncate.
  616. * @param {number} chars Max number of characters.
  617. * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
  618. * characters from being cut off in the middle.
  619. * @return {string} The truncated {@code str} string.
  620. */
  621. goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) {
  622. if (opt_protectEscapedCharacters) {
  623. str = goog.string.unescapeEntities(str);
  624. }
  625. if (str.length > chars) {
  626. str = str.substring(0, chars - 3) + '...';
  627. }
  628. if (opt_protectEscapedCharacters) {
  629. str = goog.string.htmlEscape(str);
  630. }
  631. return str;
  632. };
  633. /**
  634. * Truncate a string in the middle, adding "..." if necessary,
  635. * and favoring the beginning of the string.
  636. * @param {string} str The string to truncate the middle of.
  637. * @param {number} chars Max number of characters.
  638. * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
  639. * characters from being cutoff in the middle.
  640. * @param {number=} opt_trailingChars Optional number of trailing characters to
  641. * leave at the end of the string, instead of truncating as close to the
  642. * middle as possible.
  643. * @return {string} A truncated copy of {@code str}.
  644. */
  645. goog.string.truncateMiddle = function(str, chars,
  646. opt_protectEscapedCharacters, opt_trailingChars) {
  647. if (opt_protectEscapedCharacters) {
  648. str = goog.string.unescapeEntities(str);
  649. }
  650. if (opt_trailingChars && str.length > chars) {
  651. if (opt_trailingChars > chars) {
  652. opt_trailingChars = chars;
  653. }
  654. var endPoint = str.length - opt_trailingChars;
  655. var startPoint = chars - opt_trailingChars;
  656. str = str.substring(0, startPoint) + '...' + str.substring(endPoint);
  657. } else if (str.length > chars) {
  658. // Favor the beginning of the string:
  659. var half = Math.floor(chars / 2);
  660. var endPos = str.length - half;
  661. half += chars % 2;
  662. str = str.substring(0, half) + '...' + str.substring(endPos);
  663. }
  664. if (opt_protectEscapedCharacters) {
  665. str = goog.string.htmlEscape(str);
  666. }
  667. return str;
  668. };
  669. /**
  670. * Special chars that need to be escaped for goog.string.quote.
  671. * @private
  672. * @type {Object}
  673. */
  674. goog.string.specialEscapeChars_ = {
  675. '\0': '\\0',
  676. '\b': '\\b',
  677. '\f': '\\f',
  678. '\n': '\\n',
  679. '\r': '\\r',
  680. '\t': '\\t',
  681. '\x0B': '\\x0B', // '\v' is not supported in JScript
  682. '"': '\\"',
  683. '\\': '\\\\'
  684. };
  685. /**
  686. * Character mappings used internally for goog.string.escapeChar.
  687. * @private
  688. * @type {Object}
  689. */
  690. goog.string.jsEscapeCache_ = {
  691. '\'': '\\\''
  692. };
  693. /**
  694. * Encloses a string in double quotes and escapes characters so that the
  695. * string is a valid JS string.
  696. * @param {string} s The string to quote.
  697. * @return {string} A copy of {@code s} surrounded by double quotes.
  698. */
  699. goog.string.quote = function(s) {
  700. s = String(s);
  701. if (s.quote) {
  702. return s.quote();
  703. } else {
  704. var sb = ['"'];
  705. for (var i = 0; i < s.length; i++) {
  706. var ch = s.charAt(i);
  707. var cc = ch.charCodeAt(0);
  708. sb[i + 1] = goog.string.specialEscapeChars_[ch] ||
  709. ((cc > 31 && cc < 127) ? ch : goog.string.escapeChar(ch));
  710. }
  711. sb.push('"');
  712. return sb.join('');
  713. }
  714. };
  715. /**
  716. * Takes a string and returns the escaped string for that character.
  717. * @param {string} str The string to escape.
  718. * @return {string} An escaped string representing {@code str}.
  719. */
  720. goog.string.escapeString = function(str) {
  721. var sb = [];
  722. for (var i = 0; i < str.length; i++) {
  723. sb[i] = goog.string.escapeChar(str.charAt(i));
  724. }
  725. return sb.join('');
  726. };
  727. /**
  728. * Takes a character and returns the escaped string for that character. For
  729. * example escapeChar(String.fromCharCode(15)) -> "\\x0E".
  730. * @param {string} c The character to escape.
  731. * @return {string} An escaped string representing {@code c}.
  732. */
  733. goog.string.escapeChar = function(c) {
  734. if (c in goog.string.jsEscapeCache_) {
  735. return goog.string.jsEscapeCache_[c];
  736. }
  737. if (c in goog.string.specialEscapeChars_) {
  738. return goog.string.jsEscapeCache_[c] = goog.string.specialEscapeChars_[c];
  739. }
  740. var rv = c;
  741. var cc = c.charCodeAt(0);
  742. if (cc > 31 && cc < 127) {
  743. rv = c;
  744. } else {
  745. // tab is 9 but handled above
  746. if (cc < 256) {
  747. rv = '\\x';
  748. if (cc < 16 || cc > 256) {
  749. rv += '0';
  750. }
  751. } else {
  752. rv = '\\u';
  753. if (cc < 4096) { // \u1000
  754. rv += '0';
  755. }
  756. }
  757. rv += cc.toString(16).toUpperCase();
  758. }
  759. return goog.string.jsEscapeCache_[c] = rv;
  760. };
  761. /**
  762. * Takes a string and creates a map (Object) in which the keys are the
  763. * characters in the string. The value for the key is set to true. You can
  764. * then use goog.object.map or goog.array.map to change the values.
  765. * @param {string} s The string to build the map from.
  766. * @return {Object} The map of characters used.
  767. */
  768. // TODO(arv): It seems like we should have a generic goog.array.toMap. But do
  769. // we want a dependency on goog.array in goog.string?
  770. goog.string.toMap = function(s) {
  771. var rv = {};
  772. for (var i = 0; i < s.length; i++) {
  773. rv[s.charAt(i)] = true;
  774. }
  775. return rv;
  776. };
  777. /**
  778. * Checks whether a string contains a given character.
  779. * @param {string} s The string to test.
  780. * @param {string} ss The substring to test for.
  781. * @return {boolean} True if {@code s} contains {@code ss}.
  782. */
  783. goog.string.contains = function(s, ss) {
  784. return s.indexOf(ss) != -1;
  785. };
  786. /**
  787. * Returns the non-overlapping occurrences of ss in s.
  788. * If either s or ss evalutes to false, then returns zero.
  789. * @param {string} s The string to look in.
  790. * @param {string} ss The string to look for.
  791. * @return {number} Number of occurrences of ss in s.
  792. */
  793. goog.string.countOf = function(s, ss) {
  794. return s && ss ? s.split(ss).length - 1 : 0;
  795. };
  796. /**
  797. * Removes a substring of a specified length at a specific
  798. * index in a string.
  799. * @param {string} s The base string from which to remove.
  800. * @param {number} index The index at which to remove the substring.
  801. * @param {number} stringLength The length of the substring to remove.
  802. * @return {string} A copy of {@code s} with the substring removed or the full
  803. * string if nothing is removed or the input is invalid.
  804. */
  805. goog.string.removeAt = function(s, index, stringLength) {
  806. var resultStr = s;
  807. // If the index is greater or equal to 0 then remove substring
  808. if (index >= 0 && index < s.length && stringLength > 0) {
  809. resultStr = s.substr(0, index) +
  810. s.substr(index + stringLength, s.length - index - stringLength);
  811. }
  812. return resultStr;
  813. };
  814. /**
  815. * Removes the first occurrence of a substring from a string.
  816. * @param {string} s The base string from which to remove.
  817. * @param {string} ss The string to remove.
  818. * @return {string} A copy of {@code s} with {@code ss} removed or the full
  819. * string if nothing is removed.
  820. */
  821. goog.string.remove = function(s, ss) {
  822. var re = new RegExp(goog.string.regExpEscape(ss), '');
  823. return s.replace(re, '');
  824. };
  825. /**
  826. * Removes all occurrences of a substring from a string.
  827. * @param {string} s The base string from which to remove.
  828. * @param {string} ss The string to remove.
  829. * @return {string} A copy of {@code s} with {@code ss} removed or the full
  830. * string if nothing is removed.
  831. */
  832. goog.string.removeAll = function(s, ss) {
  833. var re = new RegExp(goog.string.regExpEscape(ss), 'g');
  834. return s.replace(re, '');
  835. };
  836. /**
  837. * Escapes characters in the string that are not safe to use in a RegExp.
  838. * @param {*} s The string to escape. If not a string, it will be casted
  839. * to one.
  840. * @return {string} A RegExp safe, escaped copy of {@code s}.
  841. */
  842. goog.string.regExpEscape = function(s) {
  843. return String(s).replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1').
  844. replace(/\x08/g, '\\x08');
  845. };
  846. /**
  847. * Repeats a string n times.
  848. * @param {string} string The string to repeat.
  849. * @param {number} length The number of times to repeat.
  850. * @return {string} A string containing {@code length} repetitions of
  851. * {@code string}.
  852. */
  853. goog.string.repeat = function(string, length) {
  854. return new Array(length + 1).join(string);
  855. };
  856. /**
  857. * Pads number to given length and optionally rounds it to a given precision.
  858. * For example:
  859. * <pre>padNumber(1.25, 2, 3) -> '01.250'
  860. * padNumber(1.25, 2) -> '01.25'
  861. * padNumber(1.25, 2, 1) -> '01.3'
  862. * padNumber(1.25, 0) -> '1.25'</pre>
  863. *
  864. * @param {number} num The number to pad.
  865. * @param {number} length The desired length.
  866. * @param {number=} opt_precision The desired precision.
  867. * @return {string} {@code num} as a string with the given options.
  868. */
  869. goog.string.padNumber = function(num, length, opt_precision) {
  870. var s = goog.isDef(opt_precision) ? num.toFixed(opt_precision) : String(num);
  871. var index = s.indexOf('.');
  872. if (index == -1) {
  873. index = s.length;
  874. }
  875. return goog.string.repeat('0', Math.max(0, length - index)) + s;
  876. };
  877. /**
  878. * Returns a string representation of the given object, with
  879. * null and undefined being returned as the empty string.
  880. *
  881. * @param {*} obj The object to convert.
  882. * @return {string} A string representation of the {@code obj}.
  883. */
  884. goog.string.makeSafe = function(obj) {
  885. return obj == null ? '' : String(obj);
  886. };
  887. /**
  888. * Concatenates string expressions. This is useful
  889. * since some browsers are very inefficient when it comes to using plus to
  890. * concat strings. Be careful when using null and undefined here since
  891. * these will not be included in the result. If you need to represent these
  892. * be sure to cast the argument to a String first.
  893. * For example:
  894. * <pre>buildString('a', 'b', 'c', 'd') -> 'abcd'
  895. * buildString(null, undefined) -> ''
  896. * </pre>
  897. * @param {...*} var_args A list of strings to concatenate. If not a string,
  898. * it will be casted to one.
  899. * @return {string} The concatenation of {@code var_args}.
  900. */
  901. goog.string.buildString = function(var_args) {
  902. return Array.prototype.join.call(arguments, '');
  903. };
  904. /**
  905. * Returns a string with at least 64-bits of randomness.
  906. *
  907. * Doesn't trust Javascript's random function entirely. Uses a combination of
  908. * random and current timestamp, and then encodes the string in base-36 to
  909. * make it shorter.
  910. *
  911. * @return {string} A random string, e.g. sn1s7vb4gcic.
  912. */
  913. goog.string.getRandomString = function() {
  914. var x = 2147483648;
  915. return Math.floor(Math.random() * x).toString(36) +
  916. Math.abs(Math.floor(Math.random() * x) ^ goog.now()).toString(36);
  917. };
  918. /**
  919. * Compares two version numbers.
  920. *
  921. * @param {string|number} version1 Version of first item.
  922. * @param {string|number} version2 Version of second item.
  923. *
  924. * @return {number} 1 if {@code version1} is higher.
  925. * 0 if arguments are equal.
  926. * -1 if {@code version2} is higher.
  927. */
  928. goog.string.compareVersions = function(version1, version2) {
  929. var order = 0;
  930. // Trim leading and trailing whitespace and split the versions into
  931. // subversions.
  932. var v1Subs = goog.string.trim(String(version1)).split('.');
  933. var v2Subs = goog.string.trim(String(version2)).split('.');
  934. var subCount = Math.max(v1Subs.length, v2Subs.length);
  935. // Iterate over the subversions, as long as they appear to be equivalent.
  936. for (var subIdx = 0; order == 0 && subIdx < subCount; subIdx++) {
  937. var v1Sub = v1Subs[subIdx] || '';
  938. var v2Sub = v2Subs[subIdx] || '';
  939. // Split the subversions into pairs of numbers and qualifiers (like 'b').
  940. // Two different RegExp objects are needed because they are both using
  941. // the 'g' flag.
  942. var v1CompParser = new RegExp('(\\d*)(\\D*)', 'g');
  943. var v2CompParser = new RegExp('(\\d*)(\\D*)', 'g');
  944. do {
  945. var v1Comp = v1CompParser.exec(v1Sub) || ['', '', ''];
  946. var v2Comp = v2CompParser.exec(v2Sub) || ['', '', ''];
  947. // Break if there are no more matches.
  948. if (v1Comp[0].length == 0 && v2Comp[0].length == 0) {
  949. break;
  950. }
  951. // Parse the numeric part of the subversion. A missing number is
  952. // equivalent to 0.
  953. var v1CompNum = v1Comp[1].length == 0 ? 0 : parseInt(v1Comp[1], 10);
  954. var v2CompNum = v2Comp[1].length == 0 ? 0 : parseInt(v2Comp[1], 10);
  955. // Compare the subversion components. The number has the highest
  956. // precedence. Next, if the numbers are equal, a subversion without any
  957. // qualifier is always higher than a subversion with any qualifier. Next,
  958. // the qualifiers are compared as strings.
  959. order = goog.string.compareElements_(v1CompNum, v2CompNum) ||
  960. goog.string.compareElements_(v1Comp[2].length == 0,
  961. v2Comp[2].length == 0) ||
  962. goog.string.compareElements_(v1Comp[2], v2Comp[2]);
  963. // Stop as soon as an inequality is discovered.
  964. } while (order == 0);
  965. }
  966. return order;
  967. };
  968. /**
  969. * Compares elements of a version number.
  970. *
  971. * @param {string|number|boolean} left An element from a version number.
  972. * @param {string|number|boolean} right An element from a version number.
  973. *
  974. * @return {number} 1 if {@code left} is higher.
  975. * 0 if arguments are equal.
  976. * -1 if {@code right} is higher.
  977. * @private
  978. */
  979. goog.string.compareElements_ = function(left, right) {
  980. if (left < right) {
  981. return -1;
  982. } else if (left > right) {
  983. return 1;
  984. }
  985. return 0;
  986. };
  987. /**
  988. * Maximum value of #goog.string.hashCode, exclusive. 2^32.
  989. * @type {number}
  990. * @private
  991. */
  992. goog.string.HASHCODE_MAX_ = 0x100000000;
  993. /**
  994. * String hash function similar to java.lang.String.hashCode().
  995. * The hash code for a string is computed as
  996. * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
  997. * where s[i] is the ith character of the string and n is the length of
  998. * the string. We mod the result to make it between 0 (inclusive) and 2^32
  999. * (exclusive).
  1000. * @param {string} str A string.
  1001. * @return {number} Hash value for {@code str}, between 0 (inclusive) and 2^32
  1002. * (exclusive). The empty string returns 0.
  1003. */
  1004. goog.string.hashCode = function(str) {
  1005. var result = 0;
  1006. for (var i = 0; i < str.length; ++i) {
  1007. result = 31 * result + str.charCodeAt(i);
  1008. // Normalize to 4 byte range, 0 ... 2^32.
  1009. result %= goog.string.HASHCODE_MAX_;
  1010. }
  1011. return result;
  1012. };
  1013. /**
  1014. * The most recent unique ID. |0 is equivalent to Math.floor in this case.
  1015. * @type {number}
  1016. * @private
  1017. */
  1018. goog.string.uniqueStringCounter_ = Math.random() * 0x80000000 | 0;
  1019. /**
  1020. * Generates and returns a string which is unique in the current document.
  1021. * This is useful, for example, to create unique IDs for DOM elements.
  1022. * @return {string} A unique id.
  1023. */
  1024. goog.string.createUniqueString = function() {
  1025. return 'goog_' + goog.string.uniqueStringCounter_++;
  1026. };
  1027. /**
  1028. * Converts the supplied string to a number, which may be Ininity or NaN.
  1029. * This function strips whitespace: (toNumber(' 123') === 123)
  1030. * This function accepts scientific notation: (toNumber('1e1') === 10)
  1031. *
  1032. * This is better than Javascript's built-in conversions because, sadly:
  1033. * (Number(' ') === 0) and (parseFloat('123a') === 123)
  1034. *
  1035. * @param {string} str The string to convert.
  1036. * @return {number} The number the supplied string represents, or NaN.
  1037. */
  1038. goog.string.toNumber = function(str) {
  1039. var num = Number(str);
  1040. if (num == 0 && goog.string.isEmpty(str)) {
  1041. return NaN;
  1042. }
  1043. return num;
  1044. };
  1045. /**
  1046. * A memoized cache for goog.string.toCamelCase.
  1047. * @type {Object.<string>}
  1048. * @private
  1049. */
  1050. goog.string.toCamelCaseCache_ = {};
  1051. /**
  1052. * Converts a string from selector-case to camelCase (e.g. from
  1053. * "multi-part-string" to "multiPartString"), useful for converting
  1054. * CSS selectors and HTML dataset keys to their equivalent JS properties.
  1055. * @param {string} str The string in selector-case form.
  1056. * @return {string} The string in camelCase form.
  1057. */
  1058. goog.string.toCamelCase = function(str) {
  1059. return goog.string.toCamelCaseCache_[str] ||
  1060. (goog.string.toCamelCaseCache_[str] =
  1061. String(str).replace(/\-([a-z])/g, function(all, match) {
  1062. return match.toUpperCase();
  1063. }));
  1064. };
  1065. /**
  1066. * A memoized cache for goog.string.toSelectorCase.
  1067. * @type {Object.<string>}
  1068. * @private
  1069. */
  1070. goog.string.toSelectorCaseCache_ = {};
  1071. /**
  1072. * Converts a string from camelCase to selector-case (e.g. from
  1073. * "multiPartString" to "multi-part-string"), useful for converting JS
  1074. * style and dataset properties to equivalent CSS selectors and HTML keys.
  1075. * @param {string} str The string in camelCase form.
  1076. * @return {string} The string in selector-case form.
  1077. */
  1078. goog.string.toSelectorCase = function(str) {
  1079. return goog.string.toSelectorCaseCache_[str] ||
  1080. (goog.string.toSelectorCaseCache_[str] =
  1081. String(str).replace(/([A-Z])/g, '-$1').toLowerCase());
  1082. };