PageRenderTime 46ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/indra/llcommon/llstring.cpp

https://bitbucket.org/lindenlab/viewer-beta/
C++ | 1421 lines | 1121 code | 174 blank | 126 comment | 266 complexity | d56a42315a97ec5d8e1f20179d482309 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /**
  2. * @file llstring.cpp
  3. * @brief String utility functions and the std::string class.
  4. *
  5. * $LicenseInfo:firstyear=2001&license=viewerlgpl$
  6. * Second Life Viewer Source Code
  7. * Copyright (C) 2010, Linden Research, Inc.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation;
  12. * version 2.1 of the License only.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. *
  23. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  24. * $/LicenseInfo$
  25. */
  26. #include "linden_common.h"
  27. #include "llstring.h"
  28. #include "llerror.h"
  29. #if LL_WINDOWS
  30. #define WIN32_LEAN_AND_MEAN
  31. #include <winsock2.h>
  32. #include <windows.h>
  33. #include <winnls.h> // for WideCharToMultiByte
  34. #endif
  // File-scope LLFastTimer declaration labelled "String Format".
  // NOTE(review): presumably used to time LLStringUtil::format, whose body is not visible here - confirm.
  35. LLFastTimer::DeclareTimer FT_STRING_FORMAT("String Format");
  // Converts a C string to std::string; a null pointer yields an empty string.
  std::string ll_safe_string(const char* in)
  {
      return in ? std::string(in) : std::string();
  }
  41. std::string ll_safe_string(const char* in, S32 maxlen)
  42. {
  43. if(in) return std::string(in, maxlen);
  44. return std::string();
  45. }
  46. U8 hex_as_nybble(char hex)
  47. {
  48. if((hex >= '0') && (hex <= '9'))
  49. {
  50. return (U8)(hex - '0');
  51. }
  52. else if((hex >= 'a') && (hex <='f'))
  53. {
  54. return (U8)(10 + hex - 'a');
  55. }
  56. else if((hex >= 'A') && (hex <='F'))
  57. {
  58. return (U8)(10 + hex - 'A');
  59. }
  60. return 0; // uh - oh, not hex any more...
  61. }
  62. bool iswindividual(llwchar elem)
  63. {
  64. U32 cur_char = (U32)elem;
  65. bool result = false;
  66. if (0x2E80<= cur_char && cur_char <= 0x9FFF)
  67. {
  68. result = true;
  69. }
  70. else if (0xAC00<= cur_char && cur_char <= 0xD7A0 )
  71. {
  72. result = true;
  73. }
  74. else if (0xF900<= cur_char && cur_char <= 0xFA60 )
  75. {
  76. result = true;
  77. }
  78. return result;
  79. }
  80. bool _read_file_into_string(std::string& str, const std::string& filename)
  81. {
  82. llifstream ifs(filename, llifstream::binary);
  83. if (!ifs.is_open())
  84. {
  85. llinfos << "Unable to open file " << filename << llendl;
  86. return false;
  87. }
  88. std::ostringstream oss;
  89. oss << ifs.rdbuf();
  90. str = oss.str();
  91. ifs.close();
  92. return true;
  93. }
  94. // See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
  95. // for the Unicode implementation - this doesn't match because it was written before finding
  96. // it.
  97. std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
  98. {
  99. std::string utf8_str = wstring_to_utf8str(wstr);
  100. s << utf8_str;
  101. return s;
  102. }
  103. std::string rawstr_to_utf8(const std::string& raw)
  104. {
  105. LLWString wstr(utf8str_to_wstring(raw));
  106. return wstring_to_utf8str(wstr);
  107. }
  108. S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
  109. {
  110. U32 cur_char = (U32)in_char;
  111. char* base = outchars;
  112. if (cur_char < 0x80)
  113. {
  114. *outchars++ = (U8)cur_char;
  115. }
  116. else if (cur_char < 0x800)
  117. {
  118. *outchars++ = 0xC0 | (cur_char >> 6);
  119. *outchars++ = 0x80 | (cur_char & 0x3F);
  120. }
  121. else if (cur_char < 0x10000)
  122. {
  123. *outchars++ = 0xE0 | (cur_char >> 12);
  124. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  125. *outchars++ = 0x80 | (cur_char & 0x3F);
  126. }
  127. else if (cur_char < 0x200000)
  128. {
  129. *outchars++ = 0xF0 | (cur_char >> 18);
  130. *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
  131. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  132. *outchars++ = 0x80 | (cur_char & 0x3F);
  133. }
  134. else if (cur_char < 0x4000000)
  135. {
  136. *outchars++ = 0xF8 | (cur_char >> 24);
  137. *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
  138. *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
  139. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  140. *outchars++ = 0x80 | (cur_char & 0x3F);
  141. }
  142. else if (cur_char < 0x80000000)
  143. {
  144. *outchars++ = 0xFC | (cur_char >> 30);
  145. *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
  146. *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
  147. *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
  148. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  149. *outchars++ = 0x80 | (cur_char & 0x3F);
  150. }
  151. else
  152. {
  153. llwarns << "Invalid Unicode character " << cur_char << "!" << llendl;
  154. *outchars++ = LL_UNKNOWN_CHAR;
  155. }
  156. return outchars - base;
  157. }
  158. S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
  159. {
  160. const U16* base = inchars;
  161. U16 cur_char = *inchars++;
  162. llwchar char32 = cur_char;
  163. if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
  164. {
  165. // Surrogates
  166. char32 = ((llwchar)(cur_char - 0xD800)) << 10;
  167. cur_char = *inchars++;
  168. char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
  169. }
  170. else
  171. {
  172. char32 = (llwchar)cur_char;
  173. }
  174. *outchar = char32;
  175. return inchars - base;
  176. }
  177. llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
  178. {
  179. llutf16string out;
  180. S32 i = 0;
  181. while (i < len)
  182. {
  183. U32 cur_char = utf32str[i];
  184. if (cur_char > 0xFFFF)
  185. {
  186. out += (0xD7C0 + (cur_char >> 10));
  187. out += (0xDC00 | (cur_char & 0x3FF));
  188. }
  189. else
  190. {
  191. out += cur_char;
  192. }
  193. i++;
  194. }
  195. return out;
  196. }
  197. llutf16string wstring_to_utf16str(const LLWString &utf32str)
  198. {
  199. const S32 len = (S32)utf32str.length();
  200. return wstring_to_utf16str(utf32str, len);
  201. }
  202. llutf16string utf8str_to_utf16str ( const std::string& utf8str )
  203. {
  204. LLWString wstr = utf8str_to_wstring ( utf8str );
  205. return wstring_to_utf16str ( wstr );
  206. }
  207. LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
  208. {
  209. LLWString wout;
  210. if((len <= 0) || utf16str.empty()) return wout;
  211. S32 i = 0;
  212. // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
  213. const U16* chars16 = &(*(utf16str.begin()));
  214. while (i < len)
  215. {
  216. llwchar cur_char;
  217. i += utf16chars_to_wchar(chars16+i, &cur_char);
  218. wout += cur_char;
  219. }
  220. return wout;
  221. }
  222. LLWString utf16str_to_wstring(const llutf16string &utf16str)
  223. {
  224. const S32 len = (S32)utf16str.length();
  225. return utf16str_to_wstring(utf16str, len);
  226. }
  227. // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
  228. S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
  229. {
  230. S32 surrogate_pairs = 0;
  231. // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
  232. const U16 *const utf16_chars = &(*(utf16str.begin()));
  233. S32 i = 0;
  234. while (i < utf16_len)
  235. {
  236. const U16 c = utf16_chars[i++];
  237. if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16
  238. { // Have first byte of a surrogate pair
  239. if (i >= utf16_len)
  240. {
  241. break;
  242. }
  243. const U16 d = utf16_chars[i];
  244. if (d >= 0xDC00 && d <= 0xDFFF)
  245. { // Have valid second byte of a surrogate pair
  246. surrogate_pairs++;
  247. i++;
  248. }
  249. }
  250. }
  251. return utf16_len - surrogate_pairs;
  252. }
  253. // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
  254. S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
  255. {
  256. const S32 end = llmin((S32)wstr.length(), woffset + wlen);
  257. if (end < woffset)
  258. {
  259. return 0;
  260. }
  261. else
  262. {
  263. S32 length = end - woffset;
  264. for (S32 i = woffset; i < end; i++)
  265. {
  266. if (wstr[i] >= 0x10000)
  267. {
  268. length++;
  269. }
  270. }
  271. return length;
  272. }
  273. }
  274. // Given a wstring and an offset in it, returns the length as wstring (i.e.,
  275. // number of llwchars) of the longest substring that starts at the offset
  276. // and whose equivalent utf-16 string does not exceeds the given utf16_length.
  277. S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
  278. {
  279. const S32 end = wstr.length();
  280. BOOL u = FALSE;
  281. S32 n = woffset + utf16_length;
  282. S32 i = woffset;
  283. while (i < end)
  284. {
  285. if (wstr[i] >= 0x10000)
  286. {
  287. --n;
  288. }
  289. if (i >= n)
  290. {
  291. u = (i > n);
  292. break;
  293. }
  294. i++;
  295. }
  296. if (unaligned)
  297. {
  298. *unaligned = u;
  299. }
  300. return i - woffset;
  301. }
  302. S32 wchar_utf8_length(const llwchar wc)
  303. {
  304. if (wc < 0x80)
  305. {
  306. // This case will also catch negative values which are
  307. // technically invalid.
  308. return 1;
  309. }
  310. else if (wc < 0x800)
  311. {
  312. return 2;
  313. }
  314. else if (wc < 0x10000)
  315. {
  316. return 3;
  317. }
  318. else if (wc < 0x200000)
  319. {
  320. return 4;
  321. }
  322. else if (wc < 0x4000000)
  323. {
  324. return 5;
  325. }
  326. else
  327. {
  328. return 6;
  329. }
  330. }
  331. S32 wstring_utf8_length(const LLWString& wstr)
  332. {
  333. S32 len = 0;
  334. for (S32 i = 0; i < (S32)wstr.length(); i++)
  335. {
  336. len += wchar_utf8_length(wstr[i]);
  337. }
  338. return len;
  339. }
  340. LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
  341. {
  342. LLWString wout;
  343. S32 i = 0;
  344. while (i < len)
  345. {
  346. llwchar unichar;
  347. U8 cur_char = utf8str[i];
  348. if (cur_char < 0x80)
  349. {
  350. // Ascii character, just add it
  351. unichar = cur_char;
  352. }
  353. else
  354. {
  355. S32 cont_bytes = 0;
  356. if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32
  357. {
  358. unichar = (0x1F&cur_char);
  359. cont_bytes = 1;
  360. }
  361. else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32
  362. {
  363. unichar = (0x0F&cur_char);
  364. cont_bytes = 2;
  365. }
  366. else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32
  367. {
  368. unichar = (0x07&cur_char);
  369. cont_bytes = 3;
  370. }
  371. else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32
  372. {
  373. unichar = (0x03&cur_char);
  374. cont_bytes = 4;
  375. }
  376. else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32
  377. {
  378. unichar = (0x01&cur_char);
  379. cont_bytes = 5;
  380. }
  381. else
  382. {
  383. wout += LL_UNKNOWN_CHAR;
  384. ++i;
  385. continue;
  386. }
  387. // Check that this character doesn't go past the end of the string
  388. S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
  389. do
  390. {
  391. ++i;
  392. cur_char = utf8str[i];
  393. if ( (cur_char >> 6) == 0x2 )
  394. {
  395. unichar <<= 6;
  396. unichar += (0x3F&cur_char);
  397. }
  398. else
  399. {
  400. // Malformed sequence - roll back to look at this as a new char
  401. unichar = LL_UNKNOWN_CHAR;
  402. --i;
  403. break;
  404. }
  405. } while(i < end);
  406. // Handle overlong characters and NULL characters
  407. if ( ((cont_bytes == 1) && (unichar < 0x80))
  408. || ((cont_bytes == 2) && (unichar < 0x800))
  409. || ((cont_bytes == 3) && (unichar < 0x10000))
  410. || ((cont_bytes == 4) && (unichar < 0x200000))
  411. || ((cont_bytes == 5) && (unichar < 0x4000000)) )
  412. {
  413. unichar = LL_UNKNOWN_CHAR;
  414. }
  415. }
  416. wout += unichar;
  417. ++i;
  418. }
  419. return wout;
  420. }
  421. LLWString utf8str_to_wstring(const std::string& utf8str)
  422. {
  423. const S32 len = (S32)utf8str.length();
  424. return utf8str_to_wstring(utf8str, len);
  425. }
  426. std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
  427. {
  428. std::string out;
  429. S32 i = 0;
  430. while (i < len)
  431. {
  432. char tchars[8]; /* Flawfinder: ignore */
  433. S32 n = wchar_to_utf8chars(utf32str[i], tchars);
  434. tchars[n] = 0;
  435. out += tchars;
  436. i++;
  437. }
  438. return out;
  439. }
  440. std::string wstring_to_utf8str(const LLWString& utf32str)
  441. {
  442. const S32 len = (S32)utf32str.length();
  443. return wstring_to_utf8str(utf32str, len);
  444. }
  445. std::string utf16str_to_utf8str(const llutf16string& utf16str)
  446. {
  447. return wstring_to_utf8str(utf16str_to_wstring(utf16str));
  448. }
  449. std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
  450. {
  451. return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
  452. }
  453. std::string utf8str_trim(const std::string& utf8str)
  454. {
  455. LLWString wstr = utf8str_to_wstring(utf8str);
  456. LLWStringUtil::trim(wstr);
  457. return wstring_to_utf8str(wstr);
  458. }
  459. std::string utf8str_tolower(const std::string& utf8str)
  460. {
  461. LLWString out_str = utf8str_to_wstring(utf8str);
  462. LLWStringUtil::toLower(out_str);
  463. return wstring_to_utf8str(out_str);
  464. }
  465. S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
  466. {
  467. LLWString wlhs = utf8str_to_wstring(lhs);
  468. LLWString wrhs = utf8str_to_wstring(rhs);
  469. return LLWStringUtil::compareInsensitive(wlhs, wrhs);
  470. }
  471. std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
  472. {
  473. if (0 == max_len)
  474. {
  475. return std::string();
  476. }
  477. if ((S32)utf8str.length() <= max_len)
  478. {
  479. return utf8str;
  480. }
  481. else
  482. {
  483. S32 cur_char = max_len;
  484. // If we're ASCII, we don't need to do anything
  485. if ((U8)utf8str[cur_char] > 0x7f)
  486. {
  487. // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back
  488. // to the first character
  489. while (0x80 == (0xc0 & utf8str[cur_char]))
  490. {
  491. cur_char--;
  492. // Keep moving forward until we hit the first char;
  493. if (cur_char == 0)
  494. {
  495. // Make sure we don't trash memory if we've got a bogus string.
  496. break;
  497. }
  498. }
  499. }
  500. // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
  501. return utf8str.substr(0, cur_char);
  502. }
  503. }
  504. std::string utf8str_substChar(
  505. const std::string& utf8str,
  506. const llwchar target_char,
  507. const llwchar replace_char)
  508. {
  509. LLWString wstr = utf8str_to_wstring(utf8str);
  510. LLWStringUtil::replaceChar(wstr, target_char, replace_char);
  511. //wstr = wstring_substChar(wstr, target_char, replace_char);
  512. return wstring_to_utf8str(wstr);
  513. }
  514. std::string utf8str_makeASCII(const std::string& utf8str)
  515. {
  516. LLWString wstr = utf8str_to_wstring(utf8str);
  517. LLWStringUtil::_makeASCII(wstr);
  518. return wstring_to_utf8str(wstr);
  519. }
  520. std::string mbcsstring_makeASCII(const std::string& wstr)
  521. {
  522. // Replace non-ASCII chars with replace_char
  523. std::string out_str = wstr;
  524. for (S32 i = 0; i < (S32)out_str.length(); i++)
  525. {
  526. if ((U8)out_str[i] > 0x7f)
  527. {
  528. out_str[i] = LL_UNKNOWN_CHAR;
  529. }
  530. }
  531. return out_str;
  532. }
  // Returns a copy of `utf8str` with every carriage-return byte (13) removed.
  // Despite the name, line-feed characters are left in place.
  std::string utf8str_removeCRLF(const std::string& utf8str)
  {
      std::string out;
      if (utf8str.empty())
      {
          return out;
      }

      const char CR = 13;
      out.reserve(utf8str.length());
      for (std::string::const_iterator it = utf8str.begin(); it != utf8str.end(); ++it)
      {
          if (*it != CR)
          {
              out.push_back(*it);
          }
      }
      return out;
  }
  552. #if LL_WINDOWS
  553. // documentation moved to header. Phoenix 2007-11-27
  554. namespace snprintf_hack
  555. {
  556. int snprintf(char *str, size_t size, const char *format, ...)
  557. {
  558. va_list args;
  559. va_start(args, format);
  560. int num_written = _vsnprintf(str, size, format, args); /* Flawfinder: ignore */
  561. va_end(args);
  562. str[size-1] = '\0'; // always null terminate
  563. return num_written;
  564. }
  565. }
  566. std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
  567. {
  568. std::string out;
  569. if(in)
  570. {
  571. int len_in = wcslen(in);
  572. int len_out = WideCharToMultiByte(
  573. code_page,
  574. 0,
  575. in,
  576. len_in,
  577. NULL,
  578. 0,
  579. 0,
  580. 0);
  581. // We will need two more bytes for the double NULL ending
  582. // created in WideCharToMultiByte().
  583. char* pout = new char [len_out + 2];
  584. memset(pout, 0, len_out + 2);
  585. if(pout)
  586. {
  587. WideCharToMultiByte(
  588. code_page,
  589. 0,
  590. in,
  591. len_in,
  592. pout,
  593. len_out,
  594. 0,
  595. 0);
  596. out.assign(pout);
  597. delete[] pout;
  598. }
  599. }
  600. return out;
  601. }
  602. wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
  603. {
  604. // From review:
  605. // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
  606. // plus one for a null terminator, and be guaranteed to not overflow.
  607. // Normally, I'd call that sort of thing premature optimization,
  608. // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets.
  609. // int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0);
  610. // reserve place to NULL terminator
  611. int output_str_len = in.length();
  612. wchar_t* w_out = new wchar_t[output_str_len + 1];
  613. memset(w_out, 0, output_str_len + 1);
  614. int real_output_str_len = MultiByteToWideChar (code_page, 0, in.c_str(), in.length(), w_out, output_str_len);
  615. //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
  616. w_out[real_output_str_len] = 0;
  617. return w_out;
  618. }
  619. std::string ll_convert_string_to_utf8_string(const std::string& in)
  620. {
  621. wchar_t* w_mesg = ll_convert_string_to_wide(in, CP_ACP);
  622. std::string out_utf8(ll_convert_wide_to_string(w_mesg, CP_UTF8));
  623. delete[] w_mesg;
  624. return out_utf8;
  625. }
  626. #endif // LL_WINDOWS
  627. long LLStringOps::sPacificTimeOffset = 0;
  628. long LLStringOps::sLocalTimeOffset = 0;
  629. bool LLStringOps::sPacificDaylightTime = 0;
  630. std::map<std::string, std::string> LLStringOps::datetimeToCodes;
  631. std::vector<std::string> LLStringOps::sWeekDayList;
  632. std::vector<std::string> LLStringOps::sWeekDayShortList;
  633. std::vector<std::string> LLStringOps::sMonthList;
  634. std::vector<std::string> LLStringOps::sMonthShortList;
  635. std::string LLStringOps::sDayFormat;
  636. std::string LLStringOps::sAM;
  637. std::string LLStringOps::sPM;
  638. S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
  639. {
  640. #if LL_WINDOWS
  641. // in Windows, wide string functions operator on 16-bit strings,
  642. // not the proper 32 bit wide string
  643. return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
  644. #else
  645. return wcscoll(a, b);
  646. #endif
  647. }
  648. void LLStringOps::setupDatetimeInfo (bool daylight)
  649. {
  650. time_t nowT, localT, gmtT;
  651. struct tm * tmpT;
  652. nowT = time (NULL);
  653. tmpT = gmtime (&nowT);
  654. gmtT = mktime (tmpT);
  655. tmpT = localtime (&nowT);
  656. localT = mktime (tmpT);
  657. sLocalTimeOffset = (long) (gmtT - localT);
  658. if (tmpT->tm_isdst)
  659. {
  660. sLocalTimeOffset -= 60 * 60; // 1 hour
  661. }
  662. sPacificDaylightTime = daylight;
  663. sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;
  664. datetimeToCodes["wkday"] = "%a"; // Thu
  665. datetimeToCodes["weekday"] = "%A"; // Thursday
  666. datetimeToCodes["year4"] = "%Y"; // 2009
  667. datetimeToCodes["year"] = "%Y"; // 2009
  668. datetimeToCodes["year2"] = "%y"; // 09
  669. datetimeToCodes["mth"] = "%b"; // Aug
  670. datetimeToCodes["month"] = "%B"; // August
  671. datetimeToCodes["mthnum"] = "%m"; // 08
  672. datetimeToCodes["day"] = "%d"; // 31
  673. datetimeToCodes["sday"] = "%-d"; // 9
  674. datetimeToCodes["hour24"] = "%H"; // 14
  675. datetimeToCodes["hour"] = "%H"; // 14
  676. datetimeToCodes["hour12"] = "%I"; // 02
  677. datetimeToCodes["min"] = "%M"; // 59
  678. datetimeToCodes["ampm"] = "%p"; // AM
  679. datetimeToCodes["second"] = "%S"; // 59
  680. datetimeToCodes["timezone"] = "%Z"; // PST
  681. }
  // Splits `data` on ':' into `output`, replacing its previous contents.
  // Empty fields are kept, so the result always has exactly one more element
  // than there are colons (an empty input gives a single empty string).
  void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output)
  {
      output.clear();

      std::string::size_type field_start = 0;
      for (;;)
      {
          const std::string::size_type colon = data.find(':', field_start);
          if (colon == std::string::npos)
          {
              // Last (possibly empty) field.
              output.push_back(data.substr(field_start));
              break;
          }
          output.push_back(data.substr(field_start, colon - field_start));
          field_start = colon + 1;
      }
  }
  702. void LLStringOps::setupWeekDaysNames(const std::string& data)
  703. {
  704. tokenizeStringToArray(data,sWeekDayList);
  705. }
  706. void LLStringOps::setupWeekDaysShortNames(const std::string& data)
  707. {
  708. tokenizeStringToArray(data,sWeekDayShortList);
  709. }
  710. void LLStringOps::setupMonthNames(const std::string& data)
  711. {
  712. tokenizeStringToArray(data,sMonthList);
  713. }
  714. void LLStringOps::setupMonthShortNames(const std::string& data)
  715. {
  716. tokenizeStringToArray(data,sMonthShortList);
  717. }
  718. void LLStringOps::setupDayFormat(const std::string& data)
  719. {
  720. sDayFormat = data;
  721. }
  722. std::string LLStringOps::getDatetimeCode (std::string key)
  723. {
  724. std::map<std::string, std::string>::iterator iter;
  725. iter = datetimeToCodes.find (key);
  726. if (iter != datetimeToCodes.end())
  727. {
  728. return iter->second;
  729. }
  730. else
  731. {
  732. return std::string("");
  733. }
  734. }
  namespace LLStringFn
  {
      // NOTE - this restricts output to ascii
      // Replaces every control byte (< 0x20) and every byte above 0x7f with
      // `replacement`. The comparison is done on unsigned values so the
      // result does not depend on whether plain char is signed on the
      // target platform.
      void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
      {
          const unsigned char MIN = 0x20;
          const unsigned char MAX = 0x7f;
          std::basic_string<char>::size_type len = string.size();
          for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
          {
              const unsigned char c = (unsigned char) string[ii];
              if(c < MIN || c > MAX)
              {
                  string[ii] = replacement;
              }
          }
      }

      // NOTE - this restricts output to ascii
      // Same as replace_nonprintable_in_ascii, and additionally replaces the
      // pipe character '|' (0x7c).
      void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
                                                  char replacement)
      {
          const unsigned char MIN  = 0x20;
          const unsigned char MAX  = 0x7f;
          const unsigned char PIPE = 0x7c;
          std::basic_string<char>::size_type len = str.size();
          for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
          {
              const unsigned char c = (unsigned char) str[ii];
              if( (c < MIN) || (c > MAX) || (c == PIPE) )
              {
                  str[ii] = replacement;
              }
          }
      }

      // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
      // allowable code points for XML. Specifically, they are:
      // 0x09, 0x0a, 0x0d, and 0x20 on up.  JC
      // Returns a copy of `instr` with every other byte removed.
      std::string strip_invalid_xml(const std::string& instr)
      {
          std::string output;
          output.reserve( instr.size() );
          std::string::const_iterator it = instr.begin();
          while (it != instr.end())
          {
              // Must compare as unsigned for >=
              // Test most likely match first
              const unsigned char c = (unsigned char)*it;
              if (   c >= (unsigned char)0x20   // SPACE
                  || c == (unsigned char)0x09   // TAB
                  || c == (unsigned char)0x0a   // LINE_FEED
                  || c == (unsigned char)0x0d ) // CARRIAGE_RETURN
              {
                  output.push_back(c);
              }
              ++it;
          }
          return output;
      }

      /**
       * @brief Replace all control characters (c < 0x20) with replacement in
       * string. Bytes of 0x80 and above are left untouched.
       */
      void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
      {
          const unsigned char MIN = 0x20;
          std::basic_string<char>::size_type len = string.size();
          for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
          {
              const unsigned char c = (unsigned char) string[ii];
              if(c < MIN)
              {
                  string[ii] = replacement;
              }
          }
      }
  }
  807. ////////////////////////////////////////////////////////////
  808. // Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime.
  // An explicit specialization has to be declared before the first call that
  // would otherwise instantiate the primary template; formatDatetime, defined
  // further down, makes such a call.
  809. template<>
  810. S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
  811. //static
  812. template<>
  813. void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
  814. {
  815. std::string currToken;
  816. std::string::size_type begIdx, endIdx;
  817. begIdx = instr.find_first_not_of (delims);
  818. while (begIdx != std::string::npos)
  819. {
  820. endIdx = instr.find_first_of (delims, begIdx);
  821. if (endIdx == std::string::npos)
  822. {
  823. endIdx = instr.length();
  824. }
  825. currToken = instr.substr(begIdx, endIdx - begIdx);
  826. LLStringUtil::trim (currToken);
  827. tokens.push_back(currToken);
  828. begIdx = instr.find_first_not_of (delims, endIdx);
  829. }
  830. }
  831. template<>
  832. LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens)
  833. {
  834. const std::string delims (",");
  835. // Find the first [
  836. size_type pos1 = instr.find('[', start);
  837. if (pos1 == std::string::npos)
  838. return std::string::npos;
  839. //Find the first ] after the initial [
  840. size_type pos2 = instr.find(']', pos1);
  841. if (pos2 == std::string::npos)
  842. return std::string::npos;
  843. // Find the last [ before ] in case of nested [[]]
  844. pos1 = instr.find_last_of('[', pos2-1);
  845. if (pos1 == std::string::npos || pos1 < start)
  846. return std::string::npos;
  847. getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims);
  848. start = pos2+1;
  849. return pos1;
  850. }
  851. // static
  852. template<>
  853. bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions)
  854. {
  855. // see if we have a replacement for the bracketed string (without the brackets)
  856. // test first using has() because if we just look up with operator[] we get back an
  857. // empty string even if the value is missing. We want to distinguish between
  858. // missing replacements and deliberately empty replacement strings.
  859. format_map_t::const_iterator iter = substitutions.find(token);
  860. if (iter != substitutions.end())
  861. {
  862. replacement = iter->second;
  863. return true;
  864. }
  865. // if not, see if there's one WITH brackets
  866. iter = substitutions.find(std::string("[" + token + "]"));
  867. if (iter != substitutions.end())
  868. {
  869. replacement = iter->second;
  870. return true;
  871. }
  872. return false;
  873. }
  874. // static
  875. template<>
  876. bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions)
  877. {
  878. // see if we have a replacement for the bracketed string (without the brackets)
  879. // test first using has() because if we just look up with operator[] we get back an
  880. // empty string even if the value is missing. We want to distinguish between
  881. // missing replacements and deliberately empty replacement strings.
  882. if (substitutions.has(token))
  883. {
  884. replacement = substitutions[token].asString();
  885. return true;
  886. }
  887. // if not, see if there's one WITH brackets
  888. else if (substitutions.has(std::string("[" + token + "]")))
  889. {
  890. replacement = substitutions[std::string("[" + token + "]")].asString();
  891. return true;
  892. }
  893. return false;
  894. }
  895. //static
  896. template<>
  897. void LLStringUtil::setLocale(std::string inLocale)
  898. {
  899. sLocale = inLocale;
  900. };
  901. //static
  902. template<>
  903. std::string LLStringUtil::getLocale(void)
  904. {
  905. return sLocale;
  906. };
  907. // static
  908. template<>
  909. void LLStringUtil::formatNumber(std::string& numStr, std::string decimals)
  910. {
  911. std::stringstream strStream;
  912. S32 intDecimals = 0;
  913. convertToS32 (decimals, intDecimals);
  914. if (!sLocale.empty())
  915. {
  916. // std::locale() throws if the locale is unknown! (EXT-7926)
  917. try
  918. {
  919. strStream.imbue(std::locale(sLocale.c_str()));
  920. } catch (const std::exception &)
  921. {
  922. LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL;
  923. }
  924. }
  925. if (!intDecimals)
  926. {
  927. S32 intStr;
  928. if (convertToS32(numStr, intStr))
  929. {
  930. strStream << intStr;
  931. numStr = strStream.str();
  932. }
  933. }
  934. else
  935. {
  936. F32 floatStr;
  937. if (convertToF32(numStr, floatStr))
  938. {
  939. strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr;
  940. numStr = strStream.str();
  941. }
  942. }
  943. }
  944. // static
  945. template<>
  946. bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
  947. std::string param, S32 secFromEpoch)
  948. {
  949. if (param == "local") // local
  950. {
  951. secFromEpoch -= LLStringOps::getLocalTimeOffset();
  952. }
  953. else if (param != "utc") // slt
  954. {
  955. secFromEpoch -= LLStringOps::getPacificTimeOffset();
  956. }
  957. // if never fell into those two ifs above, param must be utc
  958. if (secFromEpoch < 0) secFromEpoch = 0;
  959. LLDate datetime((F64)secFromEpoch);
  960. std::string code = LLStringOps::getDatetimeCode (token);
  961. // special case to handle timezone
  962. if (code == "%Z") {
  963. if (param == "utc")
  964. {
  965. replacement = "GMT";
  966. }
  967. else if (param == "local")
  968. {
  969. replacement = ""; // user knows their own timezone
  970. }
  971. else
  972. {
  973. // "slt" = Second Life Time, which is deprecated.
  974. // If not utc or user local time, fallback to Pacific time
  975. replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST";
  976. }
  977. return true;
  978. }
  979. //EXT-7013
  980. //few codes are not suppotred by strtime function (example - weekdays for Japanise)
  981. //so use predefined ones
  982. //if sWeekDayList is not empty than current locale doesn't support
  983. //weekday name.
  984. time_t loc_seconds = (time_t) secFromEpoch;
  985. if(LLStringOps::sWeekDayList.size() == 7 && code == "%A")
  986. {
  987. struct tm * gmt = gmtime (&loc_seconds);
  988. replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
  989. }
  990. else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
  991. {
  992. struct tm * gmt = gmtime (&loc_seconds);
  993. replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
  994. }
  995. else if(LLStringOps::sMonthList.size() == 12 && code == "%B")
  996. {
  997. struct tm * gmt = gmtime (&loc_seconds);
  998. replacement = LLStringOps::sMonthList[gmt->tm_mon];
  999. }
  1000. else if( !LLStringOps::sDayFormat.empty() && code == "%d" )
  1001. {
  1002. struct tm * gmt = gmtime (&loc_seconds);
  1003. LLStringUtil::format_map_t args;
  1004. args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
  1005. replacement = LLStringOps::sDayFormat;
  1006. LLStringUtil::format(replacement, args);
  1007. }
  1008. else if (code == "%-d")
  1009. {
  1010. struct tm * gmt = gmtime (&loc_seconds);
  1011. replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero
  1012. }
  1013. else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" )
  1014. {
  1015. struct tm * gmt = gmtime (&loc_seconds);
  1016. if(gmt->tm_hour<12)
  1017. {
  1018. replacement = LLStringOps::sAM;
  1019. }
  1020. else
  1021. {
  1022. replacement = LLStringOps::sPM;
  1023. }
  1024. }
  1025. else
  1026. {
  1027. replacement = datetime.toHTTPDateString(code);
  1028. }
  1029. // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format
  1030. // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738).
  1031. // We could have used '%l' format instead, but it's not supported by Windows.
  1032. if(code == "%I" && token == "hour12" && replacement.at(0) == '0')
  1033. {
  1034. replacement = replacement.at(1);
  1035. }
  1036. return !code.empty();
  1037. }
  1038. // LLStringUtil::format recogizes the following patterns.
  1039. // All substitutions *must* be encased in []'s in the input string.
  1040. // The []'s are optional in the substitution map.
  1041. // [FOO_123]
  1042. // [FOO,number,precision]
  1043. // [FOO,datetime,format]
  1044. // static
  1045. template<>
  1046. S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
  1047. {
  1048. LLFastTimer ft(FT_STRING_FORMAT);
  1049. S32 res = 0;
  1050. std::string output;
  1051. std::vector<std::string> tokens;
  1052. std::string::size_type start = 0;
  1053. std::string::size_type prev_start = 0;
  1054. std::string::size_type key_start = 0;
  1055. while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
  1056. {
  1057. output += std::string(s, prev_start, key_start-prev_start);
  1058. prev_start = start;
  1059. bool found_replacement = false;
  1060. std::string replacement;
  1061. if (tokens.size() == 0)
  1062. {
  1063. found_replacement = false;
  1064. }
  1065. else if (tokens.size() == 1)
  1066. {
  1067. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1068. }
  1069. else if (tokens[1] == "number")
  1070. {
  1071. std::string param = "0";
  1072. if (tokens.size() > 2) param = tokens[2];
  1073. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1074. if (found_replacement) formatNumber (replacement, param);
  1075. }
  1076. else if (tokens[1] == "datetime")
  1077. {
  1078. std::string param;
  1079. if (tokens.size() > 2) param = tokens[2];
  1080. format_map_t::const_iterator iter = substitutions.find("datetime");
  1081. if (iter != substitutions.end())
  1082. {
  1083. S32 secFromEpoch = 0;
  1084. BOOL r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
  1085. if (r)
  1086. {
  1087. found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
  1088. }
  1089. }
  1090. }
  1091. if (found_replacement)
  1092. {
  1093. output += replacement;
  1094. res++;
  1095. }
  1096. else
  1097. {
  1098. // we had no replacement, use the string as is
  1099. // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
  1100. output += std::string(s, key_start, start-key_start);
  1101. }
  1102. tokens.clear();
  1103. }
  1104. // send the remainder of the string (with no further matches for bracketed names)
  1105. output += std::string(s, start);
  1106. s = output;
  1107. return res;
  1108. }
  1109. //static
  1110. template<>
  1111. S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
  1112. {
  1113. LLFastTimer ft(FT_STRING_FORMAT);
  1114. S32 res = 0;
  1115. if (!substitutions.isMap())
  1116. {
  1117. return res;
  1118. }
  1119. std::string output;
  1120. std::vector<std::string> tokens;
  1121. std::string::size_type start = 0;
  1122. std::string::size_type prev_start = 0;
  1123. std::string::size_type key_start = 0;
  1124. while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
  1125. {
  1126. output += std::string(s, prev_start, key_start-prev_start);
  1127. prev_start = start;
  1128. bool found_replacement = false;
  1129. std::string replacement;
  1130. if (tokens.size() == 0)
  1131. {
  1132. found_replacement = false;
  1133. }
  1134. else if (tokens.size() == 1)
  1135. {
  1136. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1137. }
  1138. else if (tokens[1] == "number")
  1139. {
  1140. std::string param = "0";
  1141. if (tokens.size() > 2) param = tokens[2];
  1142. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1143. if (found_replacement) formatNumber (replacement, param);
  1144. }
  1145. else if (tokens[1] == "datetime")
  1146. {
  1147. std::string param;
  1148. if (tokens.size() > 2) param = tokens[2];
  1149. S32 secFromEpoch = (S32) substitutions["datetime"].asInteger();
  1150. found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch);
  1151. }
  1152. if (found_replacement)
  1153. {
  1154. output += replacement;
  1155. res++;
  1156. }
  1157. else
  1158. {
  1159. // we had no replacement, use the string as is
  1160. // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
  1161. output += std::string(s, key_start, start-key_start);
  1162. }
  1163. tokens.clear();
  1164. }
  1165. // send the remainder of the string (with no further matches for bracketed names)
  1166. output += std::string(s, start);
  1167. s = output;
  1168. return res;
  1169. }
  1170. ////////////////////////////////////////////////////////////
  1171. // Testing
  1172. #ifdef _DEBUG
  1173. template<class T>
  1174. void LLStringUtilBase<T>::testHarness()
  1175. {
  1176. std::string s1;
  1177. llassert( s1.c_str() == NULL );
  1178. llassert( s1.size() == 0 );
  1179. llassert( s1.empty() );
  1180. std::string s2( "hello");
  1181. llassert( !strcmp( s2.c_str(), "hello" ) );
  1182. llassert( s2.size() == 5 );
  1183. llassert( !s2.empty() );
  1184. std::string s3( s2 );
  1185. llassert( "hello" == s2 );
  1186. llassert( s2 == "hello" );
  1187. llassert( s2 > "gello" );
  1188. llassert( "gello" < s2 );
  1189. llassert( "gello" != s2 );
  1190. llassert( s2 != "gello" );
  1191. std::string s4 = s2;
  1192. llassert( !s4.empty() );
  1193. s4.empty();
  1194. llassert( s4.empty() );
  1195. std::string s5("");
  1196. llassert( s5.empty() );
  1197. llassert( isValidIndex(s5, 0) );
  1198. llassert( !isValidIndex(s5, 1) );
  1199. s3 = s2;
  1200. s4 = "hello again";
  1201. s4 += "!";
  1202. s4 += s4;
  1203. llassert( s4 == "hello again!hello again!" );
  1204. std::string s6 = s2 + " " + s2;
  1205. std::string s7 = s6;
  1206. llassert( s6 == s7 );
  1207. llassert( !( s6 != s7) );
  1208. llassert( !(s6 < s7) );
  1209. llassert( !(s6 > s7) );
  1210. llassert( !(s6 == "hi"));
  1211. llassert( s6 == "hello hello");
  1212. llassert( s6 < "hi");
  1213. llassert( s6[1] == 'e' );
  1214. s6[1] = 'f';
  1215. llassert( s6[1] == 'f' );
  1216. s2.erase( 4, 1 );
  1217. llassert( s2 == "hell");
  1218. s2.insert( 0, 'y' );
  1219. llassert( s2 == "yhell");
  1220. s2.erase( 1, 3 );
  1221. llassert( s2 == "yl");
  1222. s2.insert( 1, "awn, don't yel");
  1223. llassert( s2 == "yawn, don't yell");
  1224. std::string s8 = s2.substr( 6, 5 );
  1225. llassert( s8 == "don't" );
  1226. std::string s9 = " \t\ntest \t\t\n ";
  1227. trim(s9);
  1228. llassert( s9 == "test" );
  1229. s8 = "abc123&*(ABC";
  1230. s9 = s8;
  1231. toUpper(s9);
  1232. llassert( s9 == "ABC123&*(ABC" );
  1233. s9 = s8;
  1234. toLower(s9);
  1235. llassert( s9 == "abc123&*(abc" );
  1236. std::string s10( 10, 'x' );
  1237. llassert( s10 == "xxxxxxxxxx" );
  1238. std::string s11( "monkey in the middle", 7, 2 );
  1239. llassert( s11 == "in" );
  1240. std::string s12; //empty
  1241. s12 += "foo";
  1242. llassert( s12 == "foo" );
  1243. std::string s13; //empty
  1244. s13 += 'f';
  1245. llassert( s13 == "f" );
  1246. }
  1247. #endif // _DEBUG