PageRenderTime 87ms CodeModel.GetById 24ms RepoModel.GetById 2ms app.codeStats 1ms

/indra/llcommon/llstring.cpp

https://bitbucket.org/marinekelley/rlv/
C++ | 1441 lines | 1134 code | 175 blank | 132 comment | 275 complexity | f5f56aadbd19c89d5e707913c44aea67 MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause
  1. /**
  2. * @file llstring.cpp
  3. * @brief String utility functions and the std::string class.
  4. *
  5. * $LicenseInfo:firstyear=2001&license=viewerlgpl$
  6. * Second Life Viewer Source Code
  7. * Copyright (C) 2010, Linden Research, Inc.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation;
  12. * version 2.1 of the License only.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. *
  23. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  24. * $/LicenseInfo$
  25. */
  26. #include "linden_common.h"
  27. #include "llstring.h"
  28. #include "llerror.h"
  29. #include "llfasttimer.h"
  30. #include "llsd.h"
  31. #if LL_WINDOWS
  32. #include "llwin32headerslean.h"
  33. #include <winnls.h> // for WideCharToMultiByte
  34. #endif
  35. LLTrace::BlockTimerStatHandle FT_STRING_FORMAT("String Format");
  36. std::string ll_safe_string(const char* in)
  37. {
  38. if(in) return std::string(in);
  39. return std::string();
  40. }
  41. std::string ll_safe_string(const char* in, S32 maxlen)
  42. {
  43. if(in && maxlen > 0 ) return std::string(in, maxlen);
  44. return std::string();
  45. }
  46. bool is_char_hex(char hex)
  47. {
  48. if((hex >= '0') && (hex <= '9'))
  49. {
  50. return true;
  51. }
  52. else if((hex >= 'a') && (hex <='f'))
  53. {
  54. return true;
  55. }
  56. else if((hex >= 'A') && (hex <='F'))
  57. {
  58. return true;
  59. }
  60. return false; // uh - oh, not hex any more...
  61. }
  62. U8 hex_as_nybble(char hex)
  63. {
  64. if((hex >= '0') && (hex <= '9'))
  65. {
  66. return (U8)(hex - '0');
  67. }
  68. else if((hex >= 'a') && (hex <='f'))
  69. {
  70. return (U8)(10 + hex - 'a');
  71. }
  72. else if((hex >= 'A') && (hex <='F'))
  73. {
  74. return (U8)(10 + hex - 'A');
  75. }
  76. return 0; // uh - oh, not hex any more...
  77. }
  78. bool iswindividual(llwchar elem)
  79. {
  80. U32 cur_char = (U32)elem;
  81. bool result = false;
  82. if (0x2E80<= cur_char && cur_char <= 0x9FFF)
  83. {
  84. result = true;
  85. }
  86. else if (0xAC00<= cur_char && cur_char <= 0xD7A0 )
  87. {
  88. result = true;
  89. }
  90. else if (0xF900<= cur_char && cur_char <= 0xFA60 )
  91. {
  92. result = true;
  93. }
  94. return result;
  95. }
  96. bool _read_file_into_string(std::string& str, const std::string& filename)
  97. {
  98. llifstream ifs(filename.c_str(), llifstream::binary);
  99. if (!ifs.is_open())
  100. {
  101. LL_INFOS() << "Unable to open file " << filename << LL_ENDL;
  102. return false;
  103. }
  104. std::ostringstream oss;
  105. oss << ifs.rdbuf();
  106. str = oss.str();
  107. ifs.close();
  108. return true;
  109. }
  110. // See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
  111. // for the Unicode implementation - this doesn't match because it was written before finding
  112. // it.
  113. std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
  114. {
  115. std::string utf8_str = wstring_to_utf8str(wstr);
  116. s << utf8_str;
  117. return s;
  118. }
  119. std::string rawstr_to_utf8(const std::string& raw)
  120. {
  121. LLWString wstr(utf8str_to_wstring(raw));
  122. return wstring_to_utf8str(wstr);
  123. }
  124. S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
  125. {
  126. U32 cur_char = (U32)in_char;
  127. char* base = outchars;
  128. if (cur_char < 0x80)
  129. {
  130. *outchars++ = (U8)cur_char;
  131. }
  132. else if (cur_char < 0x800)
  133. {
  134. *outchars++ = 0xC0 | (cur_char >> 6);
  135. *outchars++ = 0x80 | (cur_char & 0x3F);
  136. }
  137. else if (cur_char < 0x10000)
  138. {
  139. *outchars++ = 0xE0 | (cur_char >> 12);
  140. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  141. *outchars++ = 0x80 | (cur_char & 0x3F);
  142. }
  143. else if (cur_char < 0x200000)
  144. {
  145. *outchars++ = 0xF0 | (cur_char >> 18);
  146. *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
  147. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  148. *outchars++ = 0x80 | (cur_char & 0x3F);
  149. }
  150. else if (cur_char < 0x4000000)
  151. {
  152. *outchars++ = 0xF8 | (cur_char >> 24);
  153. *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
  154. *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
  155. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  156. *outchars++ = 0x80 | (cur_char & 0x3F);
  157. }
  158. else if (cur_char < 0x80000000)
  159. {
  160. *outchars++ = 0xFC | (cur_char >> 30);
  161. *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
  162. *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
  163. *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
  164. *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
  165. *outchars++ = 0x80 | (cur_char & 0x3F);
  166. }
  167. else
  168. {
  169. LL_WARNS() << "Invalid Unicode character " << cur_char << "!" << LL_ENDL;
  170. *outchars++ = LL_UNKNOWN_CHAR;
  171. }
  172. return outchars - base;
  173. }
  174. S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
  175. {
  176. const U16* base = inchars;
  177. U16 cur_char = *inchars++;
  178. llwchar char32 = cur_char;
  179. if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
  180. {
  181. // Surrogates
  182. char32 = ((llwchar)(cur_char - 0xD800)) << 10;
  183. cur_char = *inchars++;
  184. char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
  185. }
  186. else
  187. {
  188. char32 = (llwchar)cur_char;
  189. }
  190. *outchar = char32;
  191. return inchars - base;
  192. }
  193. llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
  194. {
  195. llutf16string out;
  196. S32 i = 0;
  197. while (i < len)
  198. {
  199. U32 cur_char = utf32str[i];
  200. if (cur_char > 0xFFFF)
  201. {
  202. out += (0xD7C0 + (cur_char >> 10));
  203. out += (0xDC00 | (cur_char & 0x3FF));
  204. }
  205. else
  206. {
  207. out += cur_char;
  208. }
  209. i++;
  210. }
  211. return out;
  212. }
  213. llutf16string wstring_to_utf16str(const LLWString &utf32str)
  214. {
  215. const S32 len = (S32)utf32str.length();
  216. return wstring_to_utf16str(utf32str, len);
  217. }
  218. llutf16string utf8str_to_utf16str ( const std::string& utf8str )
  219. {
  220. LLWString wstr = utf8str_to_wstring ( utf8str );
  221. return wstring_to_utf16str ( wstr );
  222. }
  223. LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
  224. {
  225. LLWString wout;
  226. if((len <= 0) || utf16str.empty()) return wout;
  227. S32 i = 0;
  228. // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
  229. const U16* chars16 = &(*(utf16str.begin()));
  230. while (i < len)
  231. {
  232. llwchar cur_char;
  233. i += utf16chars_to_wchar(chars16+i, &cur_char);
  234. wout += cur_char;
  235. }
  236. return wout;
  237. }
  238. LLWString utf16str_to_wstring(const llutf16string &utf16str)
  239. {
  240. const S32 len = (S32)utf16str.length();
  241. return utf16str_to_wstring(utf16str, len);
  242. }
  243. // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
  244. S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
  245. {
  246. S32 surrogate_pairs = 0;
  247. // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
  248. const U16 *const utf16_chars = &(*(utf16str.begin()));
  249. S32 i = 0;
  250. while (i < utf16_len)
  251. {
  252. const U16 c = utf16_chars[i++];
  253. if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16
  254. { // Have first byte of a surrogate pair
  255. if (i >= utf16_len)
  256. {
  257. break;
  258. }
  259. const U16 d = utf16_chars[i];
  260. if (d >= 0xDC00 && d <= 0xDFFF)
  261. { // Have valid second byte of a surrogate pair
  262. surrogate_pairs++;
  263. i++;
  264. }
  265. }
  266. }
  267. return utf16_len - surrogate_pairs;
  268. }
  269. // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
  270. S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
  271. {
  272. const S32 end = llmin((S32)wstr.length(), woffset + wlen);
  273. if (end < woffset)
  274. {
  275. return 0;
  276. }
  277. else
  278. {
  279. S32 length = end - woffset;
  280. for (S32 i = woffset; i < end; i++)
  281. {
  282. if (wstr[i] >= 0x10000)
  283. {
  284. length++;
  285. }
  286. }
  287. return length;
  288. }
  289. }
  290. // Given a wstring and an offset in it, returns the length as wstring (i.e.,
  291. // number of llwchars) of the longest substring that starts at the offset
  292. // and whose equivalent utf-16 string does not exceeds the given utf16_length.
  293. S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
  294. {
  295. const S32 end = wstr.length();
  296. BOOL u = FALSE;
  297. S32 n = woffset + utf16_length;
  298. S32 i = woffset;
  299. while (i < end)
  300. {
  301. if (wstr[i] >= 0x10000)
  302. {
  303. --n;
  304. }
  305. if (i >= n)
  306. {
  307. u = (i > n);
  308. break;
  309. }
  310. i++;
  311. }
  312. if (unaligned)
  313. {
  314. *unaligned = u;
  315. }
  316. return i - woffset;
  317. }
  318. S32 wchar_utf8_length(const llwchar wc)
  319. {
  320. if (wc < 0x80)
  321. {
  322. // This case will also catch negative values which are
  323. // technically invalid.
  324. return 1;
  325. }
  326. else if (wc < 0x800)
  327. {
  328. return 2;
  329. }
  330. else if (wc < 0x10000)
  331. {
  332. return 3;
  333. }
  334. else if (wc < 0x200000)
  335. {
  336. return 4;
  337. }
  338. else if (wc < 0x4000000)
  339. {
  340. return 5;
  341. }
  342. else
  343. {
  344. return 6;
  345. }
  346. }
  347. S32 wstring_utf8_length(const LLWString& wstr)
  348. {
  349. S32 len = 0;
  350. for (S32 i = 0; i < (S32)wstr.length(); i++)
  351. {
  352. len += wchar_utf8_length(wstr[i]);
  353. }
  354. return len;
  355. }
  356. LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
  357. {
  358. LLWString wout;
  359. S32 i = 0;
  360. while (i < len)
  361. {
  362. llwchar unichar;
  363. U8 cur_char = utf8str[i];
  364. if (cur_char < 0x80)
  365. {
  366. // Ascii character, just add it
  367. unichar = cur_char;
  368. }
  369. else
  370. {
  371. S32 cont_bytes = 0;
  372. if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32
  373. {
  374. unichar = (0x1F&cur_char);
  375. cont_bytes = 1;
  376. }
  377. else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32
  378. {
  379. unichar = (0x0F&cur_char);
  380. cont_bytes = 2;
  381. }
  382. else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32
  383. {
  384. unichar = (0x07&cur_char);
  385. cont_bytes = 3;
  386. }
  387. else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32
  388. {
  389. unichar = (0x03&cur_char);
  390. cont_bytes = 4;
  391. }
  392. else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32
  393. {
  394. unichar = (0x01&cur_char);
  395. cont_bytes = 5;
  396. }
  397. else
  398. {
  399. wout += LL_UNKNOWN_CHAR;
  400. ++i;
  401. continue;
  402. }
  403. // Check that this character doesn't go past the end of the string
  404. S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
  405. do
  406. {
  407. ++i;
  408. cur_char = utf8str[i];
  409. if ( (cur_char >> 6) == 0x2 )
  410. {
  411. unichar <<= 6;
  412. unichar += (0x3F&cur_char);
  413. }
  414. else
  415. {
  416. // Malformed sequence - roll back to look at this as a new char
  417. unichar = LL_UNKNOWN_CHAR;
  418. --i;
  419. break;
  420. }
  421. } while(i < end);
  422. // Handle overlong characters and NULL characters
  423. if ( ((cont_bytes == 1) && (unichar < 0x80))
  424. || ((cont_bytes == 2) && (unichar < 0x800))
  425. || ((cont_bytes == 3) && (unichar < 0x10000))
  426. || ((cont_bytes == 4) && (unichar < 0x200000))
  427. || ((cont_bytes == 5) && (unichar < 0x4000000)) )
  428. {
  429. unichar = LL_UNKNOWN_CHAR;
  430. }
  431. }
  432. wout += unichar;
  433. ++i;
  434. }
  435. return wout;
  436. }
  437. LLWString utf8str_to_wstring(const std::string& utf8str)
  438. {
  439. const S32 len = (S32)utf8str.length();
  440. return utf8str_to_wstring(utf8str, len);
  441. }
  442. std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
  443. {
  444. std::string out;
  445. S32 i = 0;
  446. while (i < len)
  447. {
  448. char tchars[8]; /* Flawfinder: ignore */
  449. S32 n = wchar_to_utf8chars(utf32str[i], tchars);
  450. tchars[n] = 0;
  451. out += tchars;
  452. i++;
  453. }
  454. return out;
  455. }
  456. std::string wstring_to_utf8str(const LLWString& utf32str)
  457. {
  458. const S32 len = (S32)utf32str.length();
  459. return wstring_to_utf8str(utf32str, len);
  460. }
  461. std::string utf16str_to_utf8str(const llutf16string& utf16str)
  462. {
  463. return wstring_to_utf8str(utf16str_to_wstring(utf16str));
  464. }
  465. std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
  466. {
  467. return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
  468. }
  469. std::string utf8str_trim(const std::string& utf8str)
  470. {
  471. LLWString wstr = utf8str_to_wstring(utf8str);
  472. LLWStringUtil::trim(wstr);
  473. return wstring_to_utf8str(wstr);
  474. }
  475. std::string utf8str_tolower(const std::string& utf8str)
  476. {
  477. LLWString out_str = utf8str_to_wstring(utf8str);
  478. LLWStringUtil::toLower(out_str);
  479. return wstring_to_utf8str(out_str);
  480. }
  481. S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
  482. {
  483. LLWString wlhs = utf8str_to_wstring(lhs);
  484. LLWString wrhs = utf8str_to_wstring(rhs);
  485. return LLWStringUtil::compareInsensitive(wlhs, wrhs);
  486. }
  487. std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
  488. {
  489. if (0 == max_len)
  490. {
  491. return std::string();
  492. }
  493. if ((S32)utf8str.length() <= max_len)
  494. {
  495. return utf8str;
  496. }
  497. else
  498. {
  499. S32 cur_char = max_len;
  500. // If we're ASCII, we don't need to do anything
  501. if ((U8)utf8str[cur_char] > 0x7f)
  502. {
  503. // If first two bits are (10), it's the tail end of a multibyte char. We need to shift back
  504. // to the first character
  505. while (0x80 == (0xc0 & utf8str[cur_char]))
  506. {
  507. cur_char--;
  508. // Keep moving forward until we hit the first char;
  509. if (cur_char == 0)
  510. {
  511. // Make sure we don't trash memory if we've got a bogus string.
  512. break;
  513. }
  514. }
  515. }
  516. // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
  517. return utf8str.substr(0, cur_char);
  518. }
  519. }
  520. std::string utf8str_substChar(
  521. const std::string& utf8str,
  522. const llwchar target_char,
  523. const llwchar replace_char)
  524. {
  525. LLWString wstr = utf8str_to_wstring(utf8str);
  526. LLWStringUtil::replaceChar(wstr, target_char, replace_char);
  527. //wstr = wstring_substChar(wstr, target_char, replace_char);
  528. return wstring_to_utf8str(wstr);
  529. }
  530. std::string utf8str_makeASCII(const std::string& utf8str)
  531. {
  532. LLWString wstr = utf8str_to_wstring(utf8str);
  533. LLWStringUtil::_makeASCII(wstr);
  534. return wstring_to_utf8str(wstr);
  535. }
  536. std::string mbcsstring_makeASCII(const std::string& wstr)
  537. {
  538. // Replace non-ASCII chars with replace_char
  539. std::string out_str = wstr;
  540. for (S32 i = 0; i < (S32)out_str.length(); i++)
  541. {
  542. if ((U8)out_str[i] > 0x7f)
  543. {
  544. out_str[i] = LL_UNKNOWN_CHAR;
  545. }
  546. }
  547. return out_str;
  548. }
  549. std::string utf8str_removeCRLF(const std::string& utf8str)
  550. {
  551. if (0 == utf8str.length())
  552. {
  553. return std::string();
  554. }
  555. const char CR = 13;
  556. std::string out;
  557. out.reserve(utf8str.length());
  558. const S32 len = (S32)utf8str.length();
  559. for( S32 i = 0; i < len; i++ )
  560. {
  561. if( utf8str[i] != CR )
  562. {
  563. out.push_back(utf8str[i]);
  564. }
  565. }
  566. return out;
  567. }
  568. #if LL_WINDOWS
  569. // documentation moved to header. Phoenix 2007-11-27
  570. namespace snprintf_hack
  571. {
  572. int snprintf(char *str, size_t size, const char *format, ...)
  573. {
  574. va_list args;
  575. va_start(args, format);
  576. int num_written = _vsnprintf(str, size, format, args); /* Flawfinder: ignore */
  577. va_end(args);
  578. str[size-1] = '\0'; // always null terminate
  579. return num_written;
  580. }
  581. }
  582. std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
  583. {
  584. std::string out;
  585. if(in)
  586. {
  587. int len_in = wcslen(in);
  588. int len_out = WideCharToMultiByte(
  589. code_page,
  590. 0,
  591. in,
  592. len_in,
  593. NULL,
  594. 0,
  595. 0,
  596. 0);
  597. // We will need two more bytes for the double NULL ending
  598. // created in WideCharToMultiByte().
  599. char* pout = new char [len_out + 2];
  600. memset(pout, 0, len_out + 2);
  601. if(pout)
  602. {
  603. WideCharToMultiByte(
  604. code_page,
  605. 0,
  606. in,
  607. len_in,
  608. pout,
  609. len_out,
  610. 0,
  611. 0);
  612. out.assign(pout);
  613. delete[] pout;
  614. }
  615. }
  616. return out;
  617. }
  618. wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
  619. {
  620. // From review:
  621. // We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
  622. // plus one for a null terminator, and be guaranteed to not overflow.
  623. // Normally, I'd call that sort of thing premature optimization,
  624. // but we *are* seeing string operations taking a bunch of time, especially when constructing widgets.
  625. // int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0);
  626. // reserve place to NULL terminator
  627. int output_str_len = in.length();
  628. wchar_t* w_out = new wchar_t[output_str_len + 1];
  629. memset(w_out, 0, output_str_len + 1);
  630. int real_output_str_len = MultiByteToWideChar (code_page, 0, in.c_str(), in.length(), w_out, output_str_len);
  631. //looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
  632. w_out[real_output_str_len] = 0;
  633. return w_out;
  634. }
  635. std::string ll_convert_string_to_utf8_string(const std::string& in)
  636. {
  637. wchar_t* w_mesg = ll_convert_string_to_wide(in, CP_ACP);
  638. std::string out_utf8(ll_convert_wide_to_string(w_mesg, CP_UTF8));
  639. delete[] w_mesg;
  640. return out_utf8;
  641. }
  642. #endif // LL_WINDOWS
  643. long LLStringOps::sPacificTimeOffset = 0;
  644. long LLStringOps::sLocalTimeOffset = 0;
  645. bool LLStringOps::sPacificDaylightTime = 0;
  646. std::map<std::string, std::string> LLStringOps::datetimeToCodes;
  647. std::vector<std::string> LLStringOps::sWeekDayList;
  648. std::vector<std::string> LLStringOps::sWeekDayShortList;
  649. std::vector<std::string> LLStringOps::sMonthList;
  650. std::vector<std::string> LLStringOps::sMonthShortList;
  651. std::string LLStringOps::sDayFormat;
  652. std::string LLStringOps::sAM;
  653. std::string LLStringOps::sPM;
  654. S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
  655. {
  656. #if LL_WINDOWS
  657. // in Windows, wide string functions operator on 16-bit strings,
  658. // not the proper 32 bit wide string
  659. return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
  660. #else
  661. return wcscoll(a, b);
  662. #endif
  663. }
  664. void LLStringOps::setupDatetimeInfo (bool daylight)
  665. {
  666. time_t nowT, localT, gmtT;
  667. struct tm * tmpT;
  668. nowT = time (NULL);
  669. tmpT = gmtime (&nowT);
  670. gmtT = mktime (tmpT);
  671. tmpT = localtime (&nowT);
  672. localT = mktime (tmpT);
  673. sLocalTimeOffset = (long) (gmtT - localT);
  674. if (tmpT->tm_isdst)
  675. {
  676. sLocalTimeOffset -= 60 * 60; // 1 hour
  677. }
  678. sPacificDaylightTime = daylight;
  679. sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;
  680. datetimeToCodes["wkday"] = "%a"; // Thu
  681. datetimeToCodes["weekday"] = "%A"; // Thursday
  682. datetimeToCodes["year4"] = "%Y"; // 2009
  683. datetimeToCodes["year"] = "%Y"; // 2009
  684. datetimeToCodes["year2"] = "%y"; // 09
  685. datetimeToCodes["mth"] = "%b"; // Aug
  686. datetimeToCodes["month"] = "%B"; // August
  687. datetimeToCodes["mthnum"] = "%m"; // 08
  688. datetimeToCodes["day"] = "%d"; // 31
  689. datetimeToCodes["sday"] = "%-d"; // 9
  690. datetimeToCodes["hour24"] = "%H"; // 14
  691. datetimeToCodes["hour"] = "%H"; // 14
  692. datetimeToCodes["hour12"] = "%I"; // 02
  693. datetimeToCodes["min"] = "%M"; // 59
  694. datetimeToCodes["ampm"] = "%p"; // AM
  695. datetimeToCodes["second"] = "%S"; // 59
  696. datetimeToCodes["timezone"] = "%Z"; // PST
  697. }
  698. void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output)
  699. {
  700. output.clear();
  701. size_t length = data.size();
  702. // tokenize it and put it in the array
  703. std::string cur_word;
  704. for(size_t i = 0; i < length; ++i)
  705. {
  706. if(data[i] == ':')
  707. {
  708. output.push_back(cur_word);
  709. cur_word.clear();
  710. }
  711. else
  712. {
  713. cur_word.append(1, data[i]);
  714. }
  715. }
  716. output.push_back(cur_word);
  717. }
  718. void LLStringOps::setupWeekDaysNames(const std::string& data)
  719. {
  720. tokenizeStringToArray(data,sWeekDayList);
  721. }
  722. void LLStringOps::setupWeekDaysShortNames(const std::string& data)
  723. {
  724. tokenizeStringToArray(data,sWeekDayShortList);
  725. }
  726. void LLStringOps::setupMonthNames(const std::string& data)
  727. {
  728. tokenizeStringToArray(data,sMonthList);
  729. }
  730. void LLStringOps::setupMonthShortNames(const std::string& data)
  731. {
  732. tokenizeStringToArray(data,sMonthShortList);
  733. }
  734. void LLStringOps::setupDayFormat(const std::string& data)
  735. {
  736. sDayFormat = data;
  737. }
  738. std::string LLStringOps::getDatetimeCode (std::string key)
  739. {
  740. std::map<std::string, std::string>::iterator iter;
  741. iter = datetimeToCodes.find (key);
  742. if (iter != datetimeToCodes.end())
  743. {
  744. return iter->second;
  745. }
  746. else
  747. {
  748. return std::string("");
  749. }
  750. }
  751. namespace LLStringFn
  752. {
  753. // NOTE - this restricts output to ascii
  754. void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
  755. {
  756. const char MIN = 0x20;
  757. std::basic_string<char>::size_type len = string.size();
  758. for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
  759. {
  760. if(string[ii] < MIN)
  761. {
  762. string[ii] = replacement;
  763. }
  764. }
  765. }
  766. // NOTE - this restricts output to ascii
  767. void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
  768. char replacement)
  769. {
  770. const char MIN = 0x20;
  771. const char PIPE = 0x7c;
  772. std::basic_string<char>::size_type len = str.size();
  773. for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
  774. {
  775. if( (str[ii] < MIN) || (str[ii] == PIPE) )
  776. {
  777. str[ii] = replacement;
  778. }
  779. }
  780. }
  781. // https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
  782. // allowable code points for XML. Specifically, they are:
  783. // 0x09, 0x0a, 0x0d, and 0x20 on up. JC
  784. std::string strip_invalid_xml(const std::string& instr)
  785. {
  786. std::string output;
  787. output.reserve( instr.size() );
  788. std::string::const_iterator it = instr.begin();
  789. while (it != instr.end())
  790. {
  791. // Must compare as unsigned for >=
  792. // Test most likely match first
  793. const unsigned char c = (unsigned char)*it;
  794. if ( c >= (unsigned char)0x20 // SPACE
  795. || c == (unsigned char)0x09 // TAB
  796. || c == (unsigned char)0x0a // LINE_FEED
  797. || c == (unsigned char)0x0d ) // CARRIAGE_RETURN
  798. {
  799. output.push_back(c);
  800. }
  801. ++it;
  802. }
  803. return output;
  804. }
  805. /**
  806. * @brief Replace all control characters (c < 0x20) with replacement in
  807. * string.
  808. */
  809. void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
  810. {
  811. const unsigned char MIN = 0x20;
  812. std::basic_string<char>::size_type len = string.size();
  813. for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
  814. {
  815. const unsigned char c = (unsigned char) string[ii];
  816. if(c < MIN)
  817. {
  818. string[ii] = replacement;
  819. }
  820. }
  821. }
  822. }
  823. ////////////////////////////////////////////////////////////
  824. // Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime.
  825. template<>
  826. S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
  827. //static
  828. template<>
  829. void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
  830. {
  831. // Starting at offset 0, scan forward for the next non-delimiter. We're
  832. // done when the only characters left in 'instr' are delimiters.
  833. for (std::string::size_type begIdx, endIdx = 0;
  834. (begIdx = instr.find_first_not_of (delims, endIdx)) != std::string::npos; )
  835. {
  836. // Found a non-delimiter. After that, find the next delimiter.
  837. endIdx = instr.find_first_of (delims, begIdx);
  838. if (endIdx == std::string::npos)
  839. {
  840. // No more delimiters: this token extends to the end of the string.
  841. endIdx = instr.length();
  842. }
  843. // extract the token between begIdx and endIdx; substr() needs length
  844. std::string currToken(instr.substr(begIdx, endIdx - begIdx));
  845. LLStringUtil::trim (currToken);
  846. tokens.push_back(currToken);
  847. // next scan past delimiters starts at endIdx
  848. }
  849. }
  850. template<>
  851. LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens)
  852. {
  853. const std::string delims (",");
  854. // Find the first [
  855. size_type pos1 = instr.find('[', start);
  856. if (pos1 == std::string::npos)
  857. return std::string::npos;
  858. //Find the first ] after the initial [
  859. size_type pos2 = instr.find(']', pos1);
  860. if (pos2 == std::string::npos)
  861. return std::string::npos;
  862. // Find the last [ before ] in case of nested [[]]
  863. pos1 = instr.find_last_of('[', pos2-1);
  864. if (pos1 == std::string::npos || pos1 < start)
  865. return std::string::npos;
  866. getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims);
  867. start = pos2+1;
  868. return pos1;
  869. }
  870. // static
  871. template<>
  872. bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions)
  873. {
  874. // see if we have a replacement for the bracketed string (without the brackets)
  875. // test first using has() because if we just look up with operator[] we get back an
  876. // empty string even if the value is missing. We want to distinguish between
  877. // missing replacements and deliberately empty replacement strings.
  878. format_map_t::const_iterator iter = substitutions.find(token);
  879. if (iter != substitutions.end())
  880. {
  881. replacement = iter->second;
  882. return true;
  883. }
  884. // if not, see if there's one WITH brackets
  885. iter = substitutions.find(std::string("[" + token + "]"));
  886. if (iter != substitutions.end())
  887. {
  888. replacement = iter->second;
  889. return true;
  890. }
  891. return false;
  892. }
  893. // static
  894. template<>
  895. bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions)
  896. {
  897. // see if we have a replacement for the bracketed string (without the brackets)
  898. // test first using has() because if we just look up with operator[] we get back an
  899. // empty string even if the value is missing. We want to distinguish between
  900. // missing replacements and deliberately empty replacement strings.
  901. if (substitutions.has(token))
  902. {
  903. replacement = substitutions[token].asString();
  904. return true;
  905. }
  906. // if not, see if there's one WITH brackets
  907. else if (substitutions.has(std::string("[" + token + "]")))
  908. {
  909. replacement = substitutions[std::string("[" + token + "]")].asString();
  910. return true;
  911. }
  912. return false;
  913. }
  914. //static
  915. template<>
  916. void LLStringUtil::setLocale(std::string inLocale)
  917. {
  918. sLocale = inLocale;
  919. };
  920. //static
  921. template<>
  922. std::string LLStringUtil::getLocale(void)
  923. {
  924. return sLocale;
  925. };
  926. // static
  927. template<>
  928. void LLStringUtil::formatNumber(std::string& numStr, std::string decimals)
  929. {
  930. std::stringstream strStream;
  931. S32 intDecimals = 0;
  932. convertToS32 (decimals, intDecimals);
  933. if (!sLocale.empty())
  934. {
  935. // std::locale() throws if the locale is unknown! (EXT-7926)
  936. try
  937. {
  938. strStream.imbue(std::locale(sLocale.c_str()));
  939. } catch (const std::exception &)
  940. {
  941. LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL;
  942. }
  943. }
  944. if (!intDecimals)
  945. {
  946. S32 intStr;
  947. if (convertToS32(numStr, intStr))
  948. {
  949. strStream << intStr;
  950. numStr = strStream.str();
  951. }
  952. }
  953. else
  954. {
  955. F32 floatStr;
  956. if (convertToF32(numStr, floatStr))
  957. {
  958. strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr;
  959. numStr = strStream.str();
  960. }
  961. }
  962. }
  963. // static
  964. template<>
  965. bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
  966. std::string param, S32 secFromEpoch)
  967. {
  968. if (param == "local") // local
  969. {
  970. secFromEpoch -= LLStringOps::getLocalTimeOffset();
  971. }
  972. else if (param != "utc") // slt
  973. {
  974. secFromEpoch -= LLStringOps::getPacificTimeOffset();
  975. }
  976. // if never fell into those two ifs above, param must be utc
  977. if (secFromEpoch < 0) secFromEpoch = 0;
  978. LLDate datetime((F64)secFromEpoch);
  979. std::string code = LLStringOps::getDatetimeCode (token);
  980. // special case to handle timezone
  981. if (code == "%Z") {
  982. if (param == "utc")
  983. {
  984. replacement = "GMT";
  985. }
  986. else if (param == "local")
  987. {
  988. replacement = ""; // user knows their own timezone
  989. }
  990. else
  991. {
  992. // "slt" = Second Life Time, which is deprecated.
  993. // If not utc or user local time, fallback to Pacific time
  994. replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST";
  995. }
  996. return true;
  997. }
  998. //EXT-7013
  999. //few codes are not suppotred by strtime function (example - weekdays for Japanise)
  1000. //so use predefined ones
  1001. //if sWeekDayList is not empty than current locale doesn't support
  1002. //weekday name.
  1003. time_t loc_seconds = (time_t) secFromEpoch;
  1004. if(LLStringOps::sWeekDayList.size() == 7 && code == "%A")
  1005. {
  1006. struct tm * gmt = gmtime (&loc_seconds);
  1007. replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
  1008. }
  1009. else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
  1010. {
  1011. struct tm * gmt = gmtime (&loc_seconds);
  1012. replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
  1013. }
  1014. else if(LLStringOps::sMonthList.size() == 12 && code == "%B")
  1015. {
  1016. struct tm * gmt = gmtime (&loc_seconds);
  1017. replacement = LLStringOps::sMonthList[gmt->tm_mon];
  1018. }
  1019. else if( !LLStringOps::sDayFormat.empty() && code == "%d" )
  1020. {
  1021. struct tm * gmt = gmtime (&loc_seconds);
  1022. LLStringUtil::format_map_t args;
  1023. args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
  1024. replacement = LLStringOps::sDayFormat;
  1025. LLStringUtil::format(replacement, args);
  1026. }
  1027. else if (code == "%-d")
  1028. {
  1029. struct tm * gmt = gmtime (&loc_seconds);
  1030. replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero
  1031. }
  1032. else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" )
  1033. {
  1034. struct tm * gmt = gmtime (&loc_seconds);
  1035. if(gmt->tm_hour<12)
  1036. {
  1037. replacement = LLStringOps::sAM;
  1038. }
  1039. else
  1040. {
  1041. replacement = LLStringOps::sPM;
  1042. }
  1043. }
  1044. else
  1045. {
  1046. replacement = datetime.toHTTPDateString(code);
  1047. }
  1048. // *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format
  1049. // to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738).
  1050. // We could have used '%l' format instead, but it's not supported by Windows.
  1051. if(code == "%I" && token == "hour12" && replacement.at(0) == '0')
  1052. {
  1053. replacement = replacement.at(1);
  1054. }
  1055. return !code.empty();
  1056. }
  1057. // LLStringUtil::format recogizes the following patterns.
  1058. // All substitutions *must* be encased in []'s in the input string.
  1059. // The []'s are optional in the substitution map.
  1060. // [FOO_123]
  1061. // [FOO,number,precision]
  1062. // [FOO,datetime,format]
  1063. // static
  1064. template<>
  1065. S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
  1066. {
  1067. LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT);
  1068. S32 res = 0;
  1069. std::string output;
  1070. std::vector<std::string> tokens;
  1071. std::string::size_type start = 0;
  1072. std::string::size_type prev_start = 0;
  1073. std::string::size_type key_start = 0;
  1074. while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
  1075. {
  1076. output += std::string(s, prev_start, key_start-prev_start);
  1077. prev_start = start;
  1078. bool found_replacement = false;
  1079. std::string replacement;
  1080. if (tokens.size() == 0)
  1081. {
  1082. found_replacement = false;
  1083. }
  1084. else if (tokens.size() == 1)
  1085. {
  1086. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1087. }
  1088. else if (tokens[1] == "number")
  1089. {
  1090. std::string param = "0";
  1091. if (tokens.size() > 2) param = tokens[2];
  1092. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1093. if (found_replacement) formatNumber (replacement, param);
  1094. }
  1095. else if (tokens[1] == "datetime")
  1096. {
  1097. std::string param;
  1098. if (tokens.size() > 2) param = tokens[2];
  1099. format_map_t::const_iterator iter = substitutions.find("datetime");
  1100. if (iter != substitutions.end())
  1101. {
  1102. S32 secFromEpoch = 0;
  1103. BOOL r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
  1104. if (r)
  1105. {
  1106. found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
  1107. }
  1108. }
  1109. }
  1110. if (found_replacement)
  1111. {
  1112. output += replacement;
  1113. res++;
  1114. }
  1115. else
  1116. {
  1117. // we had no replacement, use the string as is
  1118. // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
  1119. output += std::string(s, key_start, start-key_start);
  1120. }
  1121. tokens.clear();
  1122. }
  1123. // send the remainder of the string (with no further matches for bracketed names)
  1124. output += std::string(s, start);
  1125. s = output;
  1126. return res;
  1127. }
  1128. //static
  1129. template<>
  1130. S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
  1131. {
  1132. LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT);
  1133. S32 res = 0;
  1134. if (!substitutions.isMap())
  1135. {
  1136. return res;
  1137. }
  1138. std::string output;
  1139. std::vector<std::string> tokens;
  1140. std::string::size_type start = 0;
  1141. std::string::size_type prev_start = 0;
  1142. std::string::size_type key_start = 0;
  1143. while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
  1144. {
  1145. output += std::string(s, prev_start, key_start-prev_start);
  1146. prev_start = start;
  1147. bool found_replacement = false;
  1148. std::string replacement;
  1149. if (tokens.size() == 0)
  1150. {
  1151. found_replacement = false;
  1152. }
  1153. else if (tokens.size() == 1)
  1154. {
  1155. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1156. }
  1157. else if (tokens[1] == "number")
  1158. {
  1159. std::string param = "0";
  1160. if (tokens.size() > 2) param = tokens[2];
  1161. found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
  1162. if (found_replacement) formatNumber (replacement, param);
  1163. }
  1164. else if (tokens[1] == "datetime")
  1165. {
  1166. std::string param;
  1167. if (tokens.size() > 2) param = tokens[2];
  1168. S32 secFromEpoch = (S32) substitutions["datetime"].asInteger();
  1169. found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch);
  1170. }
  1171. if (found_replacement)
  1172. {
  1173. output += replacement;
  1174. res++;
  1175. }
  1176. else
  1177. {
  1178. // we had no replacement, use the string as is
  1179. // e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
  1180. output += std::string(s, key_start, start-key_start);
  1181. }
  1182. tokens.clear();
  1183. }
  1184. // send the remainder of the string (with no further matches for bracketed names)
  1185. output += std::string(s, start);
  1186. s = output;
  1187. return res;
  1188. }
  ////////////////////////////////////////////////////////////
  // Testing
  #ifdef _DEBUG
  // Debug-only self test: exercises basic std::string behaviour together with
  // the isValidIndex / trim / toUpper / toLower helpers, using llassert.
  template<class T>
  void LLStringUtilBase<T>::testHarness()
  {
      // Default-constructed string: c_str() is never NULL for std::string,
      // it points at an empty, null-terminated buffer.
      std::string s1;
      llassert( s1.c_str() != NULL );
      llassert( s1.c_str()[0] == '\0' );
      llassert( s1.size() == 0 );
      llassert( s1.empty() );

      std::string s2( "hello");
      llassert( !strcmp( s2.c_str(), "hello" ) );
      llassert( s2.size() == 5 );
      llassert( !s2.empty() );

      // Copy construction and comparison operators.
      std::string s3( s2 );
      llassert( s3 == s2 );
      llassert( "hello" == s2 );
      llassert( s2 == "hello" );
      llassert( s2 > "gello" );
      llassert( "gello" < s2 );
      llassert( "gello" != s2 );
      llassert( s2 != "gello" );

      // clear() empties the string; empty() is only a query and changes nothing.
      std::string s4 = s2;
      llassert( !s4.empty() );
      s4.clear();
      llassert( s4.empty() );

      std::string s5("");
      llassert( s5.empty() );
      // NOTE(review): these two expectations depend on how isValidIndex treats
      // an empty string; its definition is not visible here - confirm.
      llassert( isValidIndex(s5, 0) );
      llassert( !isValidIndex(s5, 1) );

      // Assignment and appending, including self-append.
      s3 = s2;
      llassert( s3 == "hello" );
      s4 = "hello again";
      s4 += "!";
      s4 += s4;
      llassert( s4 == "hello again!hello again!" );

      // Concatenation and relational operators.
      std::string s6 = s2 + " " + s2;
      std::string s7 = s6;
      llassert( s6 == s7 );
      llassert( !( s6 != s7) );
      llassert( !(s6 < s7) );
      llassert( !(s6 > s7) );
      llassert( !(s6 == "hi"));
      llassert( s6 == "hello hello");
      llassert( s6 < "hi");

      // Element access.
      llassert( s6[1] == 'e' );
      s6[1] = 'f';
      llassert( s6[1] == 'f' );

      // erase / insert / substr.
      s2.erase( 4, 1 );
      llassert( s2 == "hell");
      s2.insert( 0, "y" );
      llassert( s2 == "yhell");
      s2.erase( 1, 3 );
      llassert( s2 == "yl");
      s2.insert( 1, "awn, don't yel");
      llassert( s2 == "yawn, don't yell");
      std::string s8 = s2.substr( 6, 5 );
      llassert( s8 == "don't" );

      // Whitespace trimming and case conversion helpers.
      std::string s9 = " \t\ntest \t\t\n ";
      trim(s9);
      llassert( s9 == "test" );
      s8 = "abc123&*(ABC";
      s9 = s8;
      toUpper(s9);
      llassert( s9 == "ABC123&*(ABC" );
      s9 = s8;
      toLower(s9);
      llassert( s9 == "abc123&*(abc" );

      // Fill and substring constructors.
      std::string s10( 10, 'x' );
      llassert( s10 == "xxxxxxxxxx" );
      std::string s11( "monkey in the middle", 7, 2 );
      llassert( s11 == "in" );

      // Appending to an empty string.
      std::string s12; //empty
      s12 += "foo";
      llassert( s12 == "foo" );
      std::string s13; //empty
      s13 += 'f';
      llassert( s13 == "f" );
  }
  #endif // _DEBUG