PageRenderTime 92ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/indra/llcommon/llstring.h

https://bitbucket.org/lindenlab/viewer-beta/
C Header | 1326 lines | 876 code | 182 blank | 268 comment | 167 complexity | 5bedd0755951dd9dd854114c1f01fc70 MD5 | raw file
Possible License(s): LGPL-2.1
  1. /**
  2. * @file llstring.h
  3. * @brief String utility functions and std::string class.
  4. *
  5. * $LicenseInfo:firstyear=2001&license=viewerlgpl$
  6. * Second Life Viewer Source Code
  7. * Copyright (C) 2010, Linden Research, Inc.
  8. *
  9. * This library is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation;
  12. * version 2.1 of the License only.
  13. *
  14. * This library is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with this library; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. *
  23. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  24. * $/LicenseInfo$
  25. */
  26. #ifndef LL_LLSTRING_H
  27. #define LL_LLSTRING_H
  28. #include <string>
  29. #include <cstdio>
  30. #include <locale>
  31. #include <iomanip>
  32. #include "llsd.h"
  33. #include "llfasttimer.h"
  34. #if LL_LINUX || LL_SOLARIS
  35. #include <wctype.h>
  36. #include <wchar.h>
  37. #endif
  38. #include <string.h>
  39. #if LL_SOLARIS
  40. // stricmp and strnicmp do not exist on Solaris:
  41. #define stricmp strcasecmp
  42. #define strnicmp strncasecmp
  43. #endif
  44. const char LL_UNKNOWN_CHAR = '?';
  45. #if LL_DARWIN || LL_LINUX || LL_SOLARIS
  46. // Template specialization of char_traits for U16s. Only necessary on Mac and Linux (exists on Windows already)
  47. #include <cstring>
  48. namespace std
  49. {
  50. template<>
  51. struct char_traits<U16>
  52. {
  53. typedef U16 char_type;
  54. typedef int int_type;
  55. typedef streampos pos_type;
  56. typedef streamoff off_type;
  57. typedef mbstate_t state_type;
  58. static void
  59. assign(char_type& __c1, const char_type& __c2)
  60. { __c1 = __c2; }
  61. static bool
  62. eq(const char_type& __c1, const char_type& __c2)
  63. { return __c1 == __c2; }
  64. static bool
  65. lt(const char_type& __c1, const char_type& __c2)
  66. { return __c1 < __c2; }
  67. static int
  68. compare(const char_type* __s1, const char_type* __s2, size_t __n)
  69. { return memcmp(__s1, __s2, __n * sizeof(char_type)); }
  70. static size_t
  71. length(const char_type* __s)
  72. {
  73. const char_type *cur_char = __s;
  74. while (*cur_char != 0)
  75. {
  76. ++cur_char;
  77. }
  78. return cur_char - __s;
  79. }
  80. static const char_type*
  81. find(const char_type* __s, size_t __n, const char_type& __a)
  82. { return static_cast<const char_type*>(memchr(__s, __a, __n * sizeof(char_type))); }
  83. static char_type*
  84. move(char_type* __s1, const char_type* __s2, size_t __n)
  85. { return static_cast<char_type*>(memmove(__s1, __s2, __n * sizeof(char_type))); }
  86. static char_type*
  87. copy(char_type* __s1, const char_type* __s2, size_t __n)
  88. { return static_cast<char_type*>(memcpy(__s1, __s2, __n * sizeof(char_type))); } /* Flawfinder: ignore */
  89. static char_type*
  90. assign(char_type* __s, size_t __n, char_type __a)
  91. {
  92. // This isn't right.
  93. //return static_cast<char_type*>(memset(__s, __a, __n * sizeof(char_type)));
  94. // I don't think there's a standard 'memset' for 16-bit values.
  95. // Do this the old-fashioned way.
  96. size_t __i;
  97. for(__i = 0; __i < __n; __i++)
  98. {
  99. __s[__i] = __a;
  100. }
  101. return __s;
  102. }
  103. static char_type
  104. to_char_type(const int_type& __c)
  105. { return static_cast<char_type>(__c); }
  106. static int_type
  107. to_int_type(const char_type& __c)
  108. { return static_cast<int_type>(__c); }
  109. static bool
  110. eq_int_type(const int_type& __c1, const int_type& __c2)
  111. { return __c1 == __c2; }
  112. static int_type
  113. eof() { return static_cast<int_type>(EOF); }
  114. static int_type
  115. not_eof(const int_type& __c)
  116. { return (__c == eof()) ? 0 : __c; }
  117. };
  118. };
  119. #endif
  120. class LL_COMMON_API LLStringOps
  121. {
  122. private:
  123. static long sPacificTimeOffset;
  124. static long sLocalTimeOffset;
  125. static bool sPacificDaylightTime;
  126. static std::map<std::string, std::string> datetimeToCodes;
  127. public:
  128. static std::vector<std::string> sWeekDayList;
  129. static std::vector<std::string> sWeekDayShortList;
  130. static std::vector<std::string> sMonthList;
  131. static std::vector<std::string> sMonthShortList;
  132. static std::string sDayFormat;
  133. static std::string sAM;
  134. static std::string sPM;
  135. static char toUpper(char elem) { return toupper((unsigned char)elem); }
  136. static llwchar toUpper(llwchar elem) { return towupper(elem); }
  137. static char toLower(char elem) { return tolower((unsigned char)elem); }
  138. static llwchar toLower(llwchar elem) { return towlower(elem); }
  139. static bool isSpace(char elem) { return isspace((unsigned char)elem) != 0; }
  140. static bool isSpace(llwchar elem) { return iswspace(elem) != 0; }
  141. static bool isUpper(char elem) { return isupper((unsigned char)elem) != 0; }
  142. static bool isUpper(llwchar elem) { return iswupper(elem) != 0; }
  143. static bool isLower(char elem) { return islower((unsigned char)elem) != 0; }
  144. static bool isLower(llwchar elem) { return iswlower(elem) != 0; }
  145. static bool isDigit(char a) { return isdigit((unsigned char)a) != 0; }
  146. static bool isDigit(llwchar a) { return iswdigit(a) != 0; }
  147. static bool isPunct(char a) { return ispunct((unsigned char)a) != 0; }
  148. static bool isPunct(llwchar a) { return iswpunct(a) != 0; }
  149. static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; }
  150. static bool isAlnum(llwchar a) { return iswalnum(a) != 0; }
  151. static S32 collate(const char* a, const char* b) { return strcoll(a, b); }
  152. static S32 collate(const llwchar* a, const llwchar* b);
  153. static void setupDatetimeInfo(bool pacific_daylight_time);
  154. static void setupWeekDaysNames(const std::string& data);
  155. static void setupWeekDaysShortNames(const std::string& data);
  156. static void setupMonthNames(const std::string& data);
  157. static void setupMonthShortNames(const std::string& data);
  158. static void setupDayFormat(const std::string& data);
  159. static long getPacificTimeOffset(void) { return sPacificTimeOffset;}
  160. static long getLocalTimeOffset(void) { return sLocalTimeOffset;}
  161. // Is the Pacific time zone (aka server time zone)
  162. // currently in daylight savings time?
  163. static bool getPacificDaylightTime(void) { return sPacificDaylightTime;}
  164. static std::string getDatetimeCode (std::string key);
  165. };
  166. /**
  167. * @brief Return a string constructed from in without crashing if the
  168. * pointer is NULL.
  169. */
  170. LL_COMMON_API std::string ll_safe_string(const char* in);
  171. LL_COMMON_API std::string ll_safe_string(const char* in, S32 maxlen);
  172. // Allowing assignments from non-strings into format_map_t is apparently
  173. // *really* error-prone, so subclass std::string with just basic c'tors.
  174. class LLFormatMapString
  175. {
  176. public:
  177. LLFormatMapString() {};
  178. LLFormatMapString(const char* s) : mString(ll_safe_string(s)) {};
  179. LLFormatMapString(const std::string& s) : mString(s) {};
  180. operator std::string() const { return mString; }
  181. bool operator<(const LLFormatMapString& rhs) const { return mString < rhs.mString; }
  182. std::size_t length() const { return mString.length(); }
  183. private:
  184. std::string mString;
  185. };
  186. template <class T>
  187. class LLStringUtilBase
  188. {
  189. private:
  190. static std::string sLocale;
  191. public:
  192. typedef typename std::basic_string<T>::size_type size_type;
  193. public:
  194. /////////////////////////////////////////////////////////////////////////////////////////
  195. // Static Utility functions that operate on std::strings
  196. static const std::basic_string<T> null;
  197. typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
  198. LL_COMMON_API static void getTokens(const std::basic_string<T>& instr, std::vector<std::basic_string<T> >& tokens, const std::basic_string<T>& delims);
  199. LL_COMMON_API static void formatNumber(std::basic_string<T>& numStr, std::basic_string<T> decimals);
  200. LL_COMMON_API static bool formatDatetime(std::basic_string<T>& replacement, std::basic_string<T> token, std::basic_string<T> param, S32 secFromEpoch);
  201. LL_COMMON_API static S32 format(std::basic_string<T>& s, const format_map_t& substitutions);
  202. LL_COMMON_API static S32 format(std::basic_string<T>& s, const LLSD& substitutions);
  203. LL_COMMON_API static bool simpleReplacement(std::basic_string<T>& replacement, std::basic_string<T> token, const format_map_t& substitutions);
  204. LL_COMMON_API static bool simpleReplacement(std::basic_string<T>& replacement, std::basic_string<T> token, const LLSD& substitutions);
  205. LL_COMMON_API static void setLocale (std::string inLocale);
  206. LL_COMMON_API static std::string getLocale (void);
  207. static bool isValidIndex(const std::basic_string<T>& string, size_type i)
  208. {
  209. return !string.empty() && (0 <= i) && (i <= string.size());
  210. }
  211. static void trimHead(std::basic_string<T>& string);
  212. static void trimTail(std::basic_string<T>& string);
  213. static void trim(std::basic_string<T>& string) { trimHead(string); trimTail(string); }
  214. static void truncate(std::basic_string<T>& string, size_type count);
  215. static void toUpper(std::basic_string<T>& string);
  216. static void toLower(std::basic_string<T>& string);
  217. // True if this is the head of s.
  218. static BOOL isHead( const std::basic_string<T>& string, const T* s );
  219. /**
  220. * @brief Returns true if string starts with substr
  221. *
  222. * If etither string or substr are empty, this method returns false.
  223. */
  224. static bool startsWith(
  225. const std::basic_string<T>& string,
  226. const std::basic_string<T>& substr);
  227. /**
  228. * @brief Returns true if string ends in substr
  229. *
  230. * If etither string or substr are empty, this method returns false.
  231. */
  232. static bool endsWith(
  233. const std::basic_string<T>& string,
  234. const std::basic_string<T>& substr);
  235. static void addCRLF(std::basic_string<T>& string);
  236. static void removeCRLF(std::basic_string<T>& string);
  237. static void replaceTabsWithSpaces( std::basic_string<T>& string, size_type spaces_per_tab );
  238. static void replaceNonstandardASCII( std::basic_string<T>& string, T replacement );
  239. static void replaceChar( std::basic_string<T>& string, T target, T replacement );
  240. static void replaceString( std::basic_string<T>& string, std::basic_string<T> target, std::basic_string<T> replacement );
  241. static BOOL containsNonprintable(const std::basic_string<T>& string);
  242. static void stripNonprintable(std::basic_string<T>& string);
  243. /**
  244. * @brief Unsafe way to make ascii characters. You should probably
  245. * only call this when interacting with the host operating system.
  246. * The 1 byte std::string does not work correctly.
  247. * The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII
  248. * should work.
  249. */
  250. static void _makeASCII(std::basic_string<T>& string);
  251. // Conversion to other data types
  252. static BOOL convertToBOOL(const std::basic_string<T>& string, BOOL& value);
  253. static BOOL convertToU8(const std::basic_string<T>& string, U8& value);
  254. static BOOL convertToS8(const std::basic_string<T>& string, S8& value);
  255. static BOOL convertToS16(const std::basic_string<T>& string, S16& value);
  256. static BOOL convertToU16(const std::basic_string<T>& string, U16& value);
  257. static BOOL convertToU32(const std::basic_string<T>& string, U32& value);
  258. static BOOL convertToS32(const std::basic_string<T>& string, S32& value);
  259. static BOOL convertToF32(const std::basic_string<T>& string, F32& value);
  260. static BOOL convertToF64(const std::basic_string<T>& string, F64& value);
  261. /////////////////////////////////////////////////////////////////////////////////////////
  262. // Utility functions for working with char*'s and strings
  263. // Like strcmp but also handles empty strings. Uses
  264. // current locale.
  265. static S32 compareStrings(const T* lhs, const T* rhs);
  266. static S32 compareStrings(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs);
  267. // case insensitive version of above. Uses current locale on
  268. // Win32, and falls back to a non-locale aware comparison on
  269. // Linux.
  270. static S32 compareInsensitive(const T* lhs, const T* rhs);
  271. static S32 compareInsensitive(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs);
  272. // Case sensitive comparison with good handling of numbers. Does not use current locale.
  273. // a.k.a. strdictcmp()
  274. static S32 compareDict(const std::basic_string<T>& a, const std::basic_string<T>& b);
  275. // Case *in*sensitive comparison with good handling of numbers. Does not use current locale.
  276. // a.k.a. strdictcmp()
  277. static S32 compareDictInsensitive(const std::basic_string<T>& a, const std::basic_string<T>& b);
  278. // Puts compareDict() in a form appropriate for LL container classes to use for sorting.
  279. static BOOL precedesDict( const std::basic_string<T>& a, const std::basic_string<T>& b );
  280. // A replacement for strncpy.
  281. // If the dst buffer is dst_size bytes long or more, ensures that dst is null terminated and holds
  282. // up to dst_size-1 characters of src.
  283. static void copy(T* dst, const T* src, size_type dst_size);
  284. // Copies src into dst at a given offset.
  285. static void copyInto(std::basic_string<T>& dst, const std::basic_string<T>& src, size_type offset);
  286. static bool isPartOfWord(T c) { return (c == (T)'_') || LLStringOps::isAlnum(c); }
  287. #ifdef _DEBUG
  288. LL_COMMON_API static void testHarness();
  289. #endif
  290. private:
  291. LL_COMMON_API static size_type getSubstitution(const std::basic_string<T>& instr, size_type& start, std::vector<std::basic_string<T> >& tokens);
  292. };
  293. template<class T> const std::basic_string<T> LLStringUtilBase<T>::null;
  294. template<class T> std::string LLStringUtilBase<T>::sLocale;
  295. typedef LLStringUtilBase<char> LLStringUtil;
  296. typedef LLStringUtilBase<llwchar> LLWStringUtil;
  297. typedef std::basic_string<llwchar> LLWString;
  298. //@ Use this where we want to disallow input in the form of "foo"
  299. // This is used to catch places where english text is embedded in the code
  300. // instead of in a translatable XUI file.
  301. class LLStringExplicit : public std::string
  302. {
  303. public:
  304. explicit LLStringExplicit(const char* s) : std::string(s) {}
  305. LLStringExplicit(const std::string& s) : std::string(s) {}
  306. LLStringExplicit(const std::string& s, size_type pos, size_type n = std::string::npos) : std::string(s, pos, n) {}
  307. };
  308. struct LLDictionaryLess
  309. {
  310. public:
  311. bool operator()(const std::string& a, const std::string& b)
  312. {
  313. return (LLStringUtil::precedesDict(a, b) ? true : false);
  314. }
  315. };
  316. /**
  317. * Simple support functions
  318. */
  319. /**
  320. * @brief chop off the trailing characters in a string.
  321. *
  322. * This function works on bytes rather than glyphs, so this will
  323. * incorrectly truncate non-single byte strings.
  324. * Use utf8str_truncate() for utf8 strings
  325. * @return a copy of in string minus the trailing count bytes.
  326. */
  327. inline std::string chop_tail_copy(
  328. const std::string& in,
  329. std::string::size_type count)
  330. {
  331. return std::string(in, 0, in.length() - count);
  332. }
  333. /**
  334. * @brief This translates a nybble stored as a hex value from 0-f back
  335. * to a nybble in the low order bits of the return byte.
  336. */
  337. LL_COMMON_API U8 hex_as_nybble(char hex);
  338. /**
  339. * @brief read the contents of a file into a string.
  340. *
  341. * Since this function has no concept of character encoding, most
 * anything you do with this method is ill-advised. Please avoid.
 * @param str [out] The string which will receive the file contents.
  344. * @param filename The full name of the file to read.
  345. * @return Returns true on success. If false, str is unmodified.
  346. */
  347. LL_COMMON_API bool _read_file_into_string(std::string& str, const std::string& filename);
  348. LL_COMMON_API bool iswindividual(llwchar elem);
  349. /**
  350. * Unicode support
  351. */
  352. // Make the incoming string a utf8 string. Replaces any unknown glyph
  353. // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
  354. // of the data may not be recovered.
  355. LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
  356. //
  357. // We should never use UTF16 except when communicating with Win32!
  358. //
  359. typedef std::basic_string<U16> llutf16string;
  360. LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
  361. LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
  362. LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
  363. LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
  364. LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
  365. LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
  366. LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
  367. LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
  368. // Same function, better name. JC
  369. inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
  370. //
  371. LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
  372. LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
  373. LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
  374. LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
  375. LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
  376. // Length of this UTF32 string in bytes when transformed to UTF8
  377. LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr);
  378. // Length in bytes of this wide char in a UTF8 string
  379. LL_COMMON_API S32 wchar_utf8_length(const llwchar wc);
  380. LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str);
  381. // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
  382. LL_COMMON_API S32 utf16str_wstring_length(const llutf16string &utf16str, S32 len);
  383. // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
  384. LL_COMMON_API S32 wstring_utf16_length(const LLWString & wstr, S32 woffset, S32 wlen);
  385. // Length in wstring (i.e., llwchar count) of a part of a wstring specified by utf16 length (i.e., utf16 units.)
  386. LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, S32 woffset, S32 utf16_length, BOOL *unaligned = NULL);
  387. /**
  388. * @brief Properly truncate a utf8 string to a maximum byte count.
  389. *
  390. * The returned string may be less than max_len if the truncation
  391. * happens in the middle of a glyph. If max_len is longer than the
  392. * string passed in, the return value == utf8str.
  393. * @param utf8str A valid utf8 string to truncate.
  394. * @param max_len The maximum number of bytes in the return value.
  395. * @return Returns a valid utf8 string with byte count <= max_len.
  396. */
  397. LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len);
  398. LL_COMMON_API std::string utf8str_trim(const std::string& utf8str);
  399. LL_COMMON_API S32 utf8str_compare_insensitive(
  400. const std::string& lhs,
  401. const std::string& rhs);
  402. /**
 * @brief Replace all occurrences of target_char with replace_char
  404. *
  405. * @param utf8str A utf8 string to process.
  406. * @param target_char The wchar to be replaced
  407. * @param replace_char The wchar which is written on replace
  408. */
  409. LL_COMMON_API std::string utf8str_substChar(
  410. const std::string& utf8str,
  411. const llwchar target_char,
  412. const llwchar replace_char);
  413. LL_COMMON_API std::string utf8str_makeASCII(const std::string& utf8str);
  414. // Hack - used for evil notecards.
  415. LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str);
  416. LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
  417. #if LL_WINDOWS
  418. /* @name Windows string helpers
  419. */
  420. //@{
  421. /**
 * @brief Implementation of the expected snprintf interface.
  423. *
  424. * If the size of the passed in buffer is not large enough to hold the string,
  425. * two bad things happen:
  426. * 1. resulting formatted string is NOT null terminated
  427. * 2. Depending on the platform, the return value could be a) the required
  428. * size of the buffer to copy the entire formatted string or b) -1.
  429. * On Windows with VS.Net 2003, it returns -1 e.g.
  430. *
  431. * safe_snprintf always adds a NULL terminator so that the caller does not
  432. * need to check for return value or need to add the NULL terminator.
  433. * It does not, however change the return value - to let the caller know
  434. * that the passed in buffer size was not large enough to hold the
  435. * formatted string.
  436. *
  437. */
// Deal with the differences on Windows
  439. namespace snprintf_hack
  440. {
  441. LL_COMMON_API int snprintf(char *str, size_t size, const char *format, ...);
  442. }
  443. using snprintf_hack::snprintf;
  444. /**
  445. * @brief Convert a wide string to std::string
  446. *
  447. * This replaces the unsafe W2A macro from ATL.
  448. */
  449. LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
  450. /**
  451. * Converts a string to wide string.
  452. *
  453. * It will allocate memory for result string with "new []". Don't forget to release it with "delete []".
  454. */
  455. LL_COMMON_API wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page);
  456. /**
 * Converts incoming string into utf8 string
  458. *
  459. */
  460. LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
  461. //@}
  462. #endif // LL_WINDOWS
  463. /**
  464. * Many of the 'strip' and 'replace' methods of LLStringUtilBase need
  465. * specialization to work with the signed char type.
  466. * Sadly, it is not possible (AFAIK) to specialize a single method of
  467. * a template class.
  468. * That stuff should go here.
  469. */
  470. namespace LLStringFn
  471. {
  472. /**
  473. * @brief Replace all non-printable characters with replacement in
  474. * string.
  475. * NOTE - this will zap non-ascii
  476. *
	 * @param [in,out] string the string to modify. out value is the string
  478. * with zero non-printable characters.
  479. * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
  480. */
  481. LL_COMMON_API void replace_nonprintable_in_ascii(
  482. std::basic_string<char>& string,
  483. char replacement);
  484. /**
  485. * @brief Replace all non-printable characters and pipe characters
  486. * with replacement in a string.
  487. * NOTE - this will zap non-ascii
  488. *
  489. * @param [in,out] the string to modify. out value is the string
  490. * with zero non-printable characters and zero pipe characters.
  491. * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
  492. */
  493. LL_COMMON_API void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
  494. char replacement);
  495. /**
  496. * @brief Remove all characters that are not allowed in XML 1.0.
  497. * Returns a copy of the string with those characters removed.
  498. * Works with US ASCII and UTF-8 encoded strings. JC
  499. */
  500. LL_COMMON_API std::string strip_invalid_xml(const std::string& input);
  501. /**
  502. * @brief Replace all control characters (0 <= c < 0x20) with replacement in
  503. * string. This is safe for utf-8
  504. *
	 * @param [in,out] string the string to modify. out value is the string
  506. * with zero non-printable characters.
  507. * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
  508. */
  509. LL_COMMON_API void replace_ascii_controlchars(
  510. std::basic_string<char>& string,
  511. char replacement);
  512. }
  513. ////////////////////////////////////////////////////////////
  514. // NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
  515. // There is no LLWStringUtil::format implementation currently.
// Calling these for anything other than LLStringUtil will produce link errors.
  517. ////////////////////////////////////////////////////////////
  518. // static
  519. template<class T>
  520. S32 LLStringUtilBase<T>::compareStrings(const T* lhs, const T* rhs)
  521. {
  522. S32 result;
  523. if( lhs == rhs )
  524. {
  525. result = 0;
  526. }
  527. else
  528. if ( !lhs || !lhs[0] )
  529. {
  530. result = ((!rhs || !rhs[0]) ? 0 : 1);
  531. }
  532. else
  533. if ( !rhs || !rhs[0])
  534. {
  535. result = -1;
  536. }
  537. else
  538. {
  539. result = LLStringOps::collate(lhs, rhs);
  540. }
  541. return result;
  542. }
  543. //static
  544. template<class T>
  545. S32 LLStringUtilBase<T>::compareStrings(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs)
  546. {
  547. return LLStringOps::collate(lhs.c_str(), rhs.c_str());
  548. }
  549. // static
  550. template<class T>
  551. S32 LLStringUtilBase<T>::compareInsensitive(const T* lhs, const T* rhs )
  552. {
  553. S32 result;
  554. if( lhs == rhs )
  555. {
  556. result = 0;
  557. }
  558. else
  559. if ( !lhs || !lhs[0] )
  560. {
  561. result = ((!rhs || !rhs[0]) ? 0 : 1);
  562. }
  563. else
  564. if ( !rhs || !rhs[0] )
  565. {
  566. result = -1;
  567. }
  568. else
  569. {
  570. std::basic_string<T> lhs_string(lhs);
  571. std::basic_string<T> rhs_string(rhs);
  572. LLStringUtilBase<T>::toUpper(lhs_string);
  573. LLStringUtilBase<T>::toUpper(rhs_string);
  574. result = LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
  575. }
  576. return result;
  577. }
  578. //static
  579. template<class T>
  580. S32 LLStringUtilBase<T>::compareInsensitive(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs)
  581. {
  582. std::basic_string<T> lhs_string(lhs);
  583. std::basic_string<T> rhs_string(rhs);
  584. LLStringUtilBase<T>::toUpper(lhs_string);
  585. LLStringUtilBase<T>::toUpper(rhs_string);
  586. return LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
  587. }
  588. // Case sensitive comparison with good handling of numbers. Does not use current locale.
  589. // a.k.a. strdictcmp()
  590. //static
  591. template<class T>
  592. S32 LLStringUtilBase<T>::compareDict(const std::basic_string<T>& astr, const std::basic_string<T>& bstr)
  593. {
  594. const T* a = astr.c_str();
  595. const T* b = bstr.c_str();
  596. T ca, cb;
  597. S32 ai, bi, cnt = 0;
  598. S32 bias = 0;
  599. ca = *(a++);
  600. cb = *(b++);
  601. while( ca && cb ){
  602. if( bias==0 ){
  603. if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); bias--; }
  604. if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); bias++; }
  605. }else{
  606. if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); }
  607. if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); }
  608. }
  609. if( LLStringOps::isDigit(ca) ){
  610. if( cnt-->0 ){
  611. if( cb!=ca ) break;
  612. }else{
  613. if( !LLStringOps::isDigit(cb) ) break;
  614. for(ai=0; LLStringOps::isDigit(a[ai]); ai++);
  615. for(bi=0; LLStringOps::isDigit(b[bi]); bi++);
  616. if( ai<bi ){ ca=0; break; }
  617. if( bi<ai ){ cb=0; break; }
  618. if( ca!=cb ) break;
  619. cnt = ai;
  620. }
  621. }else if( ca!=cb ){ break;
  622. }
  623. ca = *(a++);
  624. cb = *(b++);
  625. }
  626. if( ca==cb ) ca += bias;
  627. return ca-cb;
  628. }
  629. // static
  630. template<class T>
  631. S32 LLStringUtilBase<T>::compareDictInsensitive(const std::basic_string<T>& astr, const std::basic_string<T>& bstr)
  632. {
  633. const T* a = astr.c_str();
  634. const T* b = bstr.c_str();
  635. T ca, cb;
  636. S32 ai, bi, cnt = 0;
  637. ca = *(a++);
  638. cb = *(b++);
  639. while( ca && cb ){
  640. if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); }
  641. if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); }
  642. if( LLStringOps::isDigit(ca) ){
  643. if( cnt-->0 ){
  644. if( cb!=ca ) break;
  645. }else{
  646. if( !LLStringOps::isDigit(cb) ) break;
  647. for(ai=0; LLStringOps::isDigit(a[ai]); ai++);
  648. for(bi=0; LLStringOps::isDigit(b[bi]); bi++);
  649. if( ai<bi ){ ca=0; break; }
  650. if( bi<ai ){ cb=0; break; }
  651. if( ca!=cb ) break;
  652. cnt = ai;
  653. }
  654. }else if( ca!=cb ){ break;
  655. }
  656. ca = *(a++);
  657. cb = *(b++);
  658. }
  659. return ca-cb;
  660. }
  661. // Puts compareDict() in a form appropriate for LL container classes to use for sorting.
  662. // static
  663. template<class T>
  664. BOOL LLStringUtilBase<T>::precedesDict( const std::basic_string<T>& a, const std::basic_string<T>& b )
  665. {
  666. if( a.size() && b.size() )
  667. {
  668. return (LLStringUtilBase<T>::compareDict(a.c_str(), b.c_str()) < 0);
  669. }
  670. else
  671. {
  672. return (!b.empty());
  673. }
  674. }
  675. //static
  676. template<class T>
  677. void LLStringUtilBase<T>::toUpper(std::basic_string<T>& string)
  678. {
  679. if( !string.empty() )
  680. {
  681. std::transform(
  682. string.begin(),
  683. string.end(),
  684. string.begin(),
  685. (T(*)(T)) &LLStringOps::toUpper);
  686. }
  687. }
  688. //static
  689. template<class T>
  690. void LLStringUtilBase<T>::toLower(std::basic_string<T>& string)
  691. {
  692. if( !string.empty() )
  693. {
  694. std::transform(
  695. string.begin(),
  696. string.end(),
  697. string.begin(),
  698. (T(*)(T)) &LLStringOps::toLower);
  699. }
  700. }
  701. //static
  702. template<class T>
  703. void LLStringUtilBase<T>::trimHead(std::basic_string<T>& string)
  704. {
  705. if( !string.empty() )
  706. {
  707. size_type i = 0;
  708. while( i < string.length() && LLStringOps::isSpace( string[i] ) )
  709. {
  710. i++;
  711. }
  712. string.erase(0, i);
  713. }
  714. }
  715. //static
  716. template<class T>
  717. void LLStringUtilBase<T>::trimTail(std::basic_string<T>& string)
  718. {
  719. if( string.size() )
  720. {
  721. size_type len = string.length();
  722. size_type i = len;
  723. while( i > 0 && LLStringOps::isSpace( string[i-1] ) )
  724. {
  725. i--;
  726. }
  727. string.erase( i, len - i );
  728. }
  729. }
  730. // Replace line feeds with carriage return-line feed pairs.
  731. //static
  732. template<class T>
  733. void LLStringUtilBase<T>::addCRLF(std::basic_string<T>& string)
  734. {
  735. const T LF = 10;
  736. const T CR = 13;
  737. // Count the number of line feeds
  738. size_type count = 0;
  739. size_type len = string.size();
  740. size_type i;
  741. for( i = 0; i < len; i++ )
  742. {
  743. if( string[i] == LF )
  744. {
  745. count++;
  746. }
  747. }
  748. // Insert a carriage return before each line feed
  749. if( count )
  750. {
  751. size_type size = len + count;
  752. T *t = new T[size];
  753. size_type j = 0;
  754. for( i = 0; i < len; ++i )
  755. {
  756. if( string[i] == LF )
  757. {
  758. t[j] = CR;
  759. ++j;
  760. }
  761. t[j] = string[i];
  762. ++j;
  763. }
  764. string.assign(t, size);
  765. delete[] t;
  766. }
  767. }
  768. // Remove all carriage returns
  769. //static
  770. template<class T>
  771. void LLStringUtilBase<T>::removeCRLF(std::basic_string<T>& string)
  772. {
  773. const T CR = 13;
  774. size_type cr_count = 0;
  775. size_type len = string.size();
  776. size_type i;
  777. for( i = 0; i < len - cr_count; i++ )
  778. {
  779. if( string[i+cr_count] == CR )
  780. {
  781. cr_count++;
  782. }
  783. string[i] = string[i+cr_count];
  784. }
  785. string.erase(i, cr_count);
  786. }
  787. //static
  788. template<class T>
  789. void LLStringUtilBase<T>::replaceChar( std::basic_string<T>& string, T target, T replacement )
  790. {
  791. size_type found_pos = 0;
  792. while( (found_pos = string.find(target, found_pos)) != std::basic_string<T>::npos )
  793. {
  794. string[found_pos] = replacement;
  795. found_pos++; // avoid infinite defeat if target == replacement
  796. }
  797. }
  798. //static
  799. template<class T>
  800. void LLStringUtilBase<T>::replaceString( std::basic_string<T>& string, std::basic_string<T> target, std::basic_string<T> replacement )
  801. {
  802. size_type found_pos = 0;
  803. while( (found_pos = string.find(target, found_pos)) != std::basic_string<T>::npos )
  804. {
  805. string.replace( found_pos, target.length(), replacement );
  806. found_pos += replacement.length(); // avoid infinite defeat if replacement contains target
  807. }
  808. }
  809. //static
  810. template<class T>
  811. void LLStringUtilBase<T>::replaceNonstandardASCII( std::basic_string<T>& string, T replacement )
  812. {
  813. const char LF = 10;
  814. const S8 MIN = 32;
  815. // const S8 MAX = 127;
  816. size_type len = string.size();
  817. for( size_type i = 0; i < len; i++ )
  818. {
  819. // No need to test MAX < mText[i] because we treat mText[i] as a signed char,
  820. // which has a max value of 127.
  821. if( ( S8(string[i]) < MIN ) && (string[i] != LF) )
  822. {
  823. string[i] = replacement;
  824. }
  825. }
  826. }
  827. //static
  828. template<class T>
  829. void LLStringUtilBase<T>::replaceTabsWithSpaces( std::basic_string<T>& str, size_type spaces_per_tab )
  830. {
  831. const T TAB = '\t';
  832. const T SPACE = ' ';
  833. std::basic_string<T> out_str;
  834. // Replace tabs with spaces
  835. for (size_type i = 0; i < str.length(); i++)
  836. {
  837. if (str[i] == TAB)
  838. {
  839. for (size_type j = 0; j < spaces_per_tab; j++)
  840. out_str += SPACE;
  841. }
  842. else
  843. {
  844. out_str += str[i];
  845. }
  846. }
  847. str = out_str;
  848. }
  849. //static
  850. template<class T>
  851. BOOL LLStringUtilBase<T>::containsNonprintable(const std::basic_string<T>& string)
  852. {
  853. const char MIN = 32;
  854. BOOL rv = FALSE;
  855. for (size_type i = 0; i < string.size(); i++)
  856. {
  857. if(string[i] < MIN)
  858. {
  859. rv = TRUE;
  860. break;
  861. }
  862. }
  863. return rv;
  864. }
  865. //static
  866. template<class T>
  867. void LLStringUtilBase<T>::stripNonprintable(std::basic_string<T>& string)
  868. {
  869. const char MIN = 32;
  870. size_type j = 0;
  871. if (string.empty())
  872. {
  873. return;
  874. }
  875. size_t src_size = string.size();
  876. char* c_string = new char[src_size + 1];
  877. if(c_string == NULL)
  878. {
  879. return;
  880. }
  881. copy(c_string, string.c_str(), src_size+1);
  882. char* write_head = &c_string[0];
  883. for (size_type i = 0; i < src_size; i++)
  884. {
  885. char* read_head = &string[i];
  886. write_head = &c_string[j];
  887. if(!(*read_head < MIN))
  888. {
  889. *write_head = *read_head;
  890. ++j;
  891. }
  892. }
  893. c_string[j]= '\0';
  894. string = c_string;
  895. delete []c_string;
  896. }
  897. template<class T>
  898. void LLStringUtilBase<T>::_makeASCII(std::basic_string<T>& string)
  899. {
  900. // Replace non-ASCII chars with LL_UNKNOWN_CHAR
  901. for (size_type i = 0; i < string.length(); i++)
  902. {
  903. if (string[i] > 0x7f)
  904. {
  905. string[i] = LL_UNKNOWN_CHAR;
  906. }
  907. }
  908. }
  909. // static
  910. template<class T>
  911. void LLStringUtilBase<T>::copy( T* dst, const T* src, size_type dst_size )
  912. {
  913. if( dst_size > 0 )
  914. {
  915. size_type min_len = 0;
  916. if( src )
  917. {
  918. min_len = llmin( dst_size - 1, strlen( src ) ); /* Flawfinder: ignore */
  919. memcpy(dst, src, min_len * sizeof(T)); /* Flawfinder: ignore */
  920. }
  921. dst[min_len] = '\0';
  922. }
  923. }
  924. // static
  925. template<class T>
  926. void LLStringUtilBase<T>::copyInto(std::basic_string<T>& dst, const std::basic_string<T>& src, size_type offset)
  927. {
  928. if ( offset == dst.length() )
  929. {
  930. // special case - append to end of string and avoid expensive
  931. // (when strings are large) string manipulations
  932. dst += src;
  933. }
  934. else
  935. {
  936. std::basic_string<T> tail = dst.substr(offset);
  937. dst = dst.substr(0, offset);
  938. dst += src;
  939. dst += tail;
  940. };
  941. }
  942. // True if this is the head of s.
  943. //static
  944. template<class T>
  945. BOOL LLStringUtilBase<T>::isHead( const std::basic_string<T>& string, const T* s )
  946. {
  947. if( string.empty() )
  948. {
  949. // Early exit
  950. return FALSE;
  951. }
  952. else
  953. {
  954. return (strncmp( s, string.c_str(), string.size() ) == 0);
  955. }
  956. }
  957. // static
  958. template<class T>
  959. bool LLStringUtilBase<T>::startsWith(
  960. const std::basic_string<T>& string,
  961. const std::basic_string<T>& substr)
  962. {
  963. if(string.empty() || (substr.empty())) return false;
  964. if(0 == string.find(substr)) return true;
  965. return false;
  966. }
  967. // static
  968. template<class T>
  969. bool LLStringUtilBase<T>::endsWith(
  970. const std::basic_string<T>& string,
  971. const std::basic_string<T>& substr)
  972. {
  973. if(string.empty() || (substr.empty())) return false;
  974. std::string::size_type idx = string.rfind(substr);
  975. if(std::string::npos == idx) return false;
  976. return (idx == (string.size() - substr.size()));
  977. }
  978. template<class T>
  979. BOOL LLStringUtilBase<T>::convertToBOOL(const std::basic_string<T>& string, BOOL& value)
  980. {
  981. if( string.empty() )
  982. {
  983. return FALSE;
  984. }
  985. std::basic_string<T> temp( string );
  986. trim(temp);
  987. if(
  988. (temp == "1") ||
  989. (temp == "T") ||
  990. (temp == "t") ||
  991. (temp == "TRUE") ||
  992. (temp == "true") ||
  993. (temp == "True") )
  994. {
  995. value = TRUE;
  996. return TRUE;
  997. }
  998. else
  999. if(
  1000. (temp == "0") ||
  1001. (temp == "F") ||
  1002. (temp == "f") ||
  1003. (temp == "FALSE") ||
  1004. (temp == "false") ||
  1005. (temp == "False") )
  1006. {
  1007. value = FALSE;
  1008. return TRUE;
  1009. }
  1010. return FALSE;
  1011. }
  1012. template<class T>
  1013. BOOL LLStringUtilBase<T>::convertToU8(const std::basic_string<T>& string, U8& value)
  1014. {
  1015. S32 value32 = 0;
  1016. BOOL success = convertToS32(string, value32);
  1017. if( success && (U8_MIN <= value32) && (value32 <= U8_MAX) )
  1018. {
  1019. value = (U8) value32;
  1020. return TRUE;
  1021. }
  1022. return FALSE;
  1023. }
  1024. template<class T>
  1025. BOOL LLStringUtilBase<T>::convertToS8(const std::basic_string<T>& string, S8& value)
  1026. {
  1027. S32 value32 = 0;
  1028. BOOL success = convertToS32(string, value32);
  1029. if( success && (S8_MIN <= value32) && (value32 <= S8_MAX) )
  1030. {
  1031. value = (S8) value32;
  1032. return TRUE;
  1033. }
  1034. return FALSE;
  1035. }
  1036. template<class T>
  1037. BOOL LLStringUtilBase<T>::convertToS16(const std::basic_string<T>& string, S16& value)
  1038. {
  1039. S32 value32 = 0;
  1040. BOOL success = convertToS32(string, value32);
  1041. if( success && (S16_MIN <= value32) && (value32 <= S16_MAX) )
  1042. {
  1043. value = (S16) value32;
  1044. return TRUE;
  1045. }
  1046. return FALSE;
  1047. }
  1048. template<class T>
  1049. BOOL LLStringUtilBase<T>::convertToU16(const std::basic_string<T>& string, U16& value)
  1050. {
  1051. S32 value32 = 0;
  1052. BOOL success = convertToS32(string, value32);
  1053. if( success && (U16_MIN <= value32) && (value32 <= U16_MAX) )
  1054. {
  1055. value = (U16) value32;
  1056. return TRUE;
  1057. }
  1058. return FALSE;
  1059. }
  1060. template<class T>
  1061. BOOL LLStringUtilBase<T>::convertToU32(const std::basic_string<T>& string, U32& value)
  1062. {
  1063. if( string.empty() )
  1064. {
  1065. return FALSE;
  1066. }
  1067. std::basic_string<T> temp( string );
  1068. trim(temp);
  1069. U32 v;
  1070. std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
  1071. if(i_stream >> v)
  1072. {
  1073. value = v;
  1074. return TRUE;
  1075. }
  1076. return FALSE;
  1077. }
  1078. template<class T>
  1079. BOOL LLStringUtilBase<T>::convertToS32(const std::basic_string<T>& string, S32& value)
  1080. {
  1081. if( string.empty() )
  1082. {
  1083. return FALSE;
  1084. }
  1085. std::basic_string<T> temp( string );
  1086. trim(temp);
  1087. S32 v;
  1088. std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
  1089. if(i_stream >> v)
  1090. {
  1091. //TODO: figure out overflow and underflow reporting here
  1092. //if((LONG_MAX == v) || (LONG_MIN == v))
  1093. //{
  1094. // // Underflow or overflow
  1095. // return FALSE;
  1096. //}
  1097. value = v;
  1098. return TRUE;
  1099. }
  1100. return FALSE;
  1101. }
  1102. template<class T>
  1103. BOOL LLStringUtilBase<T>::convertToF32(const std::basic_string<T>& string, F32& value)
  1104. {
  1105. F64 value64 = 0.0;
  1106. BOOL success = convertToF64(string, value64);
  1107. if( success && (-F32_MAX <= value64) && (value64 <= F32_MAX) )
  1108. {
  1109. value = (F32) value64;
  1110. return TRUE;
  1111. }
  1112. return FALSE;
  1113. }
  1114. template<class T>
  1115. BOOL LLStringUtilBase<T>::convertToF64(const std::basic_string<T>& string, F64& value)
  1116. {
  1117. if( string.empty() )
  1118. {
  1119. return FALSE;
  1120. }
  1121. std::basic_string<T> temp( string );
  1122. trim(temp);
  1123. F64 v;
  1124. std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
  1125. if(i_stream >> v)
  1126. {
  1127. //TODO: figure out overflow and underflow reporting here
  1128. //if( ((-HUGE_VAL == v) || (HUGE_VAL == v))) )
  1129. //{
  1130. // // Underflow or overflow
  1131. // return FALSE;
  1132. //}
  1133. value = v;
  1134. return TRUE;
  1135. }
  1136. return FALSE;
  1137. }
  1138. template<class T>
  1139. void LLStringUtilBase<T>::truncate(std::basic_string<T>& string, size_type count)
  1140. {
  1141. size_type cur_size = string.size();
  1142. string.resize(count < cur_size ? count : cur_size);
  1143. }
#endif // LL_LLSTRING_H