PageRenderTime 86ms CodeModel.GetById 12ms app.highlight 64ms RepoModel.GetById 1ms app.codeStats 0ms

/indra/llcommon/llstring.h

https://bitbucket.org/lindenlab/viewer-beta/
C++ Header | 1326 lines | 876 code | 182 blank | 268 comment | 167 complexity | 5bedd0755951dd9dd854114c1f01fc70 MD5 | raw file
   1/** 
   2 * @file llstring.h
   3 * @brief String utility functions and std::string class.
   4 *
   5 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
   6 * Second Life Viewer Source Code
   7 * Copyright (C) 2010, Linden Research, Inc.
   8 * 
   9 * This library is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU Lesser General Public
  11 * License as published by the Free Software Foundation;
  12 * version 2.1 of the License only.
  13 * 
  14 * This library is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * Lesser General Public License for more details.
  18 * 
  19 * You should have received a copy of the GNU Lesser General Public
  20 * License along with this library; if not, write to the Free Software
  21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  22 * 
  23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
  24 * $/LicenseInfo$
  25 */
  26
  27#ifndef LL_LLSTRING_H
  28#define LL_LLSTRING_H
  29
  30#include <string>
  31#include <cstdio>
  32#include <locale>
  33#include <iomanip>
  34#include "llsd.h"
  35#include "llfasttimer.h"
  36
  37#if LL_LINUX || LL_SOLARIS
  38#include <wctype.h>
  39#include <wchar.h>
  40#endif
  41
  42#include <string.h>
  43
  44#if LL_SOLARIS
  45// stricmp and strnicmp do not exist on Solaris:
  46#define stricmp strcasecmp
  47#define strnicmp strncasecmp
  48#endif
  49
  50const char LL_UNKNOWN_CHAR = '?';
  51
  52#if LL_DARWIN || LL_LINUX || LL_SOLARIS
  53// Template specialization of char_traits for U16s. Only necessary on Mac and Linux (exists on Windows already)
  54#include <cstring>
  55
  56namespace std
  57{
  58template<>
  59struct char_traits<U16>
  60{
  61	typedef U16 		char_type;
  62	typedef int 	    int_type;
  63	typedef streampos 	pos_type;
  64	typedef streamoff 	off_type;
  65	typedef mbstate_t 	state_type;
  66	
  67	static void 
  68		assign(char_type& __c1, const char_type& __c2)
  69	{ __c1 = __c2; }
  70	
  71	static bool 
  72		eq(const char_type& __c1, const char_type& __c2)
  73	{ return __c1 == __c2; }
  74	
  75	static bool 
  76		lt(const char_type& __c1, const char_type& __c2)
  77	{ return __c1 < __c2; }
  78	
  79	static int 
  80		compare(const char_type* __s1, const char_type* __s2, size_t __n)
  81	{ return memcmp(__s1, __s2, __n * sizeof(char_type)); }
  82	
  83	static size_t
  84		length(const char_type* __s)
  85	{
  86		const char_type *cur_char = __s;
  87		while (*cur_char != 0)
  88		{
  89			++cur_char;
  90		}
  91		return cur_char - __s;
  92	}
  93	
  94	static const char_type* 
  95		find(const char_type* __s, size_t __n, const char_type& __a)
  96	{ return static_cast<const char_type*>(memchr(__s, __a, __n * sizeof(char_type))); }
  97	
  98	static char_type* 
  99		move(char_type* __s1, const char_type* __s2, size_t __n)
 100	{ return static_cast<char_type*>(memmove(__s1, __s2, __n * sizeof(char_type))); }
 101	
 102	static char_type* 
 103		copy(char_type* __s1, const char_type* __s2, size_t __n)
 104	{  return static_cast<char_type*>(memcpy(__s1, __s2, __n * sizeof(char_type))); }	/* Flawfinder: ignore */
 105	
 106	static char_type* 
 107		assign(char_type* __s, size_t __n, char_type __a)
 108	{ 
 109		// This isn't right.
 110		//return static_cast<char_type*>(memset(__s, __a, __n * sizeof(char_type))); 
 111		
 112		// I don't think there's a standard 'memset' for 16-bit values.
 113		// Do this the old-fashioned way.
 114		
 115		size_t __i;
 116		for(__i = 0; __i < __n; __i++)
 117		{
 118			__s[__i] = __a;
 119		}
 120		return __s; 
 121	}
 122	
 123	static char_type 
 124		to_char_type(const int_type& __c)
 125	{ return static_cast<char_type>(__c); }
 126	
 127	static int_type 
 128		to_int_type(const char_type& __c)
 129	{ return static_cast<int_type>(__c); }
 130	
 131	static bool 
 132		eq_int_type(const int_type& __c1, const int_type& __c2)
 133	{ return __c1 == __c2; }
 134	
 135	static int_type 
 136		eof() { return static_cast<int_type>(EOF); }
 137	
 138	static int_type 
 139		not_eof(const int_type& __c)
 140      { return (__c == eof()) ? 0 : __c; }
 141  };
 142};
 143#endif
 144
 145class LL_COMMON_API LLStringOps
 146{
 147private:
 148	static long sPacificTimeOffset;
 149	static long sLocalTimeOffset;
 150	static bool sPacificDaylightTime;
 151
 152	static std::map<std::string, std::string> datetimeToCodes;
 153
 154public:
 155	static std::vector<std::string> sWeekDayList;
 156	static std::vector<std::string> sWeekDayShortList;
 157	static std::vector<std::string> sMonthList;
 158	static std::vector<std::string> sMonthShortList;
 159	static std::string sDayFormat;
 160
 161	static std::string sAM;
 162	static std::string sPM;
 163
 164	static char toUpper(char elem) { return toupper((unsigned char)elem); }
 165	static llwchar toUpper(llwchar elem) { return towupper(elem); }
 166	
 167	static char toLower(char elem) { return tolower((unsigned char)elem); }
 168	static llwchar toLower(llwchar elem) { return towlower(elem); }
 169
 170	static bool isSpace(char elem) { return isspace((unsigned char)elem) != 0; }
 171	static bool isSpace(llwchar elem) { return iswspace(elem) != 0; }
 172
 173	static bool isUpper(char elem) { return isupper((unsigned char)elem) != 0; }
 174	static bool isUpper(llwchar elem) { return iswupper(elem) != 0; }
 175
 176	static bool isLower(char elem) { return islower((unsigned char)elem) != 0; }
 177	static bool isLower(llwchar elem) { return iswlower(elem) != 0; }
 178
 179	static bool isDigit(char a) { return isdigit((unsigned char)a) != 0; }
 180	static bool isDigit(llwchar a) { return iswdigit(a) != 0; }
 181
 182	static bool isPunct(char a) { return ispunct((unsigned char)a) != 0; }
 183	static bool isPunct(llwchar a) { return iswpunct(a) != 0; }
 184
 185	static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; }
 186	static bool isAlnum(llwchar a) { return iswalnum(a) != 0; }
 187
 188	static S32	collate(const char* a, const char* b) { return strcoll(a, b); }
 189	static S32	collate(const llwchar* a, const llwchar* b);
 190
 191	static void setupDatetimeInfo(bool pacific_daylight_time);
 192
 193	static void setupWeekDaysNames(const std::string& data);
 194	static void setupWeekDaysShortNames(const std::string& data);
 195	static void setupMonthNames(const std::string& data);
 196	static void setupMonthShortNames(const std::string& data);
 197	static void setupDayFormat(const std::string& data);
 198
 199
 200	static long getPacificTimeOffset(void) { return sPacificTimeOffset;}
 201	static long getLocalTimeOffset(void) { return sLocalTimeOffset;}
 202	// Is the Pacific time zone (aka server time zone)
 203	// currently in daylight savings time?
 204	static bool getPacificDaylightTime(void) { return sPacificDaylightTime;}
 205
 206	static std::string getDatetimeCode (std::string key);
 207};
 208
 209/**
 210 * @brief Return a string constructed from in without crashing if the
 211 * pointer is NULL.
 212 */
 213LL_COMMON_API std::string ll_safe_string(const char* in);
 214LL_COMMON_API std::string ll_safe_string(const char* in, S32 maxlen);
 215
 216
 217// Allowing assignments from non-strings into format_map_t is apparently
 218// *really* error-prone, so subclass std::string with just basic c'tors.
 219class LLFormatMapString
 220{
 221public:
 222	LLFormatMapString() {};
 223	LLFormatMapString(const char* s) : mString(ll_safe_string(s)) {};
 224	LLFormatMapString(const std::string& s) : mString(s) {};
 225	operator std::string() const { return mString; }
 226	bool operator<(const LLFormatMapString& rhs) const { return mString < rhs.mString; }
 227	std::size_t length() const { return mString.length(); }
 228	
 229private:
 230	std::string mString;
 231};
 232
 233template <class T>
 234class LLStringUtilBase
 235{
 236private:
 237	static std::string sLocale;
 238
 239public:
 240	typedef typename std::basic_string<T>::size_type size_type;
 241	
 242public:
 243	/////////////////////////////////////////////////////////////////////////////////////////
 244	// Static Utility functions that operate on std::strings
 245
 246	static const std::basic_string<T> null;
 247	
 248	typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
 249	LL_COMMON_API static void getTokens(const std::basic_string<T>& instr, std::vector<std::basic_string<T> >& tokens, const std::basic_string<T>& delims);
 250	LL_COMMON_API static void formatNumber(std::basic_string<T>& numStr, std::basic_string<T> decimals);
 251	LL_COMMON_API static bool formatDatetime(std::basic_string<T>& replacement, std::basic_string<T> token, std::basic_string<T> param, S32 secFromEpoch);
 252	LL_COMMON_API static S32 format(std::basic_string<T>& s, const format_map_t& substitutions);
 253	LL_COMMON_API static S32 format(std::basic_string<T>& s, const LLSD& substitutions);
 254	LL_COMMON_API static bool simpleReplacement(std::basic_string<T>& replacement, std::basic_string<T> token, const format_map_t& substitutions);
 255	LL_COMMON_API static bool simpleReplacement(std::basic_string<T>& replacement, std::basic_string<T> token, const LLSD& substitutions);
 256	LL_COMMON_API static void setLocale (std::string inLocale);
 257	LL_COMMON_API static std::string getLocale (void);
 258	
 259	static bool isValidIndex(const std::basic_string<T>& string, size_type i)
 260	{
 261		return !string.empty() && (0 <= i) && (i <= string.size());
 262	}
 263
 264	static void	trimHead(std::basic_string<T>& string);
 265	static void	trimTail(std::basic_string<T>& string);
 266	static void	trim(std::basic_string<T>& string)	{ trimHead(string); trimTail(string); }
 267	static void truncate(std::basic_string<T>& string, size_type count);
 268
 269	static void	toUpper(std::basic_string<T>& string);
 270	static void	toLower(std::basic_string<T>& string);
 271	
 272	// True if this is the head of s.
 273	static BOOL	isHead( const std::basic_string<T>& string, const T* s ); 
 274
 275	/**
 276	 * @brief Returns true if string starts with substr
 277	 *
 278	 * If etither string or substr are empty, this method returns false.
 279	 */
 280	static bool startsWith(
 281		const std::basic_string<T>& string,
 282		const std::basic_string<T>& substr);
 283
 284	/**
 285	 * @brief Returns true if string ends in substr
 286	 *
 287	 * If etither string or substr are empty, this method returns false.
 288	 */
 289	static bool endsWith(
 290		const std::basic_string<T>& string,
 291		const std::basic_string<T>& substr);
 292
 293	static void	addCRLF(std::basic_string<T>& string);
 294	static void	removeCRLF(std::basic_string<T>& string);
 295
 296	static void	replaceTabsWithSpaces( std::basic_string<T>& string, size_type spaces_per_tab );
 297	static void	replaceNonstandardASCII( std::basic_string<T>& string, T replacement );
 298	static void	replaceChar( std::basic_string<T>& string, T target, T replacement );
 299	static void replaceString( std::basic_string<T>& string, std::basic_string<T> target, std::basic_string<T> replacement );
 300	
 301	static BOOL	containsNonprintable(const std::basic_string<T>& string);
 302	static void	stripNonprintable(std::basic_string<T>& string);
 303
 304	/**
 305	 * @brief Unsafe way to make ascii characters. You should probably
 306	 * only call this when interacting with the host operating system.
 307	 * The 1 byte std::string does not work correctly.
 308	 * The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII
 309	 * should work.
 310	 */
 311	static void _makeASCII(std::basic_string<T>& string);
 312
 313	// Conversion to other data types
 314	static BOOL	convertToBOOL(const std::basic_string<T>& string, BOOL& value);
 315	static BOOL	convertToU8(const std::basic_string<T>& string, U8& value);
 316	static BOOL	convertToS8(const std::basic_string<T>& string, S8& value);
 317	static BOOL	convertToS16(const std::basic_string<T>& string, S16& value);
 318	static BOOL	convertToU16(const std::basic_string<T>& string, U16& value);
 319	static BOOL	convertToU32(const std::basic_string<T>& string, U32& value);
 320	static BOOL	convertToS32(const std::basic_string<T>& string, S32& value);
 321	static BOOL	convertToF32(const std::basic_string<T>& string, F32& value);
 322	static BOOL	convertToF64(const std::basic_string<T>& string, F64& value);
 323
 324	/////////////////////////////////////////////////////////////////////////////////////////
 325	// Utility functions for working with char*'s and strings
 326
 327	// Like strcmp but also handles empty strings. Uses
 328	// current locale.
 329	static S32		compareStrings(const T* lhs, const T* rhs);
 330	static S32		compareStrings(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs);
 331	
 332	// case insensitive version of above. Uses current locale on
 333	// Win32, and falls back to a non-locale aware comparison on
 334	// Linux.
 335	static S32		compareInsensitive(const T* lhs, const T* rhs);
 336	static S32		compareInsensitive(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs);
 337
 338	// Case sensitive comparison with good handling of numbers.  Does not use current locale.
 339	// a.k.a. strdictcmp()
 340	static S32		compareDict(const std::basic_string<T>& a, const std::basic_string<T>& b);
 341
 342	// Case *in*sensitive comparison with good handling of numbers.  Does not use current locale.
 343	// a.k.a. strdictcmp()
 344	static S32		compareDictInsensitive(const std::basic_string<T>& a, const std::basic_string<T>& b);
 345
 346	// Puts compareDict() in a form appropriate for LL container classes to use for sorting.
 347	static BOOL		precedesDict( const std::basic_string<T>& a, const std::basic_string<T>& b );
 348
 349	// A replacement for strncpy.
 350	// If the dst buffer is dst_size bytes long or more, ensures that dst is null terminated and holds
 351	// up to dst_size-1 characters of src.
 352	static void		copy(T* dst, const T* src, size_type dst_size);
 353	
 354	// Copies src into dst at a given offset.  
 355	static void		copyInto(std::basic_string<T>& dst, const std::basic_string<T>& src, size_type offset);
 356	
 357	static bool		isPartOfWord(T c) { return (c == (T)'_') || LLStringOps::isAlnum(c); }
 358
 359
 360#ifdef _DEBUG	
 361	LL_COMMON_API static void		testHarness();
 362#endif
 363
 364private:
 365	LL_COMMON_API static size_type getSubstitution(const std::basic_string<T>& instr, size_type& start, std::vector<std::basic_string<T> >& tokens);
 366};
 367
  // Definitions of LLStringUtilBase's static data members. Both are
  // default-constructed, i.e. they start out as empty strings.
  368template<class T> const std::basic_string<T> LLStringUtilBase<T>::null;
  369template<class T> std::string LLStringUtilBase<T>::sLocale;
  370
  // Short names for the char and llwchar instantiations of the utility class,
  // and for a string of llwchar elements.
  371typedef LLStringUtilBase<char> LLStringUtil;
  372typedef LLStringUtilBase<llwchar> LLWStringUtil;
  373typedef std::basic_string<llwchar> LLWString;
 374
 375//@ Use this where we want to disallow input in the form of "foo"
 376//  This is used to catch places where english text is embedded in the code
 377//  instead of in a translatable XUI file.
 378class LLStringExplicit : public std::string
 379{
 380public:
 381	explicit LLStringExplicit(const char* s) : std::string(s) {}
 382	LLStringExplicit(const std::string& s) : std::string(s) {}
 383	LLStringExplicit(const std::string& s, size_type pos, size_type n = std::string::npos) : std::string(s, pos, n) {}
 384};
 385
 386struct LLDictionaryLess
 387{
 388public:
 389	bool operator()(const std::string& a, const std::string& b)
 390	{
 391		return (LLStringUtil::precedesDict(a, b) ? true : false);
 392	}
 393};
 394
 395
 396/**
 397 * Simple support functions
 398 */
 399
 400/**
 401 * @brief chop off the trailing characters in a string.
 402 *
 403 * This function works on bytes rather than glyphs, so this will
 404 * incorrectly truncate non-single byte strings.
 405 * Use utf8str_truncate() for utf8 strings
 406 * @return a copy of in string minus the trailing count bytes.
 407 */
 408inline std::string chop_tail_copy(
 409	const std::string& in,
 410	std::string::size_type count)
 411{
 412	return std::string(in, 0, in.length() - count);
 413}
 414
 415/**
 416 * @brief This translates a nybble stored as a hex value from 0-f back
 417 * to a nybble in the low order bits of the return byte.
 418 */
 419LL_COMMON_API U8 hex_as_nybble(char hex);
 420
 421/**
 422 * @brief read the contents of a file into a string.
 423 *
 424 * Since this function has no concept of character encoding, most
  425 * anything you do with this method is ill-advised. Please avoid.
  426 * @param str [out] The string which will receive the file's contents.
 427 * @param filename The full name of the file to read.
 428 * @return Returns true on success. If false, str is unmodified.
 429 */
 430LL_COMMON_API bool _read_file_into_string(std::string& str, const std::string& filename);
 431LL_COMMON_API bool iswindividual(llwchar elem);
 432
 433/**
 434 * Unicode support
 435 */
 436
 437// Make the incoming string a utf8 string. Replaces any unknown glyph
 438// with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
 439// of the data may not be recovered.
 440LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
 441
 442//
 443// We should never use UTF16 except when communicating with Win32!
 444//
 445typedef std::basic_string<U16> llutf16string;
 446
 447LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
 448LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
 449
 450LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
 451LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
 452
 453LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
 454LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
 455
 456LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
 457LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
 458// Same function, better name. JC
 459inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
 460
 461//
 462LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);
 463
 464LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
 465LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
 466
 467LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
 468LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
 469
 470// Length of this UTF32 string in bytes when transformed to UTF8
 471LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); 
 472
 473// Length in bytes of this wide char in a UTF8 string
 474LL_COMMON_API S32 wchar_utf8_length(const llwchar wc); 
 475
 476LL_COMMON_API std::string utf8str_tolower(const std::string& utf8str);
 477
 478// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
 479LL_COMMON_API S32 utf16str_wstring_length(const llutf16string &utf16str, S32 len);
 480
 481// Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
 482LL_COMMON_API S32 wstring_utf16_length(const LLWString & wstr, S32 woffset, S32 wlen);
 483
 484// Length in wstring (i.e., llwchar count) of a part of a wstring specified by utf16 length (i.e., utf16 units.)
 485LL_COMMON_API S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, S32 woffset, S32 utf16_length, BOOL *unaligned = NULL);
 486
 487/**
 488 * @brief Properly truncate a utf8 string to a maximum byte count.
 489 * 
 490 * The returned string may be less than max_len if the truncation
 491 * happens in the middle of a glyph. If max_len is longer than the
 492 * string passed in, the return value == utf8str.
 493 * @param utf8str A valid utf8 string to truncate.
 494 * @param max_len The maximum number of bytes in the return value.
 495 * @return Returns a valid utf8 string with byte count <= max_len.
 496 */
 497LL_COMMON_API std::string utf8str_truncate(const std::string& utf8str, const S32 max_len);
 498
 499LL_COMMON_API std::string utf8str_trim(const std::string& utf8str);
 500
 501LL_COMMON_API S32 utf8str_compare_insensitive(
 502	const std::string& lhs,
 503	const std::string& rhs);
 504
 505/**
  506 * @brief Replace all occurrences of target_char with replace_char
 507 *
 508 * @param utf8str A utf8 string to process.
 509 * @param target_char The wchar to be replaced
 510 * @param replace_char The wchar which is written on replace
 511 */
 512LL_COMMON_API std::string utf8str_substChar(
 513	const std::string& utf8str,
 514	const llwchar target_char,
 515	const llwchar replace_char);
 516
 517LL_COMMON_API std::string utf8str_makeASCII(const std::string& utf8str);
 518
 519// Hack - used for evil notecards.
 520LL_COMMON_API std::string mbcsstring_makeASCII(const std::string& str); 
 521
 522LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
 523
 524
 525#if LL_WINDOWS
 526/* @name Windows string helpers
 527 */
 528//@{
 529
 530/**
  531 * @brief Implementation of the expected snprintf interface.
 532 *
 533 * If the size of the passed in buffer is not large enough to hold the string,
 534 * two bad things happen:
 535 * 1. resulting formatted string is NOT null terminated
 536 * 2. Depending on the platform, the return value could be a) the required
 537 *    size of the buffer to copy the entire formatted string or b) -1.
 538 *    On Windows with VS.Net 2003, it returns -1 e.g. 
 539 *
 540 * safe_snprintf always adds a NULL terminator so that the caller does not
 541 * need to check for return value or need to add the NULL terminator.
 542 * It does not, however change the return value - to let the caller know
 543 * that the passed in buffer size was not large enough to hold the
 544 * formatted string.
 545 *
 546 */
 547
  548// Deal with the differences on Windows
 549namespace snprintf_hack
 550{
 551	LL_COMMON_API int snprintf(char *str, size_t size, const char *format, ...);
 552}
 553
 554using snprintf_hack::snprintf;
 555
 556/**
 557 * @brief Convert a wide string to std::string
 558 *
 559 * This replaces the unsafe W2A macro from ATL.
 560 */
 561LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
 562
 563/**
 564 * Converts a string to wide string.
 565 *
 566 * It will allocate memory for result string with "new []". Don't forget to release it with "delete []".
 567 */
 568LL_COMMON_API wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page);
 569
 570/**
  571 * Converts incoming string into utf8 string
 572 *
 573 */
 574LL_COMMON_API std::string ll_convert_string_to_utf8_string(const std::string& in);
 575
 576//@}
 577#endif // LL_WINDOWS
 578
 579/**
 580 * Many of the 'strip' and 'replace' methods of LLStringUtilBase need
 581 * specialization to work with the signed char type.
 582 * Sadly, it is not possible (AFAIK) to specialize a single method of
 583 * a template class.
 584 * That stuff should go here.
 585 */
 586namespace LLStringFn
 587{
 588	/**
 589	 * @brief Replace all non-printable characters with replacement in
 590	 * string.
 591	 * NOTE - this will zap non-ascii
 592	 *
  593	 * @param [in,out] string the string to modify. out value is the string
 594	 * with zero non-printable characters.
 595	 * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
 596	 */
 597	LL_COMMON_API void replace_nonprintable_in_ascii(
 598		std::basic_string<char>& string,
 599		char replacement);
 600
 601
 602	/**
 603	 * @brief Replace all non-printable characters and pipe characters
 604	 * with replacement in a string.
 605	 * NOTE - this will zap non-ascii
 606	 *
 607	 * @param [in,out] the string to modify. out value is the string
 608	 * with zero non-printable characters and zero pipe characters.
 609	 * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
 610	 */
 611	LL_COMMON_API void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
 612									   char replacement);
 613
 614
 615	/**
 616	 * @brief Remove all characters that are not allowed in XML 1.0.
 617	 * Returns a copy of the string with those characters removed.
 618	 * Works with US ASCII and UTF-8 encoded strings.  JC
 619	 */
 620	LL_COMMON_API std::string strip_invalid_xml(const std::string& input);
 621
 622
 623	/**
 624	 * @brief Replace all control characters (0 <= c < 0x20) with replacement in
 625	 * string.   This is safe for utf-8
 626	 *
  627	 * @param [in,out] string the string to modify. out value is the string
 628	 * with zero non-printable characters.
 629	 * @param The replacement character. use LL_UNKNOWN_CHAR if unsure.
 630	 */
 631	LL_COMMON_API void replace_ascii_controlchars(
 632		std::basic_string<char>& string,
 633		char replacement);
 634}
 635
 636////////////////////////////////////////////////////////////
 637// NOTE: LLStringUtil::format, getTokens, and support functions moved to llstring.cpp.
 638// There is no LLWStringUtil::format implementation currently.
  639// Calling these for anything other than LLStringUtil will produce link errors.
 640
 641////////////////////////////////////////////////////////////
 642
 643
 644// static
 645template<class T> 
 646S32 LLStringUtilBase<T>::compareStrings(const T* lhs, const T* rhs)
 647{	
 648	S32 result;
 649	if( lhs == rhs )
 650	{
 651		result = 0;
 652	}
 653	else
 654	if ( !lhs || !lhs[0] )
 655	{
 656		result = ((!rhs || !rhs[0]) ? 0 : 1);
 657	}
 658	else
 659	if ( !rhs || !rhs[0])
 660	{
 661		result = -1;
 662	}
 663	else
 664	{
 665		result = LLStringOps::collate(lhs, rhs);
 666	}
 667	return result;
 668}
 669
 670//static 
 671template<class T> 
 672S32 LLStringUtilBase<T>::compareStrings(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs)
 673{
 674	return LLStringOps::collate(lhs.c_str(), rhs.c_str());
 675}
 676
 677// static
 678template<class T> 
 679S32 LLStringUtilBase<T>::compareInsensitive(const T* lhs, const T* rhs )
 680{
 681	S32 result;
 682	if( lhs == rhs )
 683	{
 684		result = 0;
 685	}
 686	else
 687	if ( !lhs || !lhs[0] )
 688	{
 689		result = ((!rhs || !rhs[0]) ? 0 : 1);
 690	}
 691	else
 692	if ( !rhs || !rhs[0] )
 693	{
 694		result = -1;
 695	}
 696	else
 697	{
 698		std::basic_string<T> lhs_string(lhs);
 699		std::basic_string<T> rhs_string(rhs);
 700		LLStringUtilBase<T>::toUpper(lhs_string);
 701		LLStringUtilBase<T>::toUpper(rhs_string);
 702		result = LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
 703	}
 704	return result;
 705}
 706
 707//static 
 708template<class T> 
 709S32 LLStringUtilBase<T>::compareInsensitive(const std::basic_string<T>& lhs, const std::basic_string<T>& rhs)
 710{
 711	std::basic_string<T> lhs_string(lhs);
 712	std::basic_string<T> rhs_string(rhs);
 713	LLStringUtilBase<T>::toUpper(lhs_string);
 714	LLStringUtilBase<T>::toUpper(rhs_string);
 715	return LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
 716}
 717
 718// Case sensitive comparison with good handling of numbers.  Does not use current locale.
 719// a.k.a. strdictcmp()
 720
 721//static 
 722template<class T>
 723S32 LLStringUtilBase<T>::compareDict(const std::basic_string<T>& astr, const std::basic_string<T>& bstr)
 724{
 725	const T* a = astr.c_str();
 726	const T* b = bstr.c_str();
 727	T ca, cb;
 728	S32 ai, bi, cnt = 0;
 729	S32 bias = 0;
 730
 731	ca = *(a++);
 732	cb = *(b++);
 733	while( ca && cb ){
 734		if( bias==0 ){
 735			if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); bias--; }
 736			if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); bias++; }
 737		}else{
 738			if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); }
 739			if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); }
 740		}
 741		if( LLStringOps::isDigit(ca) ){
 742			if( cnt-->0 ){
 743				if( cb!=ca ) break;
 744			}else{
 745				if( !LLStringOps::isDigit(cb) ) break;
 746				for(ai=0; LLStringOps::isDigit(a[ai]); ai++);
 747				for(bi=0; LLStringOps::isDigit(b[bi]); bi++);
 748				if( ai<bi ){ ca=0; break; }
 749				if( bi<ai ){ cb=0; break; }
 750				if( ca!=cb ) break;
 751				cnt = ai;
 752			}
 753		}else if( ca!=cb ){   break;
 754		}
 755		ca = *(a++);
 756		cb = *(b++);
 757	}
 758	if( ca==cb ) ca += bias;
 759	return ca-cb;
 760}
 761
 762// static
 763template<class T>
 764S32 LLStringUtilBase<T>::compareDictInsensitive(const std::basic_string<T>& astr, const std::basic_string<T>& bstr)
 765{
 766	const T* a = astr.c_str();
 767	const T* b = bstr.c_str();
 768	T ca, cb;
 769	S32 ai, bi, cnt = 0;
 770
 771	ca = *(a++);
 772	cb = *(b++);
 773	while( ca && cb ){
 774		if( LLStringOps::isUpper(ca) ){ ca = LLStringOps::toLower(ca); }
 775		if( LLStringOps::isUpper(cb) ){ cb = LLStringOps::toLower(cb); }
 776		if( LLStringOps::isDigit(ca) ){
 777			if( cnt-->0 ){
 778				if( cb!=ca ) break;
 779			}else{
 780				if( !LLStringOps::isDigit(cb) ) break;
 781				for(ai=0; LLStringOps::isDigit(a[ai]); ai++);
 782				for(bi=0; LLStringOps::isDigit(b[bi]); bi++);
 783				if( ai<bi ){ ca=0; break; }
 784				if( bi<ai ){ cb=0; break; }
 785				if( ca!=cb ) break;
 786				cnt = ai;
 787			}
 788		}else if( ca!=cb ){   break;
 789		}
 790		ca = *(a++);
 791		cb = *(b++);
 792	}
 793	return ca-cb;
 794}
 795
 796// Puts compareDict() in a form appropriate for LL container classes to use for sorting.
 797// static 
 798template<class T> 
 799BOOL LLStringUtilBase<T>::precedesDict( const std::basic_string<T>& a, const std::basic_string<T>& b )
 800{
 801	if( a.size() && b.size() )
 802	{
 803		return (LLStringUtilBase<T>::compareDict(a.c_str(), b.c_str()) < 0);
 804	}
 805	else
 806	{
 807		return (!b.empty());
 808	}
 809}
 810
 811//static
 812template<class T> 
 813void LLStringUtilBase<T>::toUpper(std::basic_string<T>& string)	
 814{ 
 815	if( !string.empty() )
 816	{ 
 817		std::transform(
 818			string.begin(),
 819			string.end(),
 820			string.begin(),
 821			(T(*)(T)) &LLStringOps::toUpper);
 822	}
 823}
 824
 825//static
 826template<class T> 
 827void LLStringUtilBase<T>::toLower(std::basic_string<T>& string)
 828{ 
 829	if( !string.empty() )
 830	{ 
 831		std::transform(
 832			string.begin(),
 833			string.end(),
 834			string.begin(),
 835			(T(*)(T)) &LLStringOps::toLower);
 836	}
 837}
 838
 839//static
 840template<class T> 
 841void LLStringUtilBase<T>::trimHead(std::basic_string<T>& string)
 842{			
 843	if( !string.empty() )
 844	{
 845		size_type i = 0;
 846		while( i < string.length() && LLStringOps::isSpace( string[i] ) )
 847		{
 848			i++;
 849		}
 850		string.erase(0, i);
 851	}
 852}
 853
 854//static
 855template<class T> 
 856void LLStringUtilBase<T>::trimTail(std::basic_string<T>& string)
 857{			
 858	if( string.size() )
 859	{
 860		size_type len = string.length();
 861		size_type i = len;
 862		while( i > 0 && LLStringOps::isSpace( string[i-1] ) )
 863		{
 864			i--;
 865		}
 866
 867		string.erase( i, len - i );
 868	}
 869}
 870
 871
 872// Replace line feeds with carriage return-line feed pairs.
 873//static
 874template<class T>
 875void LLStringUtilBase<T>::addCRLF(std::basic_string<T>& string)
 876{
 877	const T LF = 10;
 878	const T CR = 13;
 879
 880	// Count the number of line feeds
 881	size_type count = 0;
 882	size_type len = string.size();
 883	size_type i;
 884	for( i = 0; i < len; i++ )
 885	{
 886		if( string[i] == LF )
 887		{
 888			count++;
 889		}
 890	}
 891
 892	// Insert a carriage return before each line feed
 893	if( count )
 894	{
 895		size_type size = len + count;
 896		T *t = new T[size];
 897		size_type j = 0;
 898		for( i = 0; i < len; ++i )
 899		{
 900			if( string[i] == LF )
 901			{
 902				t[j] = CR;
 903				++j;
 904			}
 905			t[j] = string[i];
 906			++j;
 907		}
 908
 909		string.assign(t, size);
 910		delete[] t;
 911	}
 912}
 913
 914// Remove all carriage returns
 915//static
 916template<class T> 
 917void LLStringUtilBase<T>::removeCRLF(std::basic_string<T>& string)
 918{
 919	const T CR = 13;
 920
 921	size_type cr_count = 0;
 922	size_type len = string.size();
 923	size_type i;
 924	for( i = 0; i < len - cr_count; i++ )
 925	{
 926		if( string[i+cr_count] == CR )
 927		{
 928			cr_count++;
 929		}
 930
 931		string[i] = string[i+cr_count];
 932	}
 933	string.erase(i, cr_count);
 934}
 935
 936//static
 937template<class T> 
 938void LLStringUtilBase<T>::replaceChar( std::basic_string<T>& string, T target, T replacement )
 939{
 940	size_type found_pos = 0;
 941	while( (found_pos = string.find(target, found_pos)) != std::basic_string<T>::npos ) 
 942	{
 943		string[found_pos] = replacement;
 944		found_pos++; // avoid infinite defeat if target == replacement
 945	}
 946}
 947
 948//static
 949template<class T> 
 950void LLStringUtilBase<T>::replaceString( std::basic_string<T>& string, std::basic_string<T> target, std::basic_string<T> replacement )
 951{
 952	size_type found_pos = 0;
 953	while( (found_pos = string.find(target, found_pos)) != std::basic_string<T>::npos )
 954	{
 955		string.replace( found_pos, target.length(), replacement );
 956		found_pos += replacement.length(); // avoid infinite defeat if replacement contains target
 957	}
 958}
 959
 960//static
 961template<class T> 
 962void LLStringUtilBase<T>::replaceNonstandardASCII( std::basic_string<T>& string, T replacement )
 963{
 964	const char LF = 10;
 965	const S8 MIN = 32;
 966//	const S8 MAX = 127;
 967
 968	size_type len = string.size();
 969	for( size_type i = 0; i < len; i++ )
 970	{
 971		// No need to test MAX < mText[i] because we treat mText[i] as a signed char,
 972		// which has a max value of 127.
 973		if( ( S8(string[i]) < MIN ) && (string[i] != LF) )
 974		{
 975			string[i] = replacement;
 976		}
 977	}
 978}
 979
 980//static
 981template<class T> 
 982void LLStringUtilBase<T>::replaceTabsWithSpaces( std::basic_string<T>& str, size_type spaces_per_tab )
 983{
 984	const T TAB = '\t';
 985	const T SPACE = ' ';
 986
 987	std::basic_string<T> out_str;
 988	// Replace tabs with spaces
 989	for (size_type i = 0; i < str.length(); i++)
 990	{
 991		if (str[i] == TAB)
 992		{
 993			for (size_type j = 0; j < spaces_per_tab; j++)
 994				out_str += SPACE;
 995		}
 996		else
 997		{
 998			out_str += str[i];
 999		}
1000	}
1001	str = out_str;
1002}
1003
1004//static
1005template<class T> 
1006BOOL LLStringUtilBase<T>::containsNonprintable(const std::basic_string<T>& string)
1007{
1008	const char MIN = 32;
1009	BOOL rv = FALSE;
1010	for (size_type i = 0; i < string.size(); i++)
1011	{
1012		if(string[i] < MIN)
1013		{
1014			rv = TRUE;
1015			break;
1016		}
1017	}
1018	return rv;
1019}
1020
1021//static
1022template<class T> 
1023void LLStringUtilBase<T>::stripNonprintable(std::basic_string<T>& string)
1024{
1025	const char MIN = 32;
1026	size_type j = 0;
1027	if (string.empty())
1028	{
1029		return;
1030	}
1031	size_t src_size = string.size();
1032	char* c_string = new char[src_size + 1];
1033	if(c_string == NULL)
1034	{
1035		return;
1036	}
1037	copy(c_string, string.c_str(), src_size+1);
1038	char* write_head = &c_string[0];
1039	for (size_type i = 0; i < src_size; i++)
1040	{
1041		char* read_head = &string[i];
1042		write_head = &c_string[j];
1043		if(!(*read_head < MIN))
1044		{
1045			*write_head = *read_head;
1046			++j;
1047		}
1048	}
1049	c_string[j]= '\0';
1050	string = c_string;
1051	delete []c_string;
1052}
1053
1054template<class T> 
1055void LLStringUtilBase<T>::_makeASCII(std::basic_string<T>& string)
1056{
1057	// Replace non-ASCII chars with LL_UNKNOWN_CHAR
1058	for (size_type i = 0; i < string.length(); i++)
1059	{
1060		if (string[i] > 0x7f)
1061		{
1062			string[i] = LL_UNKNOWN_CHAR;
1063		}
1064	}
1065}
1066
1067// static
1068template<class T> 
1069void LLStringUtilBase<T>::copy( T* dst, const T* src, size_type dst_size )
1070{
1071	if( dst_size > 0 )
1072	{
1073		size_type min_len = 0;
1074		if( src )
1075		{
1076			min_len = llmin( dst_size - 1, strlen( src ) );  /* Flawfinder: ignore */
1077			memcpy(dst, src, min_len * sizeof(T));		/* Flawfinder: ignore */
1078		}
1079		dst[min_len] = '\0';
1080	}
1081}
1082
1083// static
1084template<class T> 
1085void LLStringUtilBase<T>::copyInto(std::basic_string<T>& dst, const std::basic_string<T>& src, size_type offset)
1086{
1087	if ( offset == dst.length() )
1088	{
1089		// special case - append to end of string and avoid expensive
1090		// (when strings are large) string manipulations
1091		dst += src;
1092	}
1093	else
1094	{
1095		std::basic_string<T> tail = dst.substr(offset);
1096
1097		dst = dst.substr(0, offset);
1098		dst += src;
1099		dst += tail;
1100	};
1101}
1102
1103// True if this is the head of s.
1104//static
1105template<class T> 
1106BOOL LLStringUtilBase<T>::isHead( const std::basic_string<T>& string, const T* s ) 
1107{ 
1108	if( string.empty() )
1109	{
1110		// Early exit
1111		return FALSE;
1112	}
1113	else
1114	{
1115		return (strncmp( s, string.c_str(), string.size() ) == 0);
1116	}
1117}
1118
1119// static
1120template<class T> 
1121bool LLStringUtilBase<T>::startsWith(
1122	const std::basic_string<T>& string,
1123	const std::basic_string<T>& substr)
1124{
1125	if(string.empty() || (substr.empty())) return false;
1126	if(0 == string.find(substr)) return true;
1127	return false;
1128}
1129
1130// static
1131template<class T> 
1132bool LLStringUtilBase<T>::endsWith(
1133	const std::basic_string<T>& string,
1134	const std::basic_string<T>& substr)
1135{
1136	if(string.empty() || (substr.empty())) return false;
1137	std::string::size_type idx = string.rfind(substr);
1138	if(std::string::npos == idx) return false;
1139	return (idx == (string.size() - substr.size()));
1140}
1141
1142
1143template<class T> 
1144BOOL LLStringUtilBase<T>::convertToBOOL(const std::basic_string<T>& string, BOOL& value)
1145{
1146	if( string.empty() )
1147	{
1148		return FALSE;
1149	}
1150
1151	std::basic_string<T> temp( string );
1152	trim(temp);
1153	if( 
1154		(temp == "1") || 
1155		(temp == "T") || 
1156		(temp == "t") || 
1157		(temp == "TRUE") || 
1158		(temp == "true") || 
1159		(temp == "True") )
1160	{
1161		value = TRUE;
1162		return TRUE;
1163	}
1164	else
1165	if( 
1166		(temp == "0") || 
1167		(temp == "F") || 
1168		(temp == "f") || 
1169		(temp == "FALSE") || 
1170		(temp == "false") || 
1171		(temp == "False") )
1172	{
1173		value = FALSE;
1174		return TRUE;
1175	}
1176
1177	return FALSE;
1178}
1179
1180template<class T> 
1181BOOL LLStringUtilBase<T>::convertToU8(const std::basic_string<T>& string, U8& value) 
1182{
1183	S32 value32 = 0;
1184	BOOL success = convertToS32(string, value32);
1185	if( success && (U8_MIN <= value32) && (value32 <= U8_MAX) )
1186	{
1187		value = (U8) value32;
1188		return TRUE;
1189	}
1190	return FALSE;
1191}
1192
1193template<class T> 
1194BOOL LLStringUtilBase<T>::convertToS8(const std::basic_string<T>& string, S8& value) 
1195{
1196	S32 value32 = 0;
1197	BOOL success = convertToS32(string, value32);
1198	if( success && (S8_MIN <= value32) && (value32 <= S8_MAX) )
1199	{
1200		value = (S8) value32;
1201		return TRUE;
1202	}
1203	return FALSE;
1204}
1205
1206template<class T> 
1207BOOL LLStringUtilBase<T>::convertToS16(const std::basic_string<T>& string, S16& value) 
1208{
1209	S32 value32 = 0;
1210	BOOL success = convertToS32(string, value32);
1211	if( success && (S16_MIN <= value32) && (value32 <= S16_MAX) )
1212	{
1213		value = (S16) value32;
1214		return TRUE;
1215	}
1216	return FALSE;
1217}
1218
1219template<class T> 
1220BOOL LLStringUtilBase<T>::convertToU16(const std::basic_string<T>& string, U16& value) 
1221{
1222	S32 value32 = 0;
1223	BOOL success = convertToS32(string, value32);
1224	if( success && (U16_MIN <= value32) && (value32 <= U16_MAX) )
1225	{
1226		value = (U16) value32;
1227		return TRUE;
1228	}
1229	return FALSE;
1230}
1231
1232template<class T> 
1233BOOL LLStringUtilBase<T>::convertToU32(const std::basic_string<T>& string, U32& value) 
1234{
1235	if( string.empty() )
1236	{
1237		return FALSE;
1238	}
1239
1240	std::basic_string<T> temp( string );
1241	trim(temp);
1242	U32 v;
1243	std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
1244	if(i_stream >> v)
1245	{
1246		value = v;
1247		return TRUE;
1248	}
1249	return FALSE;
1250}
1251
1252template<class T> 
1253BOOL LLStringUtilBase<T>::convertToS32(const std::basic_string<T>& string, S32& value) 
1254{
1255	if( string.empty() )
1256	{
1257		return FALSE;
1258	}
1259
1260	std::basic_string<T> temp( string );
1261	trim(temp);
1262	S32 v;
1263	std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
1264	if(i_stream >> v)
1265	{
1266		//TODO: figure out overflow and underflow reporting here
1267		//if((LONG_MAX == v) || (LONG_MIN == v))
1268		//{
1269		//	// Underflow or overflow
1270		//	return FALSE;
1271		//}
1272
1273		value = v;
1274		return TRUE;
1275	}
1276	return FALSE;
1277}
1278
1279template<class T> 
1280BOOL LLStringUtilBase<T>::convertToF32(const std::basic_string<T>& string, F32& value) 
1281{
1282	F64 value64 = 0.0;
1283	BOOL success = convertToF64(string, value64);
1284	if( success && (-F32_MAX <= value64) && (value64 <= F32_MAX) )
1285	{
1286		value = (F32) value64;
1287		return TRUE;
1288	}
1289	return FALSE;
1290}
1291
1292template<class T> 
1293BOOL LLStringUtilBase<T>::convertToF64(const std::basic_string<T>& string, F64& value)
1294{
1295	if( string.empty() )
1296	{
1297		return FALSE;
1298	}
1299
1300	std::basic_string<T> temp( string );
1301	trim(temp);
1302	F64 v;
1303	std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
1304	if(i_stream >> v)
1305	{
1306		//TODO: figure out overflow and underflow reporting here
1307		//if( ((-HUGE_VAL == v) || (HUGE_VAL == v))) )
1308		//{
1309		//	// Underflow or overflow
1310		//	return FALSE;
1311		//}
1312
1313		value = v;
1314		return TRUE;
1315	}
1316	return FALSE;
1317}
1318
1319template<class T> 
1320void LLStringUtilBase<T>::truncate(std::basic_string<T>& string, size_type count)
1321{
1322	size_type cur_size = string.size();
1323	string.resize(count < cur_size ? count : cur_size);
1324}
1325
#endif  // LL_LLSTRING_H