PageRenderTime 110ms CodeModel.GetById 15ms app.highlight 85ms RepoModel.GetById 1ms app.codeStats 0ms

/indra/llcommon/llstring.cpp

https://bitbucket.org/lindenlab/viewer-beta/
C++ | 1421 lines | 1121 code | 174 blank | 126 comment | 266 complexity | d56a42315a97ec5d8e1f20179d482309 MD5 | raw file
   1/** 
   2 * @file llstring.cpp
   3 * @brief String utility functions and the std::string class.
   4 *
   5 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
   6 * Second Life Viewer Source Code
   7 * Copyright (C) 2010, Linden Research, Inc.
   8 * 
   9 * This library is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU Lesser General Public
  11 * License as published by the Free Software Foundation;
  12 * version 2.1 of the License only.
  13 * 
  14 * This library is distributed in the hope that it will be useful,
  15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 * Lesser General Public License for more details.
  18 * 
  19 * You should have received a copy of the GNU Lesser General Public
  20 * License along with this library; if not, write to the Free Software
  21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
  22 * 
  23 * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
  24 * $/LicenseInfo$
  25 */
  26
  27#include "linden_common.h"
  28
  29#include "llstring.h"
  30#include "llerror.h"
  31
  32#if LL_WINDOWS
  33#define WIN32_LEAN_AND_MEAN
  34#include <winsock2.h>
  35#include <windows.h>
  36#include <winnls.h> // for WideCharToMultiByte
  37#endif
  38
  39LLFastTimer::DeclareTimer FT_STRING_FORMAT("String Format");
  40
  41
  42std::string ll_safe_string(const char* in)
  43{
  44	if(in) return std::string(in);
  45	return std::string();
  46}
  47
  48std::string ll_safe_string(const char* in, S32 maxlen)
  49{
  50	if(in) return std::string(in, maxlen);
  51	return std::string();
  52}
  53
  54U8 hex_as_nybble(char hex)
  55{
  56	if((hex >= '0') && (hex <= '9'))
  57	{
  58		return (U8)(hex - '0');
  59	}
  60	else if((hex >= 'a') && (hex <='f'))
  61	{
  62		return (U8)(10 + hex - 'a');
  63	}
  64	else if((hex >= 'A') && (hex <='F'))
  65	{
  66		return (U8)(10 + hex - 'A');
  67	}
  68	return 0; // uh - oh, not hex any more...
  69}
  70
  71bool iswindividual(llwchar elem)
  72{   
  73	U32 cur_char = (U32)elem;
  74	bool result = false;
  75	if (0x2E80<= cur_char && cur_char <= 0x9FFF)
  76	{
  77		result = true;
  78	}
  79	else if (0xAC00<= cur_char && cur_char <= 0xD7A0 )
  80	{
  81		result = true;
  82	}
  83	else if (0xF900<= cur_char && cur_char <= 0xFA60 )
  84	{
  85		result = true;
  86	}
  87	return result;
  88}
  89
  90bool _read_file_into_string(std::string& str, const std::string& filename)
  91{
  92	llifstream ifs(filename, llifstream::binary);
  93	if (!ifs.is_open())
  94	{
  95		llinfos << "Unable to open file " << filename << llendl;
  96		return false;
  97	}
  98
  99	std::ostringstream oss;
 100
 101	oss << ifs.rdbuf();
 102	str = oss.str();
 103	ifs.close();
 104	return true;
 105}
 106
 107
 108
 109
 110// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
 111// for the Unicode implementation - this doesn't match because it was written before finding
 112// it.
 113
 114
 115std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
 116{
 117	std::string utf8_str = wstring_to_utf8str(wstr);
 118	s << utf8_str;
 119	return s;
 120}
 121
 122std::string rawstr_to_utf8(const std::string& raw)
 123{
 124	LLWString wstr(utf8str_to_wstring(raw));
 125	return wstring_to_utf8str(wstr);
 126}
 127
 128S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
 129{
 130	U32 cur_char = (U32)in_char;
 131	char* base = outchars;
 132	if (cur_char < 0x80)
 133	{
 134		*outchars++ = (U8)cur_char;
 135	}
 136	else if (cur_char < 0x800)
 137	{
 138		*outchars++ = 0xC0 | (cur_char >> 6);
 139		*outchars++ = 0x80 | (cur_char & 0x3F);
 140	}
 141	else if (cur_char < 0x10000)
 142	{
 143		*outchars++ = 0xE0 | (cur_char >> 12);
 144		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
 145		*outchars++ = 0x80 | (cur_char & 0x3F);
 146	}
 147	else if (cur_char < 0x200000)
 148	{
 149		*outchars++ = 0xF0 | (cur_char >> 18);
 150		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
 151		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
 152		*outchars++ = 0x80 | (cur_char & 0x3F);
 153	}
 154	else if (cur_char < 0x4000000)
 155	{
 156		*outchars++ = 0xF8 | (cur_char >> 24);
 157		*outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
 158		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
 159		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
 160		*outchars++ = 0x80 | (cur_char & 0x3F);
 161	}
 162	else if (cur_char < 0x80000000)
 163	{
 164		*outchars++ = 0xFC | (cur_char >> 30);
 165		*outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
 166		*outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
 167		*outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
 168		*outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
 169		*outchars++ = 0x80 | (cur_char & 0x3F);
 170	}
 171	else
 172	{
 173		llwarns << "Invalid Unicode character " << cur_char << "!" << llendl;
 174		*outchars++ = LL_UNKNOWN_CHAR;
 175	}
 176	return outchars - base;
 177}	
 178
 179S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
 180{
 181	const U16* base = inchars;
 182	U16 cur_char = *inchars++;
 183	llwchar char32 = cur_char;
 184	if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
 185	{
 186		// Surrogates
 187		char32 = ((llwchar)(cur_char - 0xD800)) << 10;
 188		cur_char = *inchars++;
 189		char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
 190	}
 191	else
 192	{
 193		char32 = (llwchar)cur_char;
 194	}
 195	*outchar = char32;
 196	return inchars - base;
 197}
 198
 199llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
 200{
 201	llutf16string out;
 202
 203	S32 i = 0;
 204	while (i < len)
 205	{
 206		U32 cur_char = utf32str[i];
 207		if (cur_char > 0xFFFF)
 208		{
 209			out += (0xD7C0 + (cur_char >> 10));
 210			out += (0xDC00 | (cur_char & 0x3FF));
 211		}
 212		else
 213		{
 214			out += cur_char;
 215		}
 216		i++;
 217	}
 218	return out;
 219}
 220
 221llutf16string wstring_to_utf16str(const LLWString &utf32str)
 222{
 223	const S32 len = (S32)utf32str.length();
 224	return wstring_to_utf16str(utf32str, len);
 225}
 226
 227llutf16string utf8str_to_utf16str ( const std::string& utf8str )
 228{
 229	LLWString wstr = utf8str_to_wstring ( utf8str );
 230	return wstring_to_utf16str ( wstr );
 231}
 232
 233
 234LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
 235{
 236	LLWString wout;
 237	if((len <= 0) || utf16str.empty()) return wout;
 238
 239	S32 i = 0;
 240	// craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
 241	const U16* chars16 = &(*(utf16str.begin()));
 242	while (i < len)
 243	{
 244		llwchar cur_char;
 245		i += utf16chars_to_wchar(chars16+i, &cur_char);
 246		wout += cur_char;
 247	}
 248	return wout;
 249}
 250
 251LLWString utf16str_to_wstring(const llutf16string &utf16str)
 252{
 253	const S32 len = (S32)utf16str.length();
 254	return utf16str_to_wstring(utf16str, len);
 255}
 256
 257// Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
 258S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
 259{
 260	S32 surrogate_pairs = 0;
 261	// ... craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
 262	const U16 *const utf16_chars = &(*(utf16str.begin()));
 263	S32 i = 0;
 264	while (i < utf16_len)
 265	{
 266		const U16 c = utf16_chars[i++];
 267		if (c >= 0xD800 && c <= 0xDBFF)		// See http://en.wikipedia.org/wiki/UTF-16
 268		{   // Have first byte of a surrogate pair
 269			if (i >= utf16_len)
 270			{
 271				break;
 272			}
 273			const U16 d = utf16_chars[i];
 274			if (d >= 0xDC00 && d <= 0xDFFF)
 275			{   // Have valid second byte of a surrogate pair
 276				surrogate_pairs++;
 277				i++;
 278			}
 279		}
 280	}
 281	return utf16_len - surrogate_pairs;
 282}
 283
 284// Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
 285S32 wstring_utf16_length(const LLWString &wstr, const S32 woffset, const S32 wlen)
 286{
 287	const S32 end = llmin((S32)wstr.length(), woffset + wlen);
 288	if (end < woffset)
 289	{
 290		return 0;
 291	}
 292	else
 293	{
 294		S32 length = end - woffset;
 295		for (S32 i = woffset; i < end; i++)
 296		{
 297			if (wstr[i] >= 0x10000)
 298			{
 299				length++;
 300			}
 301		}
 302		return length;
 303	}
 304}
 305
 306// Given a wstring and an offset in it, returns the length as wstring (i.e.,
 307// number of llwchars) of the longest substring that starts at the offset
 308// and whose equivalent utf-16 string does not exceeds the given utf16_length.
 309S32 wstring_wstring_length_from_utf16_length(const LLWString & wstr, const S32 woffset, const S32 utf16_length, BOOL *unaligned)
 310{
 311	const S32 end = wstr.length();
 312	BOOL u = FALSE;
 313	S32 n = woffset + utf16_length;
 314	S32 i = woffset;
 315	while (i < end)
 316	{
 317		if (wstr[i] >= 0x10000)
 318		{
 319			--n;
 320		}
 321		if (i >= n)
 322		{
 323			u = (i > n);
 324			break;
 325		}
 326		i++;
 327	}
 328	if (unaligned)
 329	{
 330		*unaligned = u;
 331	}
 332	return i - woffset;
 333}
 334
 335S32 wchar_utf8_length(const llwchar wc)
 336{
 337	if (wc < 0x80)
 338	{
 339		// This case will also catch negative values which are
 340		// technically invalid.
 341		return 1;
 342	}
 343	else if (wc < 0x800)
 344	{
 345		return 2;
 346	}
 347	else if (wc < 0x10000)
 348	{
 349		return 3;
 350	}
 351	else if (wc < 0x200000)
 352	{
 353		return 4;
 354	}
 355	else if (wc < 0x4000000)
 356	{
 357		return 5;
 358	}
 359	else
 360	{
 361		return 6;
 362	}
 363}
 364
 365
 366S32 wstring_utf8_length(const LLWString& wstr)
 367{
 368	S32 len = 0;
 369	for (S32 i = 0; i < (S32)wstr.length(); i++)
 370	{
 371		len += wchar_utf8_length(wstr[i]);
 372	}
 373	return len;
 374}
 375
 376
 377LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
 378{
 379	LLWString wout;
 380
 381	S32 i = 0;
 382	while (i < len)
 383	{
 384		llwchar unichar;
 385		U8 cur_char = utf8str[i];
 386
 387		if (cur_char < 0x80)
 388		{
 389			// Ascii character, just add it
 390			unichar = cur_char;
 391		}
 392		else
 393		{
 394			S32 cont_bytes = 0;
 395			if ((cur_char >> 5) == 0x6)			// Two byte UTF8 -> 1 UTF32
 396			{
 397				unichar = (0x1F&cur_char);
 398				cont_bytes = 1;
 399			}
 400			else if ((cur_char >> 4) == 0xe)	// Three byte UTF8 -> 1 UTF32
 401			{
 402				unichar = (0x0F&cur_char);
 403				cont_bytes = 2;
 404			}
 405			else if ((cur_char >> 3) == 0x1e)	// Four byte UTF8 -> 1 UTF32
 406			{
 407				unichar = (0x07&cur_char);
 408				cont_bytes = 3;
 409			}
 410			else if ((cur_char >> 2) == 0x3e)	// Five byte UTF8 -> 1 UTF32
 411			{
 412				unichar = (0x03&cur_char);
 413				cont_bytes = 4;
 414			}
 415			else if ((cur_char >> 1) == 0x7e)	// Six byte UTF8 -> 1 UTF32
 416			{
 417				unichar = (0x01&cur_char);
 418				cont_bytes = 5;
 419			}
 420			else
 421			{
 422				wout += LL_UNKNOWN_CHAR;
 423				++i;
 424				continue;
 425			}
 426
 427			// Check that this character doesn't go past the end of the string
 428			S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
 429			do
 430			{
 431				++i;
 432
 433				cur_char = utf8str[i];
 434				if ( (cur_char >> 6) == 0x2 )
 435				{
 436					unichar <<= 6;
 437					unichar += (0x3F&cur_char);
 438				}
 439				else
 440				{
 441					// Malformed sequence - roll back to look at this as a new char
 442					unichar = LL_UNKNOWN_CHAR;
 443					--i;
 444					break;
 445				}
 446			} while(i < end);
 447
 448			// Handle overlong characters and NULL characters
 449			if ( ((cont_bytes == 1) && (unichar < 0x80))
 450				|| ((cont_bytes == 2) && (unichar < 0x800))
 451				|| ((cont_bytes == 3) && (unichar < 0x10000))
 452				|| ((cont_bytes == 4) && (unichar < 0x200000))
 453				|| ((cont_bytes == 5) && (unichar < 0x4000000)) )
 454			{
 455				unichar = LL_UNKNOWN_CHAR;
 456			}
 457		}
 458
 459		wout += unichar;
 460		++i;
 461	}
 462	return wout;
 463}
 464
 465LLWString utf8str_to_wstring(const std::string& utf8str)
 466{
 467	const S32 len = (S32)utf8str.length();
 468	return utf8str_to_wstring(utf8str, len);
 469}
 470
 471std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
 472{
 473	std::string out;
 474
 475	S32 i = 0;
 476	while (i < len)
 477	{
 478		char tchars[8];		/* Flawfinder: ignore */
 479		S32 n = wchar_to_utf8chars(utf32str[i], tchars);
 480		tchars[n] = 0;
 481		out += tchars;
 482		i++;
 483	}
 484	return out;
 485}
 486
 487std::string wstring_to_utf8str(const LLWString& utf32str)
 488{
 489	const S32 len = (S32)utf32str.length();
 490	return wstring_to_utf8str(utf32str, len);
 491}
 492
 493std::string utf16str_to_utf8str(const llutf16string& utf16str)
 494{
 495	return wstring_to_utf8str(utf16str_to_wstring(utf16str));
 496}
 497
 498std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
 499{
 500	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
 501}
 502
 503std::string utf8str_trim(const std::string& utf8str)
 504{
 505	LLWString wstr = utf8str_to_wstring(utf8str);
 506	LLWStringUtil::trim(wstr);
 507	return wstring_to_utf8str(wstr);
 508}
 509
 510
 511std::string utf8str_tolower(const std::string& utf8str)
 512{
 513	LLWString out_str = utf8str_to_wstring(utf8str);
 514	LLWStringUtil::toLower(out_str);
 515	return wstring_to_utf8str(out_str);
 516}
 517
 518
 519S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
 520{
 521	LLWString wlhs = utf8str_to_wstring(lhs);
 522	LLWString wrhs = utf8str_to_wstring(rhs);
 523	return LLWStringUtil::compareInsensitive(wlhs, wrhs);
 524}
 525
 526std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
 527{
 528	if (0 == max_len)
 529	{
 530		return std::string();
 531	}
 532	if ((S32)utf8str.length() <= max_len)
 533	{
 534		return utf8str;
 535	}
 536	else
 537	{
 538		S32 cur_char = max_len;
 539
 540		// If we're ASCII, we don't need to do anything
 541		if ((U8)utf8str[cur_char] > 0x7f)
 542		{
 543			// If first two bits are (10), it's the tail end of a multibyte char.  We need to shift back
 544			// to the first character
 545			while (0x80 == (0xc0 & utf8str[cur_char]))
 546			{
 547				cur_char--;
 548				// Keep moving forward until we hit the first char;
 549				if (cur_char == 0)
 550				{
 551					// Make sure we don't trash memory if we've got a bogus string.
 552					break;
 553				}
 554			}
 555		}
 556		// The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
 557		return utf8str.substr(0, cur_char);
 558	}
 559}
 560
 561std::string utf8str_substChar(
 562	const std::string& utf8str,
 563	const llwchar target_char,
 564	const llwchar replace_char)
 565{
 566	LLWString wstr = utf8str_to_wstring(utf8str);
 567	LLWStringUtil::replaceChar(wstr, target_char, replace_char);
 568	//wstr = wstring_substChar(wstr, target_char, replace_char);
 569	return wstring_to_utf8str(wstr);
 570}
 571
 572std::string utf8str_makeASCII(const std::string& utf8str)
 573{
 574	LLWString wstr = utf8str_to_wstring(utf8str);
 575	LLWStringUtil::_makeASCII(wstr);
 576	return wstring_to_utf8str(wstr);
 577}
 578
 579std::string mbcsstring_makeASCII(const std::string& wstr)
 580{
 581	// Replace non-ASCII chars with replace_char
 582	std::string out_str = wstr;
 583	for (S32 i = 0; i < (S32)out_str.length(); i++)
 584	{
 585		if ((U8)out_str[i] > 0x7f)
 586		{
 587			out_str[i] = LL_UNKNOWN_CHAR;
 588		}
 589	}
 590	return out_str;
 591}
 592std::string utf8str_removeCRLF(const std::string& utf8str)
 593{
 594	if (0 == utf8str.length())
 595	{
 596		return std::string();
 597	}
 598	const char CR = 13;
 599
 600	std::string out;
 601	out.reserve(utf8str.length());
 602	const S32 len = (S32)utf8str.length();
 603	for( S32 i = 0; i < len; i++ )
 604	{
 605		if( utf8str[i] != CR )
 606		{
 607			out.push_back(utf8str[i]);
 608		}
 609	}
 610	return out;
 611}
 612
 613#if LL_WINDOWS
 614// documentation moved to header. Phoenix 2007-11-27
 615namespace snprintf_hack
 616{
 617	int snprintf(char *str, size_t size, const char *format, ...)
 618	{
 619		va_list args;
 620		va_start(args, format);
 621
 622		int num_written = _vsnprintf(str, size, format, args); /* Flawfinder: ignore */
 623		va_end(args);
 624		
 625		str[size-1] = '\0'; // always null terminate
 626		return num_written;
 627	}
 628}
 629
 630std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
 631{
 632	std::string out;
 633	if(in)
 634	{
 635		int len_in = wcslen(in);
 636		int len_out = WideCharToMultiByte(
 637			code_page,
 638			0,
 639			in,
 640			len_in,
 641			NULL,
 642			0,
 643			0,
 644			0);
 645		// We will need two more bytes for the double NULL ending
 646		// created in WideCharToMultiByte().
 647		char* pout = new char [len_out + 2];
 648		memset(pout, 0, len_out + 2);
 649		if(pout)
 650		{
 651			WideCharToMultiByte(
 652				code_page,
 653				0,
 654				in,
 655				len_in,
 656				pout,
 657				len_out,
 658				0,
 659				0);
 660			out.assign(pout);
 661			delete[] pout;
 662		}
 663	}
 664	return out;
 665}
 666
 667wchar_t* ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
 668{
 669	// From review:
 670	// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
 671	// plus one for a null terminator, and be guaranteed to not overflow.
 672
 673	//	Normally, I'd call that sort of thing premature optimization,
 674	// but we *are* seeing string operations taking a bunch of time, especially when constructing widgets.
 675//	int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(), NULL, 0);
 676
 677	// reserve place to NULL terminator
 678	int output_str_len = in.length();
 679	wchar_t* w_out = new wchar_t[output_str_len + 1];
 680
 681	memset(w_out, 0, output_str_len + 1);
 682	int real_output_str_len = MultiByteToWideChar (code_page, 0, in.c_str(), in.length(), w_out, output_str_len);
 683
 684	//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
 685	w_out[real_output_str_len] = 0;
 686
 687	return w_out;
 688}
 689
 690std::string ll_convert_string_to_utf8_string(const std::string& in)
 691{
 692	wchar_t* w_mesg = ll_convert_string_to_wide(in, CP_ACP);
 693	std::string out_utf8(ll_convert_wide_to_string(w_mesg, CP_UTF8));
 694	delete[] w_mesg;
 695
 696	return out_utf8;
 697}
 698#endif // LL_WINDOWS
 699
 700long LLStringOps::sPacificTimeOffset = 0;
 701long LLStringOps::sLocalTimeOffset = 0;
 702bool LLStringOps::sPacificDaylightTime = 0;
 703std::map<std::string, std::string> LLStringOps::datetimeToCodes;
 704
 705std::vector<std::string> LLStringOps::sWeekDayList;
 706std::vector<std::string> LLStringOps::sWeekDayShortList;
 707std::vector<std::string> LLStringOps::sMonthList;
 708std::vector<std::string> LLStringOps::sMonthShortList;
 709
 710
 711std::string LLStringOps::sDayFormat;
 712std::string LLStringOps::sAM;
 713std::string LLStringOps::sPM;
 714
 715
 716S32	LLStringOps::collate(const llwchar* a, const llwchar* b)
 717{ 
 718	#if LL_WINDOWS
 719		// in Windows, wide string functions operator on 16-bit strings, 
 720		// not the proper 32 bit wide string
 721		return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
 722	#else
 723		return wcscoll(a, b);
 724	#endif
 725}
 726
 727void LLStringOps::setupDatetimeInfo (bool daylight)
 728{
 729	time_t nowT, localT, gmtT;
 730	struct tm * tmpT;
 731
 732	nowT = time (NULL);
 733
 734	tmpT = gmtime (&nowT);
 735	gmtT = mktime (tmpT);
 736
 737	tmpT = localtime (&nowT);
 738	localT = mktime (tmpT);
 739	
 740	sLocalTimeOffset = (long) (gmtT - localT);
 741	if (tmpT->tm_isdst)
 742	{
 743		sLocalTimeOffset -= 60 * 60;	// 1 hour
 744	}
 745
 746	sPacificDaylightTime = daylight;
 747	sPacificTimeOffset = (sPacificDaylightTime? 7 : 8 ) * 60 * 60;
 748
 749	datetimeToCodes["wkday"]	= "%a";		// Thu
 750	datetimeToCodes["weekday"]	= "%A";		// Thursday
 751	datetimeToCodes["year4"]	= "%Y";		// 2009
 752	datetimeToCodes["year"]		= "%Y";		// 2009
 753	datetimeToCodes["year2"]	= "%y";		// 09
 754	datetimeToCodes["mth"]		= "%b";		// Aug
 755	datetimeToCodes["month"]	= "%B";		// August
 756	datetimeToCodes["mthnum"]	= "%m";		// 08
 757	datetimeToCodes["day"]		= "%d";		// 31
 758	datetimeToCodes["sday"]		= "%-d";	// 9
 759	datetimeToCodes["hour24"]	= "%H";		// 14
 760	datetimeToCodes["hour"]		= "%H";		// 14
 761	datetimeToCodes["hour12"]	= "%I";		// 02
 762	datetimeToCodes["min"]		= "%M";		// 59
 763	datetimeToCodes["ampm"]		= "%p";		// AM
 764	datetimeToCodes["second"]	= "%S";		// 59
 765	datetimeToCodes["timezone"]	= "%Z";		// PST
 766}
 767
 768void tokenizeStringToArray(const std::string& data, std::vector<std::string>& output)
 769{
 770	output.clear();
 771	size_t length = data.size();
 772	
 773	// tokenize it and put it in the array
 774	std::string cur_word;
 775	for(size_t i = 0; i < length; ++i)
 776	{
 777		if(data[i] == ':')
 778		{
 779			output.push_back(cur_word);
 780			cur_word.clear();
 781		}
 782		else
 783		{
 784			cur_word.append(1, data[i]);
 785		}
 786	}
 787	output.push_back(cur_word);
 788}
 789
 790void LLStringOps::setupWeekDaysNames(const std::string& data)
 791{
 792	tokenizeStringToArray(data,sWeekDayList);
 793}
 794void LLStringOps::setupWeekDaysShortNames(const std::string& data)
 795{
 796	tokenizeStringToArray(data,sWeekDayShortList);
 797}
 798void LLStringOps::setupMonthNames(const std::string& data)
 799{
 800	tokenizeStringToArray(data,sMonthList);
 801}
 802void LLStringOps::setupMonthShortNames(const std::string& data)
 803{
 804	tokenizeStringToArray(data,sMonthShortList);
 805}
 806void LLStringOps::setupDayFormat(const std::string& data)
 807{
 808	sDayFormat = data;
 809}
 810
 811
 812std::string LLStringOps::getDatetimeCode (std::string key)
 813{
 814	std::map<std::string, std::string>::iterator iter;
 815
 816	iter = datetimeToCodes.find (key);
 817	if (iter != datetimeToCodes.end())
 818	{
 819		return iter->second;
 820	}
 821	else
 822	{
 823		return std::string("");
 824	}
 825}
 826
 827
 828namespace LLStringFn
 829{
 830	// NOTE - this restricts output to ascii
 831	void replace_nonprintable_in_ascii(std::basic_string<char>& string, char replacement)
 832	{
 833		const char MIN = 0x20;
 834		std::basic_string<char>::size_type len = string.size();
 835		for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
 836		{
 837			if(string[ii] < MIN)
 838			{
 839				string[ii] = replacement;
 840			}
 841		}
 842	}
 843
 844
 845	// NOTE - this restricts output to ascii
 846	void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
 847									   char replacement)
 848	{
 849		const char MIN  = 0x20;
 850		const char PIPE = 0x7c;
 851		std::basic_string<char>::size_type len = str.size();
 852		for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
 853		{
 854			if( (str[ii] < MIN) || (str[ii] == PIPE) )
 855			{
 856				str[ii] = replacement;
 857			}
 858		}
 859	}
 860
 861	// https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
 862	// allowable code points for XML. Specifically, they are:
 863	// 0x09, 0x0a, 0x0d, and 0x20 on up.  JC
 864	std::string strip_invalid_xml(const std::string& instr)
 865	{
 866		std::string output;
 867		output.reserve( instr.size() );
 868		std::string::const_iterator it = instr.begin();
 869		while (it != instr.end())
 870		{
 871			// Must compare as unsigned for >=
 872			// Test most likely match first
 873			const unsigned char c = (unsigned char)*it;
 874			if (   c >= (unsigned char)0x20   // SPACE
 875				|| c == (unsigned char)0x09   // TAB
 876				|| c == (unsigned char)0x0a   // LINE_FEED
 877				|| c == (unsigned char)0x0d ) // CARRIAGE_RETURN
 878			{
 879				output.push_back(c);
 880			}
 881			++it;
 882		}
 883		return output;
 884	}
 885
 886	/**
 887	 * @brief Replace all control characters (c < 0x20) with replacement in
 888	 * string.
 889	 */
 890	void replace_ascii_controlchars(std::basic_string<char>& string, char replacement)
 891	{
 892		const unsigned char MIN = 0x20;
 893		std::basic_string<char>::size_type len = string.size();
 894		for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
 895		{
 896			const unsigned char c = (unsigned char) string[ii];
 897			if(c < MIN)
 898			{
 899				string[ii] = replacement;
 900			}
 901		}
 902	}
 903}
 904
 905////////////////////////////////////////////////////////////
 906
 907// Forward specialization of LLStringUtil::format before use in LLStringUtil::formatDatetime.
 908template<>
 909S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
 910
 911//static
 912template<> 
 913void LLStringUtil::getTokens(const std::string& instr, std::vector<std::string >& tokens, const std::string& delims)
 914{
 915	std::string currToken;
 916	std::string::size_type begIdx, endIdx;
 917
 918	begIdx = instr.find_first_not_of (delims);
 919	while (begIdx != std::string::npos)
 920	{
 921		endIdx = instr.find_first_of (delims, begIdx);
 922		if (endIdx == std::string::npos)
 923		{
 924			endIdx = instr.length();
 925		}
 926
 927		currToken = instr.substr(begIdx, endIdx - begIdx);
 928		LLStringUtil::trim (currToken);
 929		tokens.push_back(currToken);
 930		begIdx = instr.find_first_not_of (delims, endIdx);
 931	}
 932}
 933
 934template<> 
 935LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr, size_type& start, std::vector<std::string>& tokens)
 936{
 937	const std::string delims (",");
 938	
 939	// Find the first [
 940	size_type pos1 = instr.find('[', start);
 941	if (pos1 == std::string::npos)
 942		return std::string::npos;
 943
 944	//Find the first ] after the initial [
 945	size_type pos2 = instr.find(']', pos1);
 946	if (pos2 == std::string::npos)
 947		return std::string::npos;
 948
 949	// Find the last [ before ] in case of nested [[]]
 950	pos1 = instr.find_last_of('[', pos2-1);
 951	if (pos1 == std::string::npos || pos1 < start)
 952		return std::string::npos;
 953	
 954	getTokens(std::string(instr,pos1+1,pos2-pos1-1), tokens, delims);
 955	start = pos2+1;
 956	
 957	return pos1;
 958}
 959
 960// static
 961template<> 
 962bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const format_map_t& substitutions)
 963{
 964	// see if we have a replacement for the bracketed string (without the brackets)
 965	// test first using has() because if we just look up with operator[] we get back an
 966	// empty string even if the value is missing. We want to distinguish between 
 967	// missing replacements and deliberately empty replacement strings.
 968	format_map_t::const_iterator iter = substitutions.find(token);
 969	if (iter != substitutions.end())
 970	{
 971		replacement = iter->second;
 972		return true;
 973	}
 974	// if not, see if there's one WITH brackets
 975	iter = substitutions.find(std::string("[" + token + "]"));
 976	if (iter != substitutions.end())
 977	{
 978		replacement = iter->second;
 979		return true;
 980	}
 981
 982	return false;
 983}
 984
 985// static
 986template<> 
 987bool LLStringUtil::simpleReplacement(std::string &replacement, std::string token, const LLSD& substitutions)
 988{
 989	// see if we have a replacement for the bracketed string (without the brackets)
 990	// test first using has() because if we just look up with operator[] we get back an
 991	// empty string even if the value is missing. We want to distinguish between 
 992	// missing replacements and deliberately empty replacement strings.
 993	if (substitutions.has(token))
 994	{
 995		replacement = substitutions[token].asString();
 996		return true;
 997	}
 998	// if not, see if there's one WITH brackets
 999	else if (substitutions.has(std::string("[" + token + "]")))
1000	{
1001		replacement = substitutions[std::string("[" + token + "]")].asString();
1002		return true;
1003	}
1004
1005	return false;
1006}
1007
1008//static
1009template<>
1010void LLStringUtil::setLocale(std::string inLocale)
1011{
1012	sLocale = inLocale;
1013};
1014
1015//static
1016template<>
1017std::string LLStringUtil::getLocale(void)
1018{
1019	return sLocale;
1020};
1021
1022// static
1023template<> 
1024void LLStringUtil::formatNumber(std::string& numStr, std::string decimals)
1025{
1026	std::stringstream strStream;
1027	S32 intDecimals = 0;
1028
1029	convertToS32 (decimals, intDecimals);
1030	if (!sLocale.empty())
1031	{
1032		// std::locale() throws if the locale is unknown! (EXT-7926)
1033		try
1034		{
1035			strStream.imbue(std::locale(sLocale.c_str()));
1036		} catch (const std::exception &)
1037		{
1038			LL_WARNS_ONCE("Locale") << "Cannot set locale to " << sLocale << LL_ENDL;
1039		}
1040	}
1041
1042	if (!intDecimals)
1043	{
1044		S32 intStr;
1045
1046		if (convertToS32(numStr, intStr))
1047		{
1048			strStream << intStr;
1049			numStr = strStream.str();
1050		}
1051	}
1052	else
1053	{
1054		F32 floatStr;
1055
1056		if (convertToF32(numStr, floatStr))
1057		{
1058			strStream << std::fixed << std::showpoint << std::setprecision(intDecimals) << floatStr;
1059			numStr = strStream.str();
1060		}
1061	}
1062}
1063
1064// static
1065template<> 
1066bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
1067								  std::string param, S32 secFromEpoch)
1068{
1069	if (param == "local")   // local
1070	{
1071		secFromEpoch -= LLStringOps::getLocalTimeOffset();
1072	}
1073	else if (param != "utc") // slt
1074	{
1075		secFromEpoch -= LLStringOps::getPacificTimeOffset();
1076	}
1077		
1078	// if never fell into those two ifs above, param must be utc
1079	if (secFromEpoch < 0) secFromEpoch = 0;
1080
1081	LLDate datetime((F64)secFromEpoch);
1082	std::string code = LLStringOps::getDatetimeCode (token);
1083
1084	// special case to handle timezone
1085	if (code == "%Z") {
1086		if (param == "utc")
1087		{
1088			replacement = "GMT";
1089		}
1090		else if (param == "local")
1091		{
1092			replacement = "";		// user knows their own timezone
1093		}
1094		else
1095		{
1096			// "slt" = Second Life Time, which is deprecated.
1097			// If not utc or user local time, fallback to Pacific time
1098			replacement = LLStringOps::getPacificDaylightTime() ? "PDT" : "PST";
1099		}
1100		return true;
1101	}
1102
1103	//EXT-7013
1104	//few codes are not suppotred by strtime function (example - weekdays for Japanise)
1105	//so use predefined ones
1106	
1107	//if sWeekDayList is not empty than current locale doesn't support
1108        //weekday name.
1109	time_t loc_seconds = (time_t) secFromEpoch;
1110	if(LLStringOps::sWeekDayList.size() == 7 && code == "%A")
1111	{
1112		struct tm * gmt = gmtime (&loc_seconds);
1113		replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
1114	}
1115	else if(LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
1116	{
1117		struct tm * gmt = gmtime (&loc_seconds);
1118		replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
1119	}
1120	else if(LLStringOps::sMonthList.size() == 12 && code == "%B")
1121	{
1122		struct tm * gmt = gmtime (&loc_seconds);
1123		replacement = LLStringOps::sMonthList[gmt->tm_mon];
1124	}
1125	else if( !LLStringOps::sDayFormat.empty() && code == "%d" )
1126	{
1127		struct tm * gmt = gmtime (&loc_seconds);
1128		LLStringUtil::format_map_t args;
1129		args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
1130		replacement = LLStringOps::sDayFormat;
1131		LLStringUtil::format(replacement, args);
1132	}
1133	else if (code == "%-d")
1134	{
1135		struct tm * gmt = gmtime (&loc_seconds);
1136		replacement = llformat ("%d", gmt->tm_mday); // day of the month without leading zero
1137	}
1138	else if( !LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() && code == "%p" )
1139	{
1140		struct tm * gmt = gmtime (&loc_seconds);
1141		if(gmt->tm_hour<12)
1142		{
1143			replacement = LLStringOps::sAM;
1144		}
1145		else
1146		{
1147			replacement = LLStringOps::sPM;
1148		}
1149	}
1150	else
1151	{
1152		replacement = datetime.toHTTPDateString(code);
1153	}
1154
1155	// *HACK: delete leading zero from hour string in case 'hour12' (code = %I) time format
1156	// to show time without leading zero, e.g. 08:16 -> 8:16 (EXT-2738).
1157	// We could have used '%l' format instead, but it's not supported by Windows.
1158	if(code == "%I" && token == "hour12" && replacement.at(0) == '0')
1159	{
1160		replacement = replacement.at(1);
1161	}
1162
1163	return !code.empty();
1164}
1165
1166// LLStringUtil::format recognizes the following patterns.
1167// All substitutions *must* be encased in []'s in the input string.
1168// The []'s are optional in the substitution map.
1169// [FOO_123]
1170// [FOO,number,precision]
1171// [FOO,datetime,format]
1172
1173
1174// static
1175template<> 
1176S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
1177{
1178	LLFastTimer ft(FT_STRING_FORMAT);
1179	S32 res = 0;
1180
1181	std::string output;
1182	std::vector<std::string> tokens;
1183
1184	std::string::size_type start = 0;
1185	std::string::size_type prev_start = 0;
1186	std::string::size_type key_start = 0;
1187	while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
1188	{
1189		output += std::string(s, prev_start, key_start-prev_start);
1190		prev_start = start;
1191		
1192		bool found_replacement = false;
1193		std::string replacement;
1194
1195		if (tokens.size() == 0)
1196		{
1197			found_replacement = false;
1198		}
1199		else if (tokens.size() == 1)
1200		{
1201			found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
1202		}
1203		else if (tokens[1] == "number")
1204		{
1205			std::string param = "0";
1206
1207			if (tokens.size() > 2) param = tokens[2];
1208			found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
1209			if (found_replacement) formatNumber (replacement, param);
1210		}
1211		else if (tokens[1] == "datetime")
1212		{
1213			std::string param;
1214			if (tokens.size() > 2) param = tokens[2];
1215			
1216			format_map_t::const_iterator iter = substitutions.find("datetime");
1217			if (iter != substitutions.end())
1218			{
1219				S32 secFromEpoch = 0;
1220				BOOL r = LLStringUtil::convertToS32(iter->second, secFromEpoch);
1221				if (r)
1222				{
1223					found_replacement = formatDatetime(replacement, tokens[0], param, secFromEpoch);
1224				}
1225			}
1226		}
1227
1228		if (found_replacement)
1229		{
1230			output += replacement;
1231			res++;
1232		}
1233		else
1234		{
1235			// we had no replacement, use the string as is
1236			// e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
1237			output += std::string(s, key_start, start-key_start);
1238		}
1239		tokens.clear();
1240	}
1241	// send the remainder of the string (with no further matches for bracketed names)
1242	output += std::string(s, start);
1243	s = output;
1244	return res;
1245}
1246
1247//static
1248template<> 
1249S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
1250{
1251	LLFastTimer ft(FT_STRING_FORMAT);
1252	S32 res = 0;
1253
1254	if (!substitutions.isMap()) 
1255	{
1256		return res;
1257	}
1258
1259	std::string output;
1260	std::vector<std::string> tokens;
1261
1262	std::string::size_type start = 0;
1263	std::string::size_type prev_start = 0;
1264	std::string::size_type key_start = 0;
1265	while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
1266	{
1267		output += std::string(s, prev_start, key_start-prev_start);
1268		prev_start = start;
1269		
1270		bool found_replacement = false;
1271		std::string replacement;
1272
1273		if (tokens.size() == 0)
1274		{
1275			found_replacement = false;
1276		}
1277		else if (tokens.size() == 1)
1278		{
1279			found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
1280		}
1281		else if (tokens[1] == "number")
1282		{
1283			std::string param = "0";
1284
1285			if (tokens.size() > 2) param = tokens[2];
1286			found_replacement = simpleReplacement (replacement, tokens[0], substitutions);
1287			if (found_replacement) formatNumber (replacement, param);
1288		}
1289		else if (tokens[1] == "datetime")
1290		{
1291			std::string param;
1292			if (tokens.size() > 2) param = tokens[2];
1293			
1294			S32 secFromEpoch = (S32) substitutions["datetime"].asInteger();
1295			found_replacement = formatDatetime (replacement, tokens[0], param, secFromEpoch);
1296		}
1297
1298		if (found_replacement)
1299		{
1300			output += replacement;
1301			res++;
1302		}
1303		else
1304		{
1305			// we had no replacement, use the string as is
1306			// e.g. "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
1307			output += std::string(s, key_start, start-key_start);
1308		}
1309		tokens.clear();
1310	}
1311	// send the remainder of the string (with no further matches for bracketed names)
1312	output += std::string(s, start);
1313	s = output;
1314	return res;
1315}
1316
1317////////////////////////////////////////////////////////////
1318// Testing
1319
1320#ifdef _DEBUG
1321
1322template<class T> 
1323void LLStringUtilBase<T>::testHarness()
1324{
1325	std::string s1;
1326	
1327	llassert( s1.c_str() == NULL );
1328	llassert( s1.size() == 0 );
1329	llassert( s1.empty() );
1330	
1331	std::string s2( "hello");
1332	llassert( !strcmp( s2.c_str(), "hello" ) );
1333	llassert( s2.size() == 5 ); 
1334	llassert( !s2.empty() );
1335	std::string s3( s2 );
1336
1337	llassert( "hello" == s2 );
1338	llassert( s2 == "hello" );
1339	llassert( s2 > "gello" );
1340	llassert( "gello" < s2 );
1341	llassert( "gello" != s2 );
1342	llassert( s2 != "gello" );
1343
1344	std::string s4 = s2;
1345	llassert( !s4.empty() );
1346	s4.empty();
1347	llassert( s4.empty() );
1348	
1349	std::string s5("");
1350	llassert( s5.empty() );
1351	
1352	llassert( isValidIndex(s5, 0) );
1353	llassert( !isValidIndex(s5, 1) );
1354	
1355	s3 = s2;
1356	s4 = "hello again";
1357	
1358	s4 += "!";
1359	s4 += s4;
1360	llassert( s4 == "hello again!hello again!" );
1361	
1362	
1363	std::string s6 = s2 + " " + s2;
1364	std::string s7 = s6;
1365	llassert( s6 == s7 );
1366	llassert( !( s6 != s7) );
1367	llassert( !(s6 < s7) );
1368	llassert( !(s6 > s7) );
1369	
1370	llassert( !(s6 == "hi"));
1371	llassert( s6 == "hello hello");
1372	llassert( s6 < "hi");
1373	
1374	llassert( s6[1] == 'e' );
1375	s6[1] = 'f';
1376	llassert( s6[1] == 'f' );
1377	
1378	s2.erase( 4, 1 );
1379	llassert( s2 == "hell");
1380	s2.insert( 0, 'y' );
1381	llassert( s2 == "yhell");
1382	s2.erase( 1, 3 );
1383	llassert( s2 == "yl");
1384	s2.insert( 1, "awn, don't yel");
1385	llassert( s2 == "yawn, don't yell");
1386	
1387	std::string s8 = s2.substr( 6, 5 );
1388	llassert( s8 == "don't"  );
1389	
1390	std::string s9 = "   \t\ntest  \t\t\n  ";
1391	trim(s9);
1392	llassert( s9 == "test"  );
1393
1394	s8 = "abc123&*(ABC";
1395
1396	s9 = s8;
1397	toUpper(s9);
1398	llassert( s9 == "ABC123&*(ABC"  );
1399
1400	s9 = s8;
1401	toLower(s9);
1402	llassert( s9 == "abc123&*(abc"  );
1403
1404
1405	std::string s10( 10, 'x' );
1406	llassert( s10 == "xxxxxxxxxx" );
1407
1408	std::string s11( "monkey in the middle", 7, 2 );
1409	llassert( s11 == "in" );
1410
1411	std::string s12;  //empty
1412	s12 += "foo";
1413	llassert( s12 == "foo" );
1414
1415	std::string s13;  //empty
1416	s13 += 'f';
1417	llassert( s13 == "f" );
1418}
1419
1420
1421#endif  // _DEBUG